# typed: true

# Job that walks a user's favorites listing on furaffinity.net page by page and
# reconciles the user's `fav_post_joins` rows with the posts found there.
#
# Two scan modes exist:
# * full scan (`args[:full_scan]` truthy, or `@force_scan` set): every page is
#   visited until the listing has no "next" button or the page cap is hit.
# * incremental scan: paging stops at the first page that contains no post
#   which is not already an active fav of the user.
#
# Favs are only flagged as removed when the listing was read through to its
# final page; a partial listing cannot prove that a fav disappeared.
class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
  include HasBulkEnqueueJobs

  # Not referenced in this file; kept because other code may read it.
  USERS_PER_FULL_PAGE = Rails.env.test? ? 9 : 190

  # Safety cap on paging. The check runs after a page was fetched and before
  # @page_number is incremented, so at most PAGE_NUMBER_LIMIT + 2 pages are
  # requested. (The original comment described this as bailing out at 100,000
  # items; the per-page item count is not visible here.)
  PAGE_NUMBER_LIMIT = 500

  queue_as :fa_user_favs

  # Entry point of the job.
  #
  # @param args [Hash] job arguments. `:url_name` and `:full_scan` are read
  #   here; the hash is also handed to `init_from_args!`, which presumably sets
  #   @user, @force_scan and @caused_by_entry (defined outside this file).
  # @return [void]
  def perform(args)
    @first_job_entry = nil
    init_from_args!(args, build_user: false)
    @full_scan = !!args[:full_scan] || @force_scan

    unless @user
      # The user is unknown locally: queue a user page scan for the url name
      # and fail this job. fatal_error is presumably raising - TODO confirm.
      Domain::Fa::Job::UserPageJob.perform_later(
        { url_name: args[:url_name], caused_by_entry: best_caused_by_entry },
      )
      fatal_error("user does not exist: #{args}")
    end

    logger.prefix =
      "[#{(@user.url_name || @user.name).bold} / #{@user.state.bold}]"
    return unless user_due_for_scan?(:favs)

    @page_number = 0
    @page_id = nil
    @total_items_seen = 0
    @seen_post_ids = Set.new
    @last_page_post_ids = Set.new
    existing_faved_ids = Set.new(@user.fav_post_joins.active.pluck(:post_id))

    # True only when a page without a "next" button was reached, i.e. the
    # whole listing has been seen.
    reached_last_page = false

    loop do
      case scan_page
      when :stop
        # Account disabled / not found: leave everything untouched.
        return
      when :break
        reached_last_page = true
        break
      end

      # Incremental scan: a page with nothing new means the remaining pages
      # are assumed to be known already. Leave the loop instead of returning,
      # so favs discovered on earlier pages of this run are still stored.
      if !@full_scan && (@last_page_post_ids - existing_faved_ids).empty?
        logger.info "[no new favs] [stopping scan]"
        break
      end

      if @page_number > PAGE_NUMBER_LIMIT
        logger.info "[page limit reached] [stopping scan]"
        break
      end
      @page_number += 1
    end

    to_add = @seen_post_ids - existing_faved_ids
    # A fav counts as removed only if the complete listing was seen and the
    # post was not on it; after a partial scan nothing is removed.
    to_remove =
      reached_last_page ? existing_faved_ids - @seen_post_ids : Set.new
    logger.info "[calc change favs] [add #{to_add.size.to_s.bold}] [remove #{to_remove.size.to_s.bold}]"

    persist_fav_changes(to_add, to_remove)
    logger.info "[updated favs list] [posts: #{@user.fav_post_joins.count.to_s.bold}]"
  end

  private

  # Writes the computed fav changes and the scan timestamp in one transaction.
  #
  # @param to_add [Set] post ids to upsert as active (removed: false) favs
  # @param to_remove [Set] post ids whose active fav rows get removed: true
  # @return [void]
  def persist_fav_changes(to_add, to_remove)
    ReduxApplicationRecord.transaction do
      if to_remove.any?
        @user
          .fav_post_joins
          .active
          .where(post_id: to_remove)
          .update_all(removed: true)
      end

      # Larger batches once there are many rows to write.
      slice_size = to_add.size <= 2500 ? 100 : 1000
      to_add.each_slice(slice_size) do |slice|
        @user.fav_post_joins.upsert_all(
          slice.map { |id| { post_id: id, removed: false } },
          unique_by: :index_domain_fa_favs_on_user_id_and_post_id,
          update_only: [:removed],
        )
      end

      @user.scanned_favs_at = Time.now
      @user.save!
    end
  end

  # Fetches and processes one page of the favorites listing.
  #
  # Side effects: updates @page_id (id used to build the next page url),
  # @total_items_seen, @seen_post_ids and @last_page_post_ids; inserts
  # Domain::Fa::Post rows for submissions not yet in the database and enqueues
  # a scan job for each of them.
  #
  # @return [Symbol, nil] :stop when the account is disabled / not found,
  #   :break when the page has no "next" button, nil otherwise
  def scan_page
    url =
      if @page_id
        "https://www.furaffinity.net/favorites/#{@user.url_name}/#{@page_id}/next"
      else
        "https://www.furaffinity.net/favorites/#{@user.url_name}/"
      end
    response = http_client.get(url, caused_by_entry: best_caused_by_entry)
    @first_job_entry ||= response.log_entry

    if response.status_code != 200
      fatal_error(
        "http #{response.status_code.to_s.red.bold}, " \
          "log entry #{response.log_entry.id.to_s.bold}",
      )
    end

    if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
         @user,
         response,
       )
      logger.error("account disabled / not found, abort")
      return :stop
    end

    page = Domain::Fa::Parser::Page.new(response.body)
    fatal_error("not a favs listing page") unless page.probably_listings_page?
    submissions = page.submissions_parsed
    @page_id = page.favorites_next_button_id
    @total_items_seen += submissions.length

    # fa_id => database id for the submissions that already have a post row.
    existing_fa_id_to_post_id =
      Domain::Fa::Post
        .where(fa_id: submissions.map(&:id))
        .pluck(:fa_id, :id)
        .to_h

    posts_to_create_hashes =
      submissions
        .reject { |submission| existing_fa_id_to_post_id[submission.id] }
        .map do |submission|
          Domain::Fa::Post.hash_from_submission_parser_helper(
            submission,
            first_seen_log_entry: response.log_entry,
          )
        end

    created_post_ids =
      if posts_to_create_hashes.empty?
        []
      else
        Domain::Fa::Post
          .insert_all!(posts_to_create_hashes, returning: %i[id fa_id])
          .map { |row| row["id"] }
      end

    enqueue_new_post_scan_jobs(
      posts_to_create_hashes.map { |hash| hash[:fa_id] },
    )

    # Post ids on this page: freshly created plus previously known ones.
    @last_page_post_ids =
      Set.new(created_post_ids + existing_fa_id_to_post_id.values)
    @seen_post_ids.merge(@last_page_post_ids)

    logger.info [
                  "[page #{@page_number.to_s.bold}]",
                  "[posts: #{submissions.length.to_s.bold}]",
                  "[created: #{posts_to_create_hashes.size.to_s.bold}]",
                ].join(" ")

    # No "next" button id means this was the final page.
    :break if @page_id.nil?
  end

  # Enqueues one ScanPostJob per given id inside a bulk_enqueue_jobs block.
  #
  # @param fa_ids [Array] fa_id values of the posts to scan
  def enqueue_new_post_scan_jobs(fa_ids)
    bulk_enqueue_jobs do
      fa_ids.each do |fa_id|
        Domain::Fa::Job::ScanPostJob.perform_later(
          { fa_id: fa_id, caused_by_entry: best_caused_by_entry },
        )
      end
    end
  end

  # Log entry to attribute follow-up requests and jobs to: the first response
  # log entry recorded by this job if there is one, else @caused_by_entry.
  def best_caused_by_entry
    @first_job_entry || @caused_by_entry
  end
end