Files
redux-scraper/app/jobs/domain/fa/job/favs_job.rb
2025-06-17 05:56:11 +00:00

171 lines
5.0 KiB
Ruby

# typed: strict
# Scans a FurAffinity user's favorites listing, page by page, and records the
# posts found there as Domain::UserPostFav rows for that user.
#
# Paging follows the "next" button id parsed out of each fetched page. A scan
# is either "full" (walk every page) or incremental (stop at the first page
# that contains fewer than FAVED_POSTS_PER_PAGE_THRESHOLD previously-unseen
# favs).
class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
  include HasBulkEnqueueJobs

  queue_as :fa_user_favs

  # Users whose favorites have never been scanned (scanned_favs_at is nil) are
  # enqueued with priority -11; everyone else with -1.
  queue_with_priority do
    T.bind(self, Domain::Fa::Job::FavsJob)
    user_from_args!.scanned_favs_at.nil? ? -11 : -1
  end

  # An incremental scan stops once a page yields fewer new favs than this.
  # Lowered to 2 in the test environment.
  FAVED_POSTS_PER_PAGE_THRESHOLD = T.let(Rails.env.test? ? 2 : 36, Integer)

  # Safety bound on paging: scanning stops once the zero-based request counter
  # exceeds this value.
  MAX_PAGE_NUMBER = 2000

  sig { params(args: T.untyped).void }
  def initialize(*args)
    super(*T.unsafe(args))
    # Id used to build the "next page" URL; nil until the first page is parsed.
    @page_id = T.let(nil, T.nilable(String))
    # Zero-based count of listing pages requested so far.
    @request_number = T.let(0, Integer)
  end

  # Runs the favorites scan for the user identified by the job arguments.
  #
  # args[:full_scan] (truthy) disables the incremental early-stop. The user
  # record is saved in the `ensure` clause on every exit path where it was
  # resolved, so changes to scanned_favs_at made during the scan are persisted.
  sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
  def perform(args)
    full_scan = !!args[:full_scan]
    user = user_from_args!(create_if_missing: true)
    logger.push_tags(make_arg_tag(user))
    return if buggy_user?(user)

    unless user_due_for_favs_scan?(user)
      logger.warn(format_tags("user not due for favs scan, skipping"))
      return
    end

    # Every fav id seen during this scan, across all pages.
    faved_post_ids = T.let(Set.new, T::Set[Integer])
    # Fav ids already stored for this user before the scan started.
    existing_faved_post_ids =
      T.let(Set.new(user.user_post_favs.pluck(:post_id)), T::Set[Integer])

    logger.info(
      format_tags(
        make_tag("server num favorites", user.num_favorites),
        make_tag("existing favorites", existing_faved_post_ids.size),
        make_tag("max page number", MAX_PAGE_NUMBER),
      ),
    )

    loop do
      ret = scan_next_page(user: user)
      # NOTE(review): a Stop result returns immediately, so ids collected from
      # earlier pages of this run are not upserted. Possibly intentional (it
      # avoids persisting a partial scan) — confirm before changing.
      return if ret.is_a?(ScanPageResult::Stop)

      # Accumulate in place; the set is local to this call, and merging avoids
      # re-copying the whole accumulated set on every page.
      faved_post_ids.merge(ret.faved_post_ids_on_page)
      new_faved_post_ids_on_page =
        ret.faved_post_ids_on_page - existing_faved_post_ids

      logger.info(
        format_tags(
          make_tag("request number", @request_number),
          make_tag("new favs", new_faved_post_ids_on_page.size),
          make_tag("created post models", ret.posts_created_ids.size),
        ),
      )

      if !full_scan &&
           (new_faved_post_ids_on_page.size < FAVED_POSTS_PER_PAGE_THRESHOLD)
        logger.info(format_tags("incremenetal scan, stopping"))
        break
      end

      unless ret.keep_scanning
        logger.info(format_tags("no next favs page, stopping"))
        break
      end

      if @request_number > MAX_PAGE_NUMBER
        logger.warn(
          format_tags(
            "request number(#{@request_number}) > max page number(#{MAX_PAGE_NUMBER})",
          ),
        )
        break
      end

      @request_number += 1
    end

    faved_post_ids_to_add = faved_post_ids - existing_faved_post_ids
    upsert_faved_post_ids(user:, post_ids: faved_post_ids_to_add)
  ensure
    user.save! if user
  end

  private

  # Upserts one Domain::UserPostFav row per id in `post_ids` for `user`, in
  # slices of 1000, then resets the user's user_post_favs counter cache.
  # Also assigns user.scanned_favs_at (in memory only; the caller saves).
  # When `post_ids` is empty only the timestamp assignment and the final log
  # line happen.
  sig { params(user: Domain::User::FaUser, post_ids: T::Set[Integer]).void }
  def upsert_faved_post_ids(user:, post_ids:)
    ReduxApplicationRecord.transaction do
      if post_ids.any?
        post_ids.each_slice(1000) do |slice|
          Domain::UserPostFav.upsert_all(
            slice.map { |id| { user_id: user.id, post_id: id } },
            unique_by: %i[user_id post_id],
          )
        end
        # Use reset_counters to update the counter cache after using upsert_all
        Domain::User.reset_counters(user.id, :user_post_favs)
        logger.info(
          format_tags(
            make_tag("reset user_post_favs counter cache for user", user.id),
          ),
        )
      end
      user.scanned_favs_at = Time.zone.now
    end
    logger.info(format_tags(make_tag("total new favs", post_ids.size)))
  end

  # Outcome of scanning a single favorites page.
  module ScanPageResult
    extend T::Sig

    # The scan cannot continue (user disabled / not found, or the response was
    # not recognised as a listings page).
    class Stop < T::Struct
    end

    # A listings page was parsed successfully.
    class Ok < T::Struct
      include T::Struct::ActsAsComparable

      # Ids of all posts listed on the page.
      const :faved_post_ids_on_page, T::Set[Integer]
      # Ids of the posts reported as new by the listing-page update.
      const :posts_created_ids, T::Set[Integer]
      # True when the page exposed a "next" button id to follow.
      const :keep_scanning, T::Boolean
    end

    Result = T.type_alias { T.any(Stop, Ok) }
  end

  # Fetches and processes the next favorites page for `user`.
  #
  # The first request goes to the user's favorites root; later requests use
  # the "next" id captured from the previously parsed page. A non-200 response
  # is reported through fatal_error (presumably raises — confirm in the base
  # class). user.scanned_favs_at is assigned before any Stop is returned.
  #
  # Returns ScanPageResult::Stop when the user is disabled / not found or the
  # page does not look like a listings page; otherwise ScanPageResult::Ok, and
  # @page_id is advanced to the page's "next" button id.
  sig { params(user: Domain::User::FaUser).returns(ScanPageResult::Result) }
  def scan_next_page(user:)
    url =
      if @page_id
        "https://www.furaffinity.net/favorites/#{user.url_name}/#{@page_id}/next"
      else
        "https://www.furaffinity.net/favorites/#{user.url_name}/"
      end

    response = http_client.get(url)
    if response.status_code != 200
      fatal_error(
        "http #{response.status_code.to_s.red.bold}, log entry #{response.log_entry.id.to_s.bold}",
      )
    end

    disabled_or_not_found = user_disabled_or_not_found?(user, response)
    user.scanned_favs_at = Time.current
    return ScanPageResult::Stop.new if disabled_or_not_found

    page_parser = Domain::Fa::Parser::Page.new(response.body)
    return ScanPageResult::Stop.new unless page_parser.probably_listings_page?

    listing_page_stats =
      update_and_enqueue_posts_from_listings_page(
        ListingPageType::FavsPage.new(page_number: @page_id, user:),
        page_parser:,
      )

    # Remember where to continue; blank means this was the last page.
    @page_id = page_parser.favorites_next_button_id

    ScanPageResult::Ok.new(
      faved_post_ids_on_page:
        listing_page_stats.all_posts.map(&:id).compact.to_set,
      posts_created_ids: listing_page_stats.new_posts.map(&:id).compact.to_set,
      keep_scanning: @page_id.present?,
    )
  end
end