Files
redux-scraper/app/jobs/domain/e621/job/scan_user_favs_job.rb
2025-08-20 22:10:57 +00:00

159 lines
4.7 KiB
Ruby

# typed: strict
# frozen_string_literal: true

# Scans an e621 user's favorites by keyset-paginating /posts.json with
# `fav:<url_name>` ordered by descending post id, creating any posts not yet
# present locally, then bulk-upserting the user<->post favorite join rows and
# refreshing the user's favorites counter cache.
class Domain::E621::Job::ScanUserFavsJob < Domain::E621::Job::Base
  # Hard cap on pagination requests per run — guards against a runaway loop.
  MAX_PAGES_BEFORE_BREAK = 2400
  # e621's page-size limit; kept tiny in tests so pagination is exercised.
  MAX_PER_PAGE = T.let(Rails.env.test? ? 4 : 320, Integer)

  sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
  def perform(args)
    user = user_from_args!
    logger.push_tags(make_arg_tag(user))
    logger.info("server indicates #{user.num_other_favs_cached} favs")
    # A previous failed scan skips the user unless force_scan? overrides it.
    if user.scanned_favs_error?
      if force_scan?
        logger.info(
          "scanned favs status is error, but force scan is true, continuing",
        )
      else
        logger.warn("scanned favs status is error, skipping")
        return
      end
    end
    # Keyset-pagination cursor: e621's `page=b<id>` returns posts with ids
    # strictly below <id>.
    last_e621_post_id = T.let(nil, T.nilable(Integer))
    breaker = T.let(0, Integer) # request counter / loop guard
    post_ids = T.let([], T::Array[Integer]) # local Post ids of every fav seen
    total_new_posts = 0
    loop do
      breaker += 1
      if breaker > MAX_PAGES_BEFORE_BREAK
        logger.error("breaker is too big (#{breaker})")
        break
      end
      url =
        "https://e621.net/posts.json?tags=status:any+fav:#{user.url_name}+order:id_desc&limit=#{MAX_PER_PAGE}"
      url += "&page=b#{last_e621_post_id}" if last_e621_post_id
      response = http_client.get(url)
      # e621 answers 403 with this exact phrase when the user has hidden
      # their favorites; record the fact and stop — this is not an error.
      if response.status_code == 403 &&
           response.body.include?("This users favorites are hidden")
        user.favs_are_hidden = true
        user.scanned_favs_at = Time.now
        user.save!
        break
      end
      if response.status_code != 200
        fatal_error(
          "non 200 response for /posts.json: #{response.status_code.to_s.underline}",
        )
      end
      posts_json =
        T.cast(
          JSON.parse(response.body)["posts"],
          T::Array[T::Hash[String, T.untyped]],
        )
      if posts_json.empty?
        logger.info("no posts found on page #{breaker}")
        break
      end
      e621_post_id_to_post_json =
        posts_json
          .map do |post_json|
            [T.cast(post_json["id"].to_i, Integer), post_json]
          end
          .to_h
      logger.info "found #{posts_json.size} favs on page #{breaker}"
      # Resolve which of this page's e621 ids already exist locally, slicing
      # the lookup to keep the generated IN() clauses bounded.
      e621_id_to_post_id = T.let({}, T::Hash[Integer, Integer])
      e621_post_id_to_post_json
        .keys
        .each_slice(1000) do |e621_post_id_slice|
          e621_id_to_post_id.merge!(
            Domain::Post::E621Post
              .where(e621_id: e621_post_id_slice)
              .pluck(:e621_id, :id)
              .to_h,
          )
        end
      missing_e621_ids =
        e621_post_id_to_post_json.keys - e621_id_to_post_id.keys
      if missing_e621_ids.any?
        logger.info "creating #{missing_e621_ids.size} posts"
        missing_e621_ids.each do |e621_post_id|
          post_json = T.must(e621_post_id_to_post_json[e621_post_id])
          post =
            Domain::E621::TagUtil.initialize_or_update_post(
              post_json: post_json,
              caused_by_entry: causing_log_entry,
            )
          # new_record? must be checked before save! flips it.
          was_new = post.new_record?
          post.set_index_page_entry(response.log_entry)
          post.save!
          e621_id_to_post_id[e621_post_id] = T.must(post.id)
          if was_new
            logger.info("created post #{make_arg_tag(post).join(" ")}")
            total_new_posts += 1
          end
        end
      end
      post_ids.concat(e621_id_to_post_id.values)
      logger.info(
        "[total posts: #{post_ids.size}] [total created: #{total_new_posts}]",
      )
      # A short page means we've reached the end of the user's favorites.
      if posts_json.size < MAX_PER_PAGE
        logger.info(
          "number of posts #{posts_json.size} < MAX_PER_PAGE (#{MAX_PER_PAGE}), breaking",
        )
        break
      end
      last_e621_post_id = T.cast(T.must(posts_json.last)["id"].to_i, Integer)
    end
    logger.info "upserting #{post_ids.size} favs"
    post_ids.each_slice(1000) do |slice|
      ReduxApplicationRecord.transaction do
        Domain::UserPostFav::E621UserPostFav.upsert_all(
          slice.map do |post_id|
            { user_id: user.id, post_id: post_id, removed: false }
          end,
          unique_by: %i[user_id post_id],
        )
      end
    end
    # Use reset_counters to update the counter cache after using upsert_all
    # (upsert_all bypasses ActiveRecord callbacks, so the cache goes stale).
    Domain::User.reset_counters(user.id, :user_post_favs)
    logger.info("[reset user_post_favs counter cache for user: #{user.id}]")
    logger.info(
      [
        "[favs scanned: #{post_ids.size.to_s.bold}]",
        "[posts created: #{total_new_posts.to_s.bold}]",
        "[total requests: #{breaker}]",
        "done",
      ].join(" "),
    )
    user.scanned_favs_ok!
    user.scanned_favs_at = Time.now
    user.save!
  rescue StandardError
    # `user` is nil here if user_from_args! itself raised.
    logger.error("error scanning user favs: #{user&.e621_id}")
    user = user_from_args
    user.scanned_favs_error! if user
    raise
  ensure
    user.save! if user
    logger.pop_tags
  end
end