Files
redux-scraper/app/jobs/domain/fa/job/user_incremental_job.rb
2025-01-01 03:29:53 +00:00

168 lines
5.3 KiB
Ruby

# typed: true
module Domain::Fa::Job
class UserIncrementalJob < Base
queue_as :fa_user_page
# Incrementally re-scans one FA user.
#
# This is similar to a user page job and updates the user from their page,
# but it also incrementally updates favs and follows:
# - favs: adds new favs from the 'favorites' section; check_favs enqueues a
#   full favs scan when the last listed fav is not yet recorded
# - follows: adds new rows from the 'watchers' / 'watching' sections
#
# args is handed to init_from_args!, which is defined outside this method and
# is expected to set @user, @force_scan and @caused_by_entry.
#
# Returns early, without fetching anything, for the sentinel user and for
# users that are not due for a scan (unless @force_scan is set).
def perform(args)
  init_from_args!(args)

  # buggy (sentinel) user record - never scan it
  return if @user.id == 117_552 && @user.url_name == "click here"

  logger.prefix =
    proc { "[ #{@user.id.to_s.bold} / #{@user.url_name.bold} ]" }

  scan_needed = @user.due_for_incremental_scan? || @force_scan
  unless scan_needed
    logger.warn(
      "scanned #{@user.time_ago_for_incremental_scan.bold}, skipping",
    )
    return
  end

  profile_url = "https://www.furaffinity.net/user/#{@user.url_name}/"
  response = http_client.get(profile_url, caused_by_entry: @caused_by_entry)
  @log_entry = response.log_entry

  outcome, details =
    Domain::Fa::Job::ScanUserUtils.check_disabled_or_not_found(@user, response)

  # :stop abandons the scan quietly, :fatal goes through fatal_error,
  # and only :ok yields a page to work with
  if outcome == :stop
    logger.error(details[:message])
    return
  end
  fatal_error(details[:message]) if outcome == :fatal
  page = details[:page] if outcome == :ok

  Domain::Fa::Job::ScanUserUtils.update_user_fields_from_page(
    @user,
    page,
    response,
  )

  check_favs(@user, page.user_page.recent_fav_fa_ids)
  check_watchers(@user, page.user_page.recent_watchers)
  check_watching(@user, page.user_page.recent_watching)

  @user.scanned_page_at = Time.now
  @user.scanned_incremental_at = Time.now
  @user.save!
  logger.info "completed page scan"
ensure
  # runs on every exit path (early return or exception) once a response with
  # status 200 was received; suppresses re-enqueueing this same job for
  # this same user
  if response&.status_code == 200
    enqueue_jobs_from_found_links(
      response.log_entry,
      suppress_jobs: [{ job: self.class, url_name: @user.url_name }],
    )
  end
end
# Merges the favs listed in the user page's 'favorites' section into the
# user's recorded favs.
#
# user              - the user whose fav_post_joins are read and enqueued for
# recent_fav_fa_ids - FA post ids from the page, resolved to post records via
#                     ScanPostUtils.find_or_create_by_fa_ids
#
# Outcomes:
# - nothing new: only @user.scanned_favs_at is bumped
# - new favs, but the last listed fav was already recorded: the new rows are
#   inserted and @user.scanned_favs_at is bumped
# - the last listed fav is itself new: the new rows are inserted and a full
#   FavsJob is deferred; scanned_favs_at is left untouched
#
# NOTE(review): timestamps and inserts go through @user while lookups use the
# user argument - the visible caller passes @user for both; confirm before
# calling this with any other user.
def check_favs(user, recent_fav_fa_ids)
  recent_fav_posts =
    Domain::Fa::Job::ScanPostUtils.find_or_create_by_fa_ids(
      recent_fav_fa_ids,
      caused_by_entry: @log_entry,
    )
  recent_fav_post_ids = recent_fav_posts.map(&:id)
  existing_fav_post_ids =
    user.fav_post_joins.where(post_id: recent_fav_post_ids).pluck(:post_id)

  # Array#- keeps repeated elements, so uniq guards against handing
  # insert_all! two identical rows when an id shows up more than once
  missing_fav_post_ids = (recent_fav_post_ids - existing_fav_post_ids).uniq

  if missing_fav_post_ids.empty?
    logger.info("no new favs for user")
    @user.scanned_favs_at = Time.now
    return
  end

  # past this point there is always at least one missing fav
  num_missing = missing_fav_post_ids.size
  logger.info("add #{num_missing.to_s.bold} new favs for user")
  @user.fav_post_joins.insert_all!(
    missing_fav_post_ids.map { |post_id| { post_id: post_id } },
  )

  if missing_fav_post_ids.include?(recent_fav_post_ids.last)
    # the last listed fav is new, so no known fav bounds the new ones
    logger.info(
      "last fav is new (#{num_missing.to_s.bold} missing), enqueue full favs scan",
    )
    defer_job(
      Domain::Fa::Job::FavsJob,
      { user: user, caused_by_entry: @log_entry },
    )
  else
    @user.scanned_favs_at = Time.now
  end
end
# Records who watches this user: every entry of recent_watchers that has no
# followed_joins row yet gets one (keyed by follower_id).
#
# Unlike check_watching, this neither touches scanned_follows_at nor
# enqueues a full follows scan job.
#
# TODO - may be useful to have a separate 'scan full followed by' job
# to handle users who are watched by a large number of others
#
# user            - the watched user; its followed_joins are read and extended
# recent_watchers - entries from the page's watchers section, resolved to
#                   user records via ScanUserUtils.find_or_create_by_names
def check_watchers(user, recent_watchers)
  watcher_models =
    Domain::Fa::Job::ScanUserUtils.find_or_create_by_names(recent_watchers)
  watcher_ids = watcher_models.map(&:id)
  known_ids =
    user.followed_joins.where(follower_id: watcher_ids).pluck(:follower_id)
  new_watchers = watcher_models.select { |model| !known_ids.include?(model.id) }

  if new_watchers.empty?
    logger.info("no new watchers")
    return
  end

  new_count = new_watchers.size
  rows = new_watchers.map { |model| { follower_id: model.id } }
  user.followed_joins.insert_all!(rows)
  logger.info("added #{new_count.to_s.bold} new watchers")
end
# Records who this user watches: every entry of recent_watching that has no
# follower_joins row yet gets one (keyed by followed_id).
#
# user            - the watching user; its follower_joins are read and extended
# recent_watching - entries from the page's watching section; each must
#                   respond to url_name, and the list is resolved to user
#                   records via ScanUserUtils.find_or_create_by_names
#
# Outcomes:
# - nothing new: only @user.scanned_follows_at is bumped
# - new rows, but the last listed user was already recorded: rows are
#   inserted and @user.scanned_follows_at is bumped
# - the last listed user is itself new: rows are inserted and a full
#   UserFollowsJob is enqueued; scanned_follows_at is left untouched
#
# NOTE(review): timestamps are written to @user while joins use the user
# argument - the visible caller passes @user for both.
def check_watching(user, recent_watching)
  recent_models =
    Domain::Fa::Job::ScanUserUtils.find_or_create_by_names(recent_watching)
  existing =
    user
      .follower_joins
      .where(followed_id: recent_models.map(&:id))
      .pluck(:followed_id)
  missing = recent_models.reject { |watched| existing.include?(watched.id) }

  if missing.empty?
    logger.info("no new users watched")
    @user.scanned_follows_at = Time.now
    return
  end

  num_missing = missing.size
  user.follower_joins.insert_all!(
    missing.map { |watched| { followed_id: watched.id } },
  )
  logger.info("added #{num_missing.to_s.bold} new users watched")

  # missing is non-empty here, so recent_watching has a last element;
  # look its url_name up once rather than on every comparison
  last_url_name = recent_watching.last.url_name
  if missing.any? { |watched| watched.url_name == last_url_name }
    logger.info("last user watched is new, enqueue full follows scan")
    # enqueued through defer_job, the same way check_favs enqueues FavsJob
    defer_job(
      Domain::Fa::Job::UserFollowsJob,
      { user: user, caused_by_entry: @log_entry },
    )
  else
    @user.scanned_follows_at = Time.now
  end
end
end
end