Files
redux-scraper/app/lib/tasks/fa/backfill_favs_and_dates_task.rb
2025-09-10 16:30:51 +00:00

216 lines
5.9 KiB
Ruby

# typed: strict
class Tasks::Fa::BackfillFavsAndDatesTask < Tasks::InterruptableTask
extend T::Sig
include HasColorLogger
# Selects which archived FurAffinity pages the task replays. The serialized
# value is also embedded in the task's progress key.
class Mode < T::Enum
  enums do
    # Favorites listing pages and user profile pages.
    Both = new("both")
    # Favorites listing pages only.
    OnlyFavs = new("favs")
    # User profile pages only.
    OnlyUserPages = new("profiles")
    # Both kinds of page, restricted to a single user.
    ForUser = new("for-user")
  end
end
# Key identifying this task's progress record. It is unique per mode, and in
# ForUser mode also per user, so separate runs do not share progress.
# NOTE(review): presumably consumed by get_progress / save_progress in
# Tasks::InterruptableTask — confirm there.
sig { override.returns(String) }
def progress_key
  base_key = "task-fa-backfill-favs-and-dates-#{@mode.serialize}"
  return base_key unless @mode == Mode::ForUser

  "#{base_key}-#{@user&.url_name}"
end
# Builds the task.
#
# mode::          which pages to replay (see Mode).
# start_at::      log entry id to start from, as a string; handed to
#                 get_progress and the result is converted with to_i.
#                 NOTE(review): presumably a saved progress value takes
#                 precedence over this argument — confirm in the base class.
# log_sink::      where log output goes; forwarded to the superclass.
# user_url_name:: FaUser url_name; required when mode is Mode::ForUser.
# batch_size::    number of log entries per batch; 32 when nil.
#
# Raises RuntimeError in ForUser mode when user_url_name is blank or does not
# match an existing FaUser.
sig do
  params(
    mode: Mode,
    start_at: T.nilable(String),
    log_sink: T.any(IO, StringIO),
    user_url_name: T.nilable(String),
    batch_size: T.nilable(Integer),
  ).void
end
def initialize(
  mode:,
  start_at:,
  log_sink: $stderr,
  user_url_name: nil,
  batch_size: nil
)
  super(log_sink:)
  @mode = mode
  @batch_size = T.let(batch_size || 32, Integer)

  if @mode == Mode::ForUser
    if user_url_name.blank?
      raise("user_url_name is required for mode: #{@mode}")
    end

    found_user = Domain::User::FaUser.find_by(url_name: user_url_name)
    @user = T.let(found_user, T.nilable(Domain::User::FaUser))
    raise "user not found for #{user_url_name}" unless @user
  end

  # Must run after @mode / @user are set: progress_key depends on both.
  resume_from = get_progress(start_at&.to_s)
  @start_at = T.let(resume_from&.to_i, T.nilable(Integer))
end
# Immutable counters describing the work done for one or more log entries.
# Instances are combined with +, starting from Stats.zero.
class Stats < T::ImmutableStruct
  extend T::Sig
  include T::Struct::ActsAsComparable

  # Number of user profile pages handled.
  const :num_user_pages, Integer, default: 0
  # Number of favorites listing pages handled.
  const :num_favs_pages, Integer, default: 0
  # Aggregated stats returned by the favs / user page jobs.
  const :favs_dates_stats,
        Domain::Fa::Job::FavsJob::FavsAndDatesStats,
        default: Domain::Fa::Job::FavsJob::FavsAndDatesStats.zero

  # The additive identity: every field at its default.
  sig { returns(Stats) }
  def self.zero
    new
  end

  # Field-wise sum of two Stats; neither operand is modified.
  sig { params(other: Stats).returns(Stats) }
  def +(other)
    user_pages = num_user_pages + other.num_user_pages
    favs_pages = num_favs_pages + other.num_favs_pages
    combined = favs_dates_stats + other.favs_dates_stats
    Stats.new(
      num_user_pages: user_pages,
      num_favs_pages: favs_pages,
      favs_dates_stats: combined,
    )
  end

  # One-line summary used in log output.
  sig { returns(String) }
  def to_s
    user_part = "[user pages: #{num_user_pages}]"
    favs_part = "[favs pages: #{num_favs_pages}]"
    total_part = "[total: #{favs_dates_stats}]"
    [user_part, favs_part, total_part].join(" ")
  end
end
# Replays archived www.furaffinity.net responses (status 200) selected by
# @mode through handle_log_entry, in batches of @batch_size, one database
# transaction per batch. After each batch the running totals are logged and
# progress is saved so an interrupted run can be resumed.
#
# A StandardError raised for a single entry is logged and the run continues
# with the next entry. Any other exception propagates.
sig { override.void }
def run_impl
  total_stats = Stats.zero

  # NOTE(review): in ForUser mode the url_name is interpolated into the SQL
  # unescaped. It comes from the FaUser row loaded in initialize rather than
  # straight from the caller, but LIKE wildcards in a url_name would widen
  # the match.
  query_string =
    case @mode
    when Mode::Both
      "uri_path like '/favorites/%' or uri_path like '/user/%'"
    when Mode::OnlyFavs
      "uri_path like '/favorites/%'"
    when Mode::OnlyUserPages
      "uri_path like '/user/%'"
    when Mode::ForUser
      "uri_path like '/user/#{@user&.url_name}/%' or uri_path like '/favorites/#{@user&.url_name}/%'"
    end

  query =
    HttpLogEntry
      .where(uri_host: "www.furaffinity.net")
      .where(query_string)
      .where(status_code: 200)

  if @mode != Mode::ForUser
    log("counting relevant log entries...")
    total = query.where(id: @start_at..).count
    pb = create_progress_bar(total)
  else
    # No up-front count for a single user; the bar gets no total.
    pb = create_progress_bar(nil)
  end

  start_profiling!

  query
    .includes(:response)
    .in_batches(start: @start_at, of: @batch_size) do |batch|
      entries = batch.to_a
      batch_stats = Stats.zero
      # Last entry that was actually handled. On interruption this can be
      # earlier than entries.last.
      last_handled = T.let(nil, T.nilable(HttpLogEntry))

      ReduxApplicationRecord.transaction do
        entries.each do |hle|
          begin
            batch_stats += handle_log_entry(hle)
          rescue StandardError => e
            log(
              "error handling log entry #{hle.id} / #{hle.uri_path}: #{e.message}",
            )
          end
          # Failed entries count as visited too; otherwise the bar could
          # never reach the total counted above.
          pb.increment
          last_handled = hle
          # Checked after the entry rather than in an `ensure`, where a
          # `break` would silently discard a non-StandardError exception.
          break if interrupted?
        end
      end

      total_stats += batch_stats
      log(
        [
          "#{NumberHelper.number_with_delimiter(total_stats.num_user_pages)} user, " +
            "#{NumberHelper.number_with_delimiter(total_stats.num_favs_pages)} favs",
          "batch: #{batch_stats.favs_dates_stats}",
          "total: #{total_stats.favs_dates_stats}",
          "with fav id / with date / total",
        ].map { |s| "[#{s}]" }.join(" "),
      )

      # Save the last entry actually handled, not the last entry of the
      # batch. When interrupted mid-batch the remainder was never processed
      # and must not be skipped on resume. in_batches treats `start` as
      # inclusive, so a resumed run handles this entry once more.
      last_id = last_handled&.id&.to_s
      save_progress(last_id) if last_id
      break if interrupted?
    end

  log("total stats: #{total_stats}")
end
# Applies one archived response to the database and returns what was updated.
#
# The FaUser is looked up from the second non-blank segment of the entry's
# uri_path (e.g. "name" in "/user/name/"). Returns Stats.zero without
# touching the database when the parsed page reports a disabled account or
# has no logged-in user.
#
# Raises RuntimeError when the path has no url_name segment, no FaUser
# matches it, the path is neither a favorites nor a user path, or the page
# is not of the expected kind.
sig { params(hle: HttpLogEntry).returns(Stats) }
def handle_log_entry(hle)
  url_name = hle.uri_path&.split("/")&.reject(&:blank?)&.second
  raise "no url_name found for #{hle.uri_path}" unless url_name

  user = Domain::User::FaUser.find_by(url_name:)
  raise "no user found for #{url_name}" unless user

  page_parser =
    Domain::Fa::Parser::Page.from_log_entry(hle, require_logged_in: false)
  return Stats.zero if page_parser.account_disabled?
  return Stats.zero if page_parser.logged_in_user.blank?

  # requires_new: true opens a savepoint when called inside an enclosing
  # transaction (as run_impl does). Without it the nested block simply joins
  # the outer transaction: a failure half-way through an entry would leave
  # its partial writes to be committed with the batch, and on PostgreSQL a
  # failed statement would abort every later entry in the same batch.
  ReduxApplicationRecord.transaction(requires_new: true) do
    case hle.uri_path
    when %r{/favorites/.+}
      handle_favs_log_entry(user, page_parser)
    when %r{/user/.+}
      handle_user_log_entry(user, page_parser)
    else
      raise "unknown uri path: #{hle.uri_path}"
    end
  end
end
# Handles a parsed favorites page for `user` by delegating to
# Domain::Fa::Job::FavsJob.update_favs_and_dates, and reports it as one favs
# page. Raises RuntimeError when the page does not look like a listings page.
sig do
  params(
    user: Domain::User::FaUser,
    page_parser: Domain::Fa::Parser::Page,
  ).returns(Stats)
end
def handle_favs_log_entry(user, page_parser)
  unless page_parser.probably_listings_page?
    raise "not a listings page"
  end

  job_stats =
    Domain::Fa::Job::FavsJob.update_favs_and_dates(user:, page_parser:)
  Stats.new(num_favs_pages: 1, favs_dates_stats: job_stats)
end
# Handles a parsed user profile page for `user` by delegating to
# Domain::Fa::Job::UserPageJob.update_favs_and_dates, and reports it as one
# user page. Raises RuntimeError when the page does not look like a user page.
sig do
  params(
    user: Domain::User::FaUser,
    page_parser: Domain::Fa::Parser::Page,
  ).returns(Stats)
end
def handle_user_log_entry(user, page_parser)
  unless page_parser.probably_user_page?
    raise "not a user page"
  end

  user_page = page_parser.user_page
  job_stats =
    Domain::Fa::Job::UserPageJob.update_favs_and_dates(user, user_page)
  Stats.new(num_user_pages: 1, favs_dates_stats: job_stats)
end
end