216 lines
5.9 KiB
Ruby
216 lines
5.9 KiB
Ruby
# typed: strict
|
|
class Tasks::Fa::BackfillFavsAndDatesTask < Tasks::InterruptableTask
|
|
extend T::Sig
|
|
include HasColorLogger
|
|
|
|
# Selects which logged FurAffinity pages a run of this task replays. The
# serialized value of the chosen member is embedded in the progress key.
class Mode < T::Enum
  enums do
    # Replay pages under both /favorites/ and /user/.
    Both = new("both")

    # Replay only pages under /favorites/.
    OnlyFavs = new("favs")

    # Replay only pages under /user/.
    OnlyUserPages = new("profiles")

    # Replay the /user/ and /favorites/ pages of a single user; the task
    # constructor requires a user url name in this mode.
    ForUser = new("for-user")
  end
end
|
|
|
|
# Key identifying this task's progress record. It always contains the
# serialized mode; in ForUser mode the user's url_name is appended so that
# each user gets a separate key.
sig { override.returns(String) }
def progress_key
  base_key = "task-fa-backfill-favs-and-dates-#{@mode.serialize}"
  return base_key unless @mode == Mode::ForUser

  "#{base_key}-#{@user&.url_name}"
end
|
|
|
|
# Builds the task.
#
# mode          - which pages to replay (see Mode).
# start_at      - optional starting point as a string; it is handed to
#                 get_progress and the result, converted to an Integer, is
#                 later used as the lowest log entry id to process.
#                 NOTE(review): get_progress is defined outside this file;
#                 presumably it prefers previously saved progress - confirm.
# log_sink      - IO that log output is forwarded to (defaults to $stderr).
# user_url_name - url name of the user to process; mandatory in ForUser mode.
# batch_size    - number of log entries per batch; 32 when nil.
#
# Raises RuntimeError in ForUser mode when user_url_name is blank or does not
# match any Domain::User::FaUser.
sig do
  params(
    mode: Mode,
    start_at: T.nilable(String),
    log_sink: T.any(IO, StringIO),
    user_url_name: T.nilable(String),
    batch_size: T.nilable(Integer),
  ).void
end
def initialize(
  mode:,
  start_at:,
  log_sink: $stderr,
  user_url_name: nil,
  batch_size: nil
)
  super(log_sink:)
  @mode = mode
  @batch_size = T.let(batch_size || 32, Integer)

  if @mode == Mode::ForUser
    if user_url_name.blank?
      raise("user_url_name is required for mode: #{@mode}")
    end

    found_user = Domain::User::FaUser.find_by(url_name: user_url_name)
    @user = T.let(found_user, T.nilable(Domain::User::FaUser))
    raise "user not found for #{user_url_name}" unless @user
  end

  # Resolved only after @mode / @user are set, because progress_key reads
  # both of them.
  resume_point = get_progress(start_at&.to_s)
  @start_at = T.let(resume_point&.to_i, T.nilable(Integer))
end
|
|
|
|
# Immutable counters describing how much work was done: how many user pages
# and favs pages were handled, plus the favs/dates statistics reported by the
# underlying jobs. Instances are combined with `+`.
class Stats < T::ImmutableStruct
  extend T::Sig
  include T::Struct::ActsAsComparable

  # Number of /user/ pages handled.
  const :num_user_pages, Integer, default: 0
  # Number of /favorites/ pages handled.
  const :num_favs_pages, Integer, default: 0
  # Aggregated statistics returned by the favs/dates update jobs.
  const :favs_dates_stats,
        Domain::Fa::Job::FavsJob::FavsAndDatesStats,
        default: Domain::Fa::Job::FavsJob::FavsAndDatesStats.zero

  # Returns a new Stats whose every field is the sum of the corresponding
  # fields of the receiver and `other`; neither operand is modified.
  sig { params(other: Stats).returns(Stats) }
  def +(other)
    user_pages = num_user_pages + other.num_user_pages
    favs_pages = num_favs_pages + other.num_favs_pages
    combined = favs_dates_stats + other.favs_dates_stats

    Stats.new(
      num_user_pages: user_pages,
      num_favs_pages: favs_pages,
      favs_dates_stats: combined,
    )
  end

  # The additive identity: every field at its default.
  sig { returns(Stats) }
  def self.zero
    Stats.new
  end

  # One-line human readable summary used in log output.
  sig { returns(String) }
  def to_s
    "[user pages: #{num_user_pages}] " \
      "[favs pages: #{num_favs_pages}] " \
      "[total: #{favs_dates_stats}]"
  end
end
|
|
|
|
# Replays stored HTTP log entries for www.furaffinity.net (status 200 only)
# whose path matches the selected mode, in batches of @batch_size starting at
# id @start_at. Each entry is passed to handle_log_entry; the returned Stats
# are accumulated per batch and overall, logged after every batch, and logged
# once more at the end.
#
# Every batch runs inside one ReduxApplicationRecord transaction. A
# StandardError raised while handling an entry is logged and the loop moves on
# to the next entry. After each batch the id of the last entry that was
# actually handled is stored with save_progress, and the loop stops as soon as
# interrupted? reports true.
sig { override.void }
def run_impl
  total_stats = Stats.zero

  # Only the ForUser branch carries a runtime value, so it is expressed as an
  # array condition: ActiveRecord binds and quotes the values instead of them
  # being interpolated into the SQL text.
  path_condition =
    case @mode
    when Mode::Both
      "uri_path like '/favorites/%' or uri_path like '/user/%'"
    when Mode::OnlyFavs
      "uri_path like '/favorites/%'"
    when Mode::OnlyUserPages
      "uri_path like '/user/%'"
    when Mode::ForUser
      [
        "uri_path like ? or uri_path like ?",
        "/user/#{@user&.url_name}/%",
        "/favorites/#{@user&.url_name}/%",
      ]
    end

  query =
    HttpLogEntry
      .where(uri_host: "www.furaffinity.net")
      .where(path_condition)
      .where(status_code: 200)

  # The up-front count is skipped in ForUser mode; the progress bar is then
  # created without a total.
  pb =
    if @mode == Mode::ForUser
      create_progress_bar(nil)
    else
      log("counting relevant log entries...")
      create_progress_bar(query.where(id: @start_at..).count)
    end

  start_profiling!

  query
    .includes(:response)
    .in_batches(start: @start_at, of: @batch_size) do |batch|
      entries = batch.to_a
      batch_stats = Stats.zero
      # Id of the last entry the inner loop got to. When the task is
      # interrupted in the middle of a batch this is NOT the batch's final id,
      # and saving the final id would skip the unprocessed tail on resume.
      last_handled_id = T.let(nil, T.nilable(Integer))

      ReduxApplicationRecord.transaction do
        entries.each do |hle|
          batch_stats += handle_log_entry(hle)
          pb.increment
        rescue StandardError => e
          log(
            "error handling log entry #{hle.id} / #{hle.uri_path}: #{e.message}",
          )
        ensure
          last_handled_id = hle.id
          break if interrupted?
        end
      end

      total_stats += batch_stats
      log(
        [
          "#{NumberHelper.number_with_delimiter(total_stats.num_user_pages)} user, " +
            "#{NumberHelper.number_with_delimiter(total_stats.num_favs_pages)} favs",
          "batch: #{batch_stats.favs_dates_stats}",
          "total: #{total_stats.favs_dates_stats}",
          "with fav id / with date / total",
        ].map { |s| "[#{s}]" }.join(" "),
      )

      save_progress(last_handled_id.to_s) if last_handled_id
      break if interrupted?
    end

  log("total stats: #{total_stats}")
end
|
|
|
|
# Processes one logged page and returns the Stats for it.
#
# The url name is taken from the second non-blank segment of the entry's
# path and resolved to a Domain::User::FaUser. The logged response is parsed
# without requiring a logged-in session; pages for disabled accounts and
# pages with no logged-in user yield Stats.zero without touching the database.
# Otherwise the entry is dispatched on its path to the favs or user handler.
#
# Raises RuntimeError when the path has no url name, when no user exists for
# it, or when the path is neither a /favorites/ nor a /user/ path. Errors
# from the handlers propagate as well.
sig { params(hle: HttpLogEntry).returns(Stats) }
def handle_log_entry(hle)
  url_name = hle.uri_path&.split("/")&.reject(&:blank?)&.second
  raise "no url_name found for #{hle.uri_path}" unless url_name

  user = Domain::User::FaUser.find_by(url_name:)
  raise "no user found for #{url_name}" unless user

  page_parser =
    Domain::Fa::Parser::Page.from_log_entry(hle, require_logged_in: false)
  return Stats.zero if page_parser.account_disabled?
  return Stats.zero if page_parser.logged_in_user.blank?

  # requires_new: true gives this entry its own savepoint when a transaction
  # is already open (run_impl wraps a whole batch in one). Without it this
  # block merely joins the outer transaction, so a failure here would leave
  # the entry's partial writes in place - or, for a database-level error,
  # leave the outer transaction aborted - while the caller rescues the
  # exception and carries on with the rest of the batch.
  ReduxApplicationRecord.transaction(requires_new: true) do
    case hle.uri_path
    when %r{/favorites/.+}
      handle_favs_log_entry(user, page_parser)
    when %r{/user/.+}
      handle_user_log_entry(user, page_parser)
    else
      raise "unknown uri path: #{hle.uri_path}"
    end
  end
end
|
|
|
|
# Applies a parsed favorites page to `user` through
# Domain::Fa::Job::FavsJob.update_favs_and_dates and reports the outcome as a
# Stats counting exactly one favs page.
#
# Raises RuntimeError when the parser does not consider the page a listings
# page.
sig do
  params(
    user: Domain::User::FaUser,
    page_parser: Domain::Fa::Parser::Page,
  ).returns(Stats)
end
def handle_favs_log_entry(user, page_parser)
  unless page_parser.probably_listings_page?
    raise "not a listings page"
  end

  job_stats =
    Domain::Fa::Job::FavsJob.update_favs_and_dates(user:, page_parser:)
  Stats.new(num_favs_pages: 1, favs_dates_stats: job_stats)
end
|
|
|
|
# Applies a parsed user page to `user` through
# Domain::Fa::Job::UserPageJob.update_favs_and_dates and reports the outcome
# as a Stats counting exactly one user page.
#
# Raises RuntimeError when the parser does not consider the page a user page.
sig do
  params(
    user: Domain::User::FaUser,
    page_parser: Domain::Fa::Parser::Page,
  ).returns(Stats)
end
def handle_user_log_entry(user, page_parser)
  unless page_parser.probably_user_page?
    raise "not a user page"
  end

  user_page = page_parser.user_page
  job_stats =
    Domain::Fa::Job::UserPageJob.update_favs_and_dates(user, user_page)
  Stats.new(num_user_pages: 1, favs_dates_stats: job_stats)
end
|
|
end
|