rake job fixes, optimize fa favs backfill

This commit is contained in:
Dylan Knutson
2025-09-10 16:30:51 +00:00
parent 4f7217abf0
commit 3174032ac3
10 changed files with 205 additions and 163 deletions

View File

@@ -239,26 +239,36 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
user.update_fav_model(post_id:, fav_id:, explicit_time:)
end
(page_parser.submissions_parsed[1..] || [])
.filter_map do |sub_data|
post_id = (id = sub_data.id) && fa_id_to_post_id[id]
next if post_id.nil?
fav_id = sub_data.fav_id
next if fav_id.nil?
user_post_favs_with_fav_id =
(page_parser.submissions_parsed[1..] || [])
.filter_map do |sub_data|
post_id = (id = sub_data.id) && fa_id_to_post_id[id]
next if post_id.nil?
fav_id = sub_data.fav_id
next if fav_id.nil?
FavUpsertData
.new(post_id:, fav_id:)
.tap do
num_updated_with_fav_fa_id += 1
num_updated_total += 1
end
end
.group_by(&:post_id)
.values
.filter_map { |data_arr| data_arr.max_by(&:fav_id) }
.each do |data|
user.update_fav_model(post_id: data.post_id, fav_id: data.fav_id)
end
FavUpsertData
.new(post_id:, fav_id:)
.tap do
num_updated_with_fav_fa_id += 1
num_updated_total += 1
end
end
.group_by(&:post_id)
.values
.filter_map { |data_arr| data_arr.max_by(&:fav_id) }
.map do |data|
{
user_id: T.must(user.id),
post_id: data.post_id,
fa_fav_id: data.fav_id,
}
end
Domain::UserPostFav::FaUserPostFav.upsert_all(
user_post_favs_with_fav_id,
unique_by: %i[user_id post_id],
)
FavsAndDatesStats.new(
num_updated_with_fav_fa_id:,

View File

@@ -32,8 +32,8 @@ class EnqueueJobBase < Tasks::InterruptableTask
10.seconds
end
sig { void }
def run
sig { override.void }
def run_impl
@inferred_queue_size = queue_size
logger.info(
"initial queue size is #{@inferred_queue_size}, starting enqueuing",

View File

@@ -24,21 +24,30 @@ module Tasks
mode: Mode,
user_param: T.nilable(String),
start_at: T.nilable(String),
log_sink: T.any(IO, StringIO),
).void
end
def run(mode:, user_param: nil, start_at: nil)
case mode
def initialize(mode:, user_param: nil, start_at: nil, log_sink: $stderr)
super(log_sink:)
@mode = T.let(mode, Mode)
@user_param = T.let(user_param, T.nilable(String))
@start_at = T.let(start_at, T.nilable(String))
end
sig { override.void }
def run_impl
case @mode
when Mode::PostFileDescending
run_post_file_descending(start_at)
run_post_file_descending(@start_at)
when Mode::PostsDescending
run_posts_descending
when Mode::User
raise "need 'user_param' when mode is Mode::User" unless user_param
run_single_user(user_param)
raise "need 'user_param' when mode is Mode::User" unless @user_param
run_single_user(@user_param)
when Mode::UsersDescending
run_users_descending
else
T.absurd(mode)
T.absurd(@mode)
end
end
@@ -68,7 +77,7 @@ module Tasks
last_post_file = T.must(post_files.last)
post_files
.each_slice(post_files.size / num_threads)
.each_slice([post_files.size / num_threads, 1].max)
.map
.with_index do |batch, index|
Thread.new do

View File

@@ -88,8 +88,8 @@ class Tasks::Fa::BackfillFavsAndDatesTask < Tasks::InterruptableTask
end
end
sig { void }
def run
sig { override.void }
def run_impl
total_stats = Stats.zero
query_string =
@@ -118,6 +118,8 @@ class Tasks::Fa::BackfillFavsAndDatesTask < Tasks::InterruptableTask
pb = create_progress_bar(nil)
end
start_profiling!
query
.includes(:response)
.in_batches(start: @start_at, of: @batch_size) do |batch|

View File

@@ -7,6 +7,11 @@ class Tasks::Fa::MigrateFaUserPostFavs < Tasks::InterruptableTask
"fa-migrate-fa-user-post-favs"
end
# This task has no whole-task entry point: calling it always raises a
# RuntimeError with the message "not implemented".
sig { override.void }
def run_impl
  raise RuntimeError, "not implemented"
end
sig { params(user: Domain::User::FaUser, batch_size: Integer).void }
def run_for_user(user:, batch_size: 100)
user_faved_post_ids = user.faved_posts.pluck(:id)

View File

@@ -65,5 +65,44 @@ module Tasks
format: PB_FORMAT,
)
end
# Entry point for the task: delegates the work to `run_impl` and then
# calls `end_profiling!`, whether `run_impl` returned normally or raised.
sig { void }
def run
  run_impl
ensure
  end_profiling!
end
# Hook invoked by `run`. Declared abstract in the signature, so the body
# here is intentionally empty.
sig { abstract.void }
def run_impl; end
# Starts a RubyProf session when the PROFILE environment variable is set,
# announcing it on the log sink first. Does nothing when PROFILE is unset.
sig { void }
def start_profiling!
  return if ENV["PROFILE"].nil?

  @log_sink.puts "starting profiling"
  RubyProf.start
end
# Stops the RubyProf session and writes its results under
# "profiler/<progress_key>/" as three reports: profile.txt (GraphPrinter),
# profile.html (CallStackPrinter) and profile.rubyprof (SpeedscopePrinter).
#
# Does nothing unless the PROFILE environment variable is set and RubyProf
# is currently running.
sig { void }
def end_profiling!
  return unless ENV["PROFILE"] && RubyProf.running?

  output_dir = "profiler/#{progress_key}"
  FileUtils.mkdir_p(output_dir) unless File.exist?(output_dir)
  profile = RubyProf.stop

  File.open("#{output_dir}/profile.txt", "w") do |io|
    graph_printer = RubyProf::GraphPrinter.new(profile)
    graph_printer.print(io, { min_percent: 1 })
  end

  File.open("#{output_dir}/profile.html", "w") do |io|
    call_stack_printer = RubyProf::CallStackPrinter.new(profile)
    call_stack_printer.print(io, { min_percent: 1 })
  end

  File.open("#{output_dir}/profile.rubyprof", "w") do |io|
    speedscope_printer = RubyProf::SpeedscopePrinter.new(profile)
    speedscope_printer.print(io, { min_percent: 1 })
  end

  @log_sink.puts "profiling results saved to #{output_dir}"
end
end
end

View File

@@ -126,14 +126,19 @@ class Domain::User::FaUser < Domain::User
post_id: Integer,
fav_id: T.nilable(Integer),
explicit_time: T.nilable(Time),
).returns(Domain::UserPostFav)
).void
end
def update_fav_model(post_id:, fav_id: nil, explicit_time: nil)
model = self.user_post_favs.find_or_initialize_by(post_id:)
model.fa_fav_id = fav_id if fav_id.present?
model.explicit_time = explicit_time.in_time_zone if explicit_time.present?
model.save!
model
attrs = { user_id: self.id, post_id: }
attrs[:fa_fav_id] = fav_id if fav_id.present?
attrs[:explicit_time] = explicit_time.in_time_zone if explicit_time.present?
Domain::UserPostFav::FaUserPostFav.upsert(
attrs,
unique_by: %i[user_id post_id],
)
self.user_post_favs.reset
end
# TODO - write a test for this