Files
redux-scraper/app/lib/tasks/create_post_file_fingerprints_task.rb
2025-08-16 04:44:04 +00:00

187 lines
5.2 KiB
Ruby

# typed: strict
module Tasks
class CreatePostFileFingerprintsTask < InterruptableTask
extend T::Sig
PROGRESS_KEY = T.let("task-create-post-file-fingerprints", String)
# Traversal strategies accepted by #run; each value maps to one private
# run_* method in the `case` dispatch there.
class Mode < T::Enum
enums do
# Iterate Domain::PostFile rows in descending order (run_post_file_descending).
PostFileDescending = new("post_file_descending")
# Iterate Domain::Post rows in descending order (run_posts_descending).
PostsDescending = new("posts_descending")
# Process a single user; #run requires `user_param` for this mode (run_single_user).
User = new("user")
# Iterate Domain::User::FaUser rows ordered by user_user_follows_to_count,
# highest first (run_users_descending).
UsersDescending = new("users_descending")
end
end
# Overrides the parent class hook and reports this task's PROGRESS_KEY.
sig do
  override.returns(String)
end
def progress_key
  PROGRESS_KEY
end
# Entry point: picks the traversal that matches +mode+ and runs it.
#
# mode       - which Mode strategy to execute.
# user_param - identifies the user to process; mandatory for Mode::User and
#              not read by any other mode.
# start_at   - forwarded to the PostFileDescending traversal only.
#
# Raises RuntimeError when +mode+ is Mode::User and +user_param+ is missing.
sig do
  params(
    mode: Mode,
    user_param: T.nilable(String),
    start_at: T.nilable(String),
  ).void
end
def run(mode:, user_param: nil, start_at: nil)
  case mode
  when Mode::PostFileDescending then run_post_file_descending(start_at)
  when Mode::PostsDescending then run_posts_descending
  when Mode::UsersDescending then run_users_descending
  when Mode::User
    unless user_param
      raise "need 'user_param' when mode is Mode::User"
    end
    run_single_user(user_param)
  else
    # Only reachable if a Mode value has no branch above.
    T.absurd(mode)
  end
end
private
# Runs migrate_post_file over every Domain::PostFile in state "ok", walking
# ids from highest to lowest, and checkpoints the position with save_progress
# so a later run can resume.
#
# start_at - handed to get_progress to determine the id to start from.
#            NOTE(review): get_progress is defined outside this file view;
#            presumably it returns previously saved progress and falls back
#            to start_at - confirm.
#
# The id of the most recently processed file is tracked on every iteration
# (not only at the every-100 checkpoint), so the final save_progress after the
# loop records the true stopping point when the task is interrupted or
# finishes between checkpoints.
sig { params(start_at: T.nilable(String)).void }
def run_post_file_descending(start_at)
  last_post_file_id = get_progress(start_at)&.to_i
  query = Domain::PostFile.where(state: "ok").includes(:blob, :thumbnails)
  # Resuming: only look at ids at or below the saved position.
  query = query.where(id: ..last_post_file_id) if last_post_file_id

  log("counting post files to process...")
  # total = 49_783_962 # cache this value
  pb = create_progress_bar(query.count)

  query.find_each(
    order: :desc,
    batch_size: 32,
    start: last_post_file_id,
  ) do |post_file|
    break if interrupted?

    migrate_post_file(post_file)
    pb.progress = [pb.progress + 1, pb.total].min
    # Everything above this id (within the query) has been handled, so it is
    # always a safe resume point.
    last_post_file_id = T.must(post_file.id)

    # Log and persist a checkpoint every 100 files.
    if pb.progress % 100 == 0
      post = post_file.post
      creator_str =
        if post&.class&.has_creators?
          T.unsafe(post).creator&.to_param || "(none)"
        else
          "(none)"
        end
      post_desc = "#{creator_str.rjust(20)} / #{post&.to_param}".ljust(40)
      log(
        "post_file = #{post_file.id} :: #{post_desc} - #{post&.title_for_view}",
      )
      save_progress(last_post_file_id.to_s)
    end

    # Checked again here so an interrupt does not trigger loading another batch.
    break if interrupted?
  end

  # Persist the final position (covers files processed since the last checkpoint).
  save_progress(last_post_file_id.to_s) if last_post_file_id
end
# Calls migrate_post for each Domain::Post, iterating in descending order and
# stopping as soon as the task reports it was interrupted.
sig { void }
def run_posts_descending
  # Hard-coded size for the progress bar ("cache this value").
  expected_posts = 66_431_808
  bar = create_progress_bar(expected_posts)

  Domain::Post.find_each(order: :desc) do |post|
    break if interrupted?

    migrate_post(post)
    # Advance by one, never past the bar's total.
    advanced = bar.progress + 1
    bar.progress = [advanced, bar.total].min
  end
end
# Looks up one Domain::User from +user_param+ and migrates that user's posts.
#
# Raises RuntimeError when the lookup returns nothing.
sig { params(user_param: String).void }
def run_single_user(user_param)
  user =
    DomainController.find_model_from_param(Domain::User, user_param) ||
      raise("user '#{user_param}' not found")
  migrate_posts_for_user(user)
end
# Migrates posts user by user for Domain::User::FaUser, ordered by
# user_user_follows_to_count (highest first, NULLs last).
#
# Completed users are appended to "migrated_files.txt" (one to_param per
# line); users already listed there are skipped, which makes the run
# resumable.
#
# The file is opened in block form so the handle is closed even when a lookup
# or migration raises. The loop honours interrupted?: it stops before starting
# another user, and a user whose migration was cut short by an interrupt is
# NOT recorded as migrated, so it is retried on the next run.
sig { void }
def run_users_descending
  File.open("migrated_files.txt", "a+") do |migrated_file|
    # Rewind before reading so the existing entries are picked up regardless
    # of where the append-mode handle starts.
    migrated_file.seek(0)
    migrated_users = migrated_file.readlines.map(&:strip)

    user_ids =
      Domain::User::FaUser.order(
        Arel.sql("user_user_follows_to_count DESC NULLS LAST"),
      ).pluck(:id)

    user_ids.each do |user_id|
      break if interrupted?

      user = Domain::User::FaUser.find(user_id)
      next if migrated_users.include?(user.to_param)

      log(
        "migrating posts for #{user.to_param} (#{user.num_watched_by} watched by)",
      )
      migrate_posts_for_user(user)

      # migrate_posts_for_user returns early on interrupt; do not mark a
      # partially processed user as done.
      break if interrupted?

      migrated_file.write("#{user.to_param}\n")
      # Flush per user so the record survives an abrupt exit.
      migrated_file.flush
    end
  end
end
# Runs migrate_post over all of +user+'s posts, loading them in batches of 64
# with each batch wrapped in one ReduxApplicationRecord transaction.
#
# user - the Domain::User whose posts are processed.
#
# Stops promptly on interrupt: the inner check ends the current batch and the
# batch-level check prevents any further batches from being fetched.
sig { params(user: Domain::User).void }
def migrate_posts_for_user(user)
  log("migrating posts for #{user.to_param}")
  posts = user.posts.includes(files: %i[blob thumbnails bit_fingerprints])
  pb = create_progress_bar(posts.count)

  posts.find_in_batches(batch_size: 64) do |batch|
    # Checked outside the transaction block on purpose, so the transaction is
    # never exited via `break`.
    break if interrupted?

    ReduxApplicationRecord.transaction do
      batch.each do |post|
        # Only leaves this `each`; the transaction block still ends normally.
        break if interrupted?
        migrate_post(post)
        pb.progress = [pb.progress + 1, pb.total].min
      end
    end
  end
end
# Logs a one-line summary of +post+ and then runs migrate_post_file on each of
# its files inside ColorLogger.quiet.
#
# A StandardError raised while handling one file is logged and the loop moves
# on to the next file; an interrupt ends the loop early.
sig { params(post: Domain::Post).void }
def migrate_post(post)
  # Only post classes that report has_creators? are asked for a creator.
  creator = post.class.has_creators? ? T.unsafe(post).creator : nil
  creator_info =
    if creator
      "#{creator.url_name} (#{creator.user_user_follows_to_count})"
    else
      "(no creator)"
    end
  log("#{creator_info} :: #{post.to_param} / '#{post.title_for_view}'")

  ColorLogger.quiet do
    post.files.each do |post_file|
      break if interrupted?
      migrate_post_file(post_file)
    rescue StandardError => error
      log("error: #{error.message}")
    end
  end
end
# Creates the Thumbnail and then the BitFingerprint records for +post_file+,
# inside ColorLogger.quiet.
#
# Best effort: a StandardError from either step is logged and swallowed, and
# the fingerprint step is skipped when the thumbnail step raises.
sig { params(post_file: Domain::PostFile).void }
def migrate_post_file(post_file)
  ColorLogger.quiet do
    # Order matters: thumbnails first, fingerprints second.
    [
      Domain::PostFile::Thumbnail,
      Domain::PostFile::BitFingerprint,
    ].each { |derived| derived.create_for_post_file!(post_file) }
  rescue StandardError => error
    log("error: #{error.message}")
  end
end
end
end