visual fingerprinting
This commit is contained in:
142
Rakefile
142
Rakefile
@@ -482,28 +482,6 @@ task sample_migrated_favs: :environment do
|
||||
puts "new fav count: #{new_user.faved_posts.count}"
|
||||
end
|
||||
|
||||
# Removes the 'migrated_user_favs_at' key from json_attributes on every
# Domain::User::E621User row that currently has it, using a single UPDATE
# statement issued through ReduxApplicationRecord's connection.
#
# A per-record alternative was sketched here previously: walk
# Domain::User::E621User.where.not(migrated_user_favs_at: nil) in batches of
# 1000, set migrated_user_favs_at to nil and save! each record inside a
# transaction while advancing a ProgressBar.
task clear_e621_user_favs_migrated_at: :environment do
  clear_statement = <<~SQL
    UPDATE domain_users
    SET json_attributes = json_attributes - 'migrated_user_favs_at'
    WHERE type = 'Domain::User::E621User'
    AND json_attributes->>'migrated_user_favs_at' IS NOT NULL
  SQL

  puts "clearing migrated_user_favs_at"
  ReduxApplicationRecord.connection.execute(clear_statement)
  puts "done"
end
|
||||
|
||||
task create_post_file_fingerprints: :environment do
|
||||
def migrate_posts_for_user(user)
|
||||
puts "migrating posts for #{user.to_param}"
|
||||
@@ -512,30 +490,130 @@ task create_post_file_fingerprints: :environment do
|
||||
total: user.posts.count,
|
||||
format: "%t: %c/%C %B %p%% %a %e",
|
||||
)
|
||||
|
||||
user
|
||||
.posts
|
||||
.includes(files: :blob)
|
||||
.find_in_batches(batch_size: 16) do |batch|
|
||||
.includes(:files)
|
||||
.find_in_batches(batch_size: 64) do |batch|
|
||||
ReduxApplicationRecord.transaction do
|
||||
batch.each do |post|
|
||||
post.files.each { |file| file.ensure_fingerprint! }
|
||||
puts "migrated #{post.id} / #{post.to_param} / '#{post.title_for_view}'"
|
||||
pb.progress = [pb.progress + 1, pb.total].min
|
||||
end
|
||||
batch.each { |post| migrate_post(post) }
|
||||
pb.progress = [pb.progress + 1, pb.total].min
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if ENV["user"].present?
|
||||
# Announces the post on stdout, then hands each of its files to
# migrate_post_file inside a ColorLogger.quiet block.
#
# A StandardError raised for one file is printed as "error: <message>" and
# does not stop the remaining files from being processed.
#
# @param post [Object] record responding to id, to_param, title_for_view and files
def migrate_post(post)
  puts "migrating #{post.id} / #{post.to_param} / '#{post.title_for_view}'"

  ColorLogger.quiet do
    post.files.each do |post_file|
      begin
        migrate_post_file(post_file)
      rescue StandardError => err
        puts "error: #{err.message}"
      end
    end
  end
end
|
||||
|
||||
# Runs a fresh Domain::PostFileThumbnailJob synchronously for one post file,
# inside a ColorLogger.quiet block.
#
# Any StandardError raised by the job is printed as "error: <message>"
# instead of propagating to the caller.
#
# @param post_file [Object] passed to the job as the :post_file argument
def migrate_post_file(post_file)
  thumbnail_job = Domain::PostFileThumbnailJob.new

  ColorLogger.quiet do
    begin
      thumbnail_job.perform({ post_file: post_file })
    rescue StandardError => err
      puts "error: #{err.message}"
    end
  end
end
|
||||
|
||||
if ENV["post_file_descending"].present?
|
||||
total = 49_783_962 # cache this value
|
||||
pb = ProgressBar.create(total:, format: "%t: %c/%C %B %p%% %a %e")
|
||||
i = 0
|
||||
Domain::PostFile
|
||||
.where(state: "ok")
|
||||
.includes(:blob)
|
||||
.find_each(
|
||||
order: :desc,
|
||||
batch_size: 32,
|
||||
start: ENV["start_at"],
|
||||
) do |post_file|
|
||||
i += 1
|
||||
if i % 100 == 0
|
||||
puts "migrating #{post_file.id} / #{post_file.post.title_for_view}"
|
||||
end
|
||||
migrate_post_file(post_file)
|
||||
pb.progress = [pb.progress + 1, pb.total].min
|
||||
end
|
||||
elsif ENV["posts_descending"].present?
|
||||
# total = Domain::Post.count
|
||||
total = 66_431_808 # cache this value
|
||||
pb = ProgressBar.create(total:, format: "%t: %c/%C %B %p%% %a %e")
|
||||
Domain::Post.find_each(order: :desc) do |post|
|
||||
migrate_post(post) unless post.is_a?(Domain::Post::InkbunnyPost)
|
||||
pb.progress = [pb.progress + 1, pb.total].min
|
||||
end
|
||||
elsif ENV["user"].present?
|
||||
for_user = ENV["user"] || raise("need 'user'")
|
||||
user = DomainController.find_model_from_param(Domain::User, for_user)
|
||||
raise "user '#{for_user}' not found" unless user
|
||||
migrate_posts_for_user(user)
|
||||
elsif ENV["users_descending"].present?
|
||||
# all users with posts, ordered by post count descending
|
||||
users = Domain::User::FaUser.order(num_watched_by: :desc).limit(20)
|
||||
users.find_each(batch_size: 1) { |user| migrate_posts_for_user(user) }
|
||||
# Resumable pass over every FaUser: params already listed in
# migrated_files.txt are skipped, and each user's param is appended (and
# flushed) as soon as migrate_posts_for_user returns, so an interrupted run
# can be restarted without redoing finished users.
require "set" # no-op when Set is already loaded

# Block form guarantees the checkpoint file is closed even when a migration
# raises part-way through the run.
File.open("migrated_files.txt", "a+") do |migrated_file|
  # Rewind explicitly before reading the existing checkpoint entries.
  migrated_file.seek(0)
  # A Set keeps the per-user "already done?" check constant-time instead of
  # scanning the whole checkpoint list for every user.
  migrated_users = migrated_file.each_line.map(&:strip).to_set

  # NOTE(review): `->>` yields text in PostgreSQL, so this ORDER BY compares
  # strings rather than numbers unless the value is cast — confirm whether a
  # numeric ordering was intended.
  user_ids =
    Domain::User::FaUser.order(
      Arel.sql("json_attributes->>'num_watched_by' DESC NULLS LAST"),
    ).pluck(:id)

  user_ids.each do |user_id|
    user = Domain::User::FaUser.find(user_id)
    next if migrated_users.include?(user.to_param)

    puts "migrating posts for #{user.to_param} (#{user.num_watched_by} watched by)"
    migrate_posts_for_user(user)

    # Record completion immediately so progress survives an abrupt stop.
    migrated_file.write("#{user.to_param}\n")
    migrated_file.flush
  end
end
|
||||
else
|
||||
raise "need 'user' or 'users_descending'"
|
||||
end
|
||||
end
|
||||
|
||||
# Enqueues a Job::PostFileJob (priority 10) for every Domain::PostFile whose
# state is "pending", walking the table in batches of 100.
#
# Before each batch is enqueued, the task waits until the number of
# GoodJob::Job rows for "Job::PostFileJob" with nil performed_at,
# scheduled_at and error is 100 or fewer, polling every 10 seconds and
# printing the current count while it waits.
#
# ENV["start_at"], when set, is passed as the `start:` option of
# find_in_batches.
task enqueue_pending_post_files: :environment do
  pending_files = Domain::PostFile.where(state: "pending")
  puts "enqueueing #{pending_files.count} pending post files"

  pending_files.find_in_batches(
    batch_size: 100,
    start: ENV["start_at"],
  ) do |post_files|
    # Throttle: hold this batch back while the queue is above the threshold.
    loop do
      queue_size =
        GoodJob::Job.where(
          job_class: "Job::PostFileJob",
          performed_at: nil,
          scheduled_at: nil,
          error: nil,
        ).count
      break unless queue_size > 100

      puts "queue size: #{queue_size}"
      sleep 10
    end

    post_files.each do |post_file|
      Job::PostFileJob.set(priority: 10).perform_later(post_file: post_file)
    end
  end
end
|
||||
|
||||
# Re-processes post files that are marked "ok" with retry_count 0 but whose
# joined log entry has a response_sha256 equal to BlobFile::EMPTY_FILE_SHA256.
#
# Each match has state_pending! and save! called on it and is then run
# through Job::PostFileJob synchronously (perform_now), with a progress bar
# sized from the initial match count.
task find_post_files_with_empty_response: :environment do
  ok_files = Domain::PostFile.where(state: "ok", retry_count: 0)
  empty_response_files =
    ok_files.joins(:log_entry).where(
      http_log_entries: {
        response_sha256: BlobFile::EMPTY_FILE_SHA256,
      },
    )

  progress_bar =
    ProgressBar.create(
      total: empty_response_files.count,
      format: "%t: %c/%C %B %p%% %a %e",
    )

  empty_response_files.find_each(batch_size: 10) do |post_file|
    post_file.state_pending!
    post_file.save!
    Job::PostFileJob.perform_now(post_file: post_file)

    # Clamp so the bar never advances past its declared total.
    next_progress = progress_bar.progress + 1
    progress_bar.progress = [next_progress, progress_bar.total].min
  end
end
|
||||
|
||||
Reference in New Issue
Block a user