add rake task for dumping fa user info

This commit is contained in:
Dylan Knutson
2023-04-07 08:50:16 -07:00
parent e57b0f4fc9
commit ab13af43af
3 changed files with 35 additions and 0 deletions

View File

@@ -111,6 +111,7 @@ gem "good_job"
gem "neighbor"
gem "disco"
gem "faiss"
gem "progressbar"
group :production, :staging do
gem "rails_semantic_logger"

View File

@@ -201,6 +201,7 @@ GEM
pluck_each (0.2.0)
activerecord (> 3.2.0)
activesupport (> 3.0.0)
progressbar (1.13.0)
pry (0.14.2)
coderay (~> 1.1)
method_source (~> 1.0)
@@ -371,6 +372,7 @@ DEPENDENCIES
nokogiri
pg
pluck_each
progressbar
pry
pry-stack_explorer
puma (~> 5.0)

View File

@@ -1,4 +1,36 @@
namespace :fa do
# Dumps every Domain::Fa::User row to the file named by the `out`
# environment variable, one JSON document per line:
#
#   * line 1 is a header object mapping each exported column to a
#     human-readable description of that column;
#   * every following line is one user, as an object keyed by column.
#
# Rows are emitted in ascending `url_name` order (the task description below
# says "name"; the queries actually order by `url_name`).
#
# Raises a RuntimeError if `out` is not set.
desc "dump FurAffinity users into json file, sorted by name"
task :dump_users => [:set_logger_stdout, :environment] do
  out_file = ENV["out"] || raise("`out` file not specified (json)")

  # Exported column => description. The keys are passed straight to `pluck`,
  # so they must match the model's attribute names exactly.
  columns = {
    name: "name of the FurAffinity user",
    url_name: "identifier used in URLs to refer to the user, e.g. https://www.furaffinity.net/user/test",
    num_submissions: "number of submissions the user has made, as per the user page",
    num_comments_recieved: "number of comments the user's posts have received, as per the user page",
    num_comments_given: "number of comments the user has made on other posts, as per the user page",
    num_journals: "number of journals the user has made, as per the user page",
    num_favorites: "number of posts the user has favorited, as per the user page",
    num_pageviews: "number of pageviews of the user's page, as per the user page",
    registered_at: "when the account was registered, as per the user page",
  }

  print "counting users... "
  # Load the ordered id list once and derive the total from it. Using a
  # separate COUNT query could disagree with this list if rows are inserted
  # in between, pushing the progress bar past its total.
  model_ids = Domain::Fa::User.order(url_name: :asc).pluck(:id)
  total = model_ids.size
  puts total
  progress = ProgressBar.create(total: total, throttle_rate: 0.2)

  File.open(out_file, "wt") do |file|
    # Terminate the header with a newline so it occupies its own line and is
    # not fused with the first user row.
    file << JSON.dump(columns) << "\n"

    model_ids.each_slice(100) do |ids_batch|
      # `where(id: ...)` does not preserve the order of `ids_batch`, so the
      # batch is re-sorted by the same key used to order the id list.
      rows = Domain::Fa::User.where(id: ids_batch).order(:url_name).pluck(*columns.keys)
      rows.each do |row|
        file << JSON.dump(columns.keys.zip(row).to_h) << "\n"
      end
      progress.progress += ids_batch.size
    end
  end
end
desc "enqueue waiting posts"
task :enqueue_waiting_posts => [:set_logger_stdout, :environment] do |t, args|
start_at = (ENV["start_at"] || 0).to_i