add rake task for dumping fa user info
This commit is contained in:
1
Gemfile
1
Gemfile
@@ -111,6 +111,7 @@ gem "good_job"
|
||||
gem "neighbor"
|
||||
gem "disco"
|
||||
gem "faiss"
|
||||
gem "progressbar"
|
||||
|
||||
group :production, :staging do
|
||||
gem "rails_semantic_logger"
|
||||
|
||||
@@ -201,6 +201,7 @@ GEM
|
||||
pluck_each (0.2.0)
|
||||
activerecord (> 3.2.0)
|
||||
activesupport (> 3.0.0)
|
||||
progressbar (1.13.0)
|
||||
pry (0.14.2)
|
||||
coderay (~> 1.1)
|
||||
method_source (~> 1.0)
|
||||
@@ -371,6 +372,7 @@ DEPENDENCIES
|
||||
nokogiri
|
||||
pg
|
||||
pluck_each
|
||||
progressbar
|
||||
pry
|
||||
pry-stack_explorer
|
||||
puma (~> 5.0)
|
||||
|
||||
32
rake/fa.rake
32
rake/fa.rake
@@ -1,4 +1,36 @@
|
||||
namespace :fa do
|
||||
# Dumps every Domain::Fa::User row to the file named by the `out` environment
# variable, as newline-delimited JSON:
#   * line 1 is an object mapping each exported column to its description;
#   * each following line is one user, keyed by column name, written in
#     ascending `url_name` order.
# Raises if `out` is not set.
desc "dump FurAffinity users into json file, sorted by name"
task :dump_users => [:set_logger_stdout, :environment] do
  out_file = ENV["out"] || raise("`out` file not specified (json)")

  # Exported column => human-readable description (emitted as the header line).
  columns = {
    name: "name of the FurAffinity user",
    url_name: "identifier used in URLs to refer to the user, e.g. https://www.furaffinity.net/user/test",
    num_submissions: "number of submissions the user has made, as per the user page",
    num_comments_recieved: "number of comments the user's posts have received, as per the user page",
    num_comments_given: "number of comments the user has made on other posts, as per the user page",
    num_journals: "number of journals the user has made, as per the user page",
    num_favorites: "number of posts the user has favorited, as per the user page",
    num_pageviews: "number of pageviews of the user's page, as per the user page",
    registered_at: "when the account was registered, as per the user page",
  }
  column_names = columns.keys

  # Size the progress bar from the very id list that is iterated below, rather
  # than from a separate COUNT query: rows inserted between the two queries
  # could otherwise push the progress past the declared total.
  print "counting users... "
  model_ids = Domain::Fa::User.order(url_name: :asc).pluck(:id)
  total = model_ids.size
  puts total

  progress = ProgressBar.create(total: total, throttle_rate: 0.2)

  File.open(out_file, "wt") do |file|
    # Terminate the header with a newline so it occupies its own line instead
    # of being fused with the first user record.
    file << JSON.dump(columns) << "\n"

    # Ids are already globally sorted by url_name, so contiguous slices that
    # are re-sorted within each batch keep the overall output ordered.
    model_ids.each_slice(100) do |ids_batch|
      rows = Domain::Fa::User.where(id: ids_batch).order(:url_name).pluck(*column_names)
      rows.each do |row|
        file << JSON.dump(column_names.zip(row).to_h) << "\n"
      end
      progress.progress += ids_batch.size
    end
  end
end
|
||||
|
||||
desc "enqueue waiting posts"
|
||||
task :enqueue_waiting_posts => [:set_logger_stdout, :environment] do |t, args|
|
||||
start_at = (ENV["start_at"] || 0).to_i
|
||||
|
||||
Reference in New Issue
Block a user