redux-scraper/Rakefile
Dylan Knutson e781ed8f43 Enhance Inkbunny job processing and update post handling
- Updated Rakefile to enqueue periodic jobs for Inkbunny latest posts, improving background processing.
- Added a check in UpdatePostsJob to handle cases with empty post IDs, preventing unnecessary processing.
- Enhanced IndexedPost model to support posting dates for Inkbunny posts.
- Refined view for displaying indexed posts, improving the presentation of posting dates and user experience.
2024-12-30 21:57:32 +00:00


# Add your own tasks in files placed in lib/tasks ending in .rake,
# for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
require "rake/testtask"
require_relative "config/application"
Rails.application.load_tasks
Dir.glob(Rails.root.join("rake", "*.rake")).each { |rake_file| load rake_file }
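
# Route ActiveRecord's SQL logging to stdout.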
task set_ar_stdout: :environment do
  ActiveRecord::Base.logger = Logger.new($stdout)
end
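
# Route Rails and GoodJob logging to stdout with a compact
# "SEVERITY: message" format, silencing ActiveRecord and ActiveJob.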
task set_logger_stdout: :environment do
  Rails.logger = Logger.new($stdout)
  Rails.logger.formatter =
    proc { |severity, datetime, progname, msg| "#{severity}: #{msg}\n" }
  ActiveRecord::Base.logger = nil
  ActiveJob::Base.logger = nil
  GoodJob.logger = Rails.logger
end
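
# Long-running scheduler: each thread below executes or enqueues one
# recurring task on its own interval.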
task periodic_tasks: %i[environment set_logger_stdout] do
  # Capture PgHero space stats every 6 hours.
  Thread.new do
    loop do
      Rake::Task["pghero:capture_space_stats"].execute
      puts "logged space stats"
      sleep 6.hours
    end
  end

  # Capture PgHero query stats every 5 minutes.
  Thread.new do
    loop do
      Rake::Task["pghero:capture_query_stats"].execute
      puts "logged query stats"
      sleep 5.minutes
    end
  end

  # Enqueue the FA and e621 scrape jobs every minute.
  Thread.new do
    loop do
      Rake::Task["fa:browse_page_job"].execute
      Rake::Task["fa:home_page_job"].execute
      Rake::Task["e621:posts_index_job"].execute
      puts "enqueue periodic jobs"
      sleep 1.minute
    end
  end

  # Enqueue the Inkbunny latest-posts job every 2 minutes.
  Thread.new do
    loop do
      puts "enqueue inkbunny latest posts"
      Domain::Inkbunny::Job::LatestPostsJob.set(
        queue: "inkbunny",
        priority: -20,
      ).perform_later({})
      sleep 2.minutes
    end
  end

  # Keep the main thread alive so the scheduler threads keep running.
  loop { sleep 10 }
end
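
# Export/import a sample of the database for the given url_names.
# Usage (file names are illustrative):
#   rake db_sampler:export url_names=foo,bar > sample.out
#   rake db_sampler:import < sample.out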
namespace :db_sampler do
  task export: :environment do
    url_names = ENV["url_names"] || raise("need 'url_names' (comma-separated)")
    outfile = $stdout
    DbSampler.new(outfile).export(url_names.split(","))
  ensure
    outfile.close if outfile
  end

  task import: :environment do
    infile = $stdin
    DbSampler.new(infile).import
  ensure
    infile.close if infile
  end
end
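
# Launch a GoodJob worker with this app's queue settings. GOOD_JOB_QUEUES may
# be overridden via the environment; per GoodJob's queue-string syntax, the
# default dedicates 4 threads to manual, 2 shared across fa_post and e621,
# and 6 to everything else.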
task good_job: %i[environment set_ar_stdout set_logger_stdout] do
  env_hash = {
    "RAILS_ENV" => "worker",
    "GOOD_JOB_POLL_INTERVAL" => "5",
    "GOOD_JOB_MAX_CACHE" => "10000",
    "GOOD_JOB_QUEUE_SELECT_LIMIT" => "4096",
    "GOOD_JOB_MAX_THREADS" => "4",
    "GOOD_JOB_QUEUES" =>
      ENV["GOOD_JOB_QUEUES"] || %w[manual:4 fa_post,e621:2 *:6].join(";"),
  }
  env_hash.each do |key, value|
    ENV[key] = value
    puts "$> #{key.light_black.bold} = #{value.bold}"
  end
  cmd = "bundle exec good_job"
  puts "$> #{cmd.bold}"
  exec(cmd)
end
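
# Backfill signatures on existing Delayed::Job rows, destroying any job that
# no longer saves cleanly. Resume from a given primary key (id illustrative):
#   rake recompute_job_signatures start_at=12345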
task recompute_job_signatures: :environment do
  ActiveRecord::Base.logger = Logger.new($stdout)
  ActiveRecord::Base.logger.level = :error
  start_at = ENV["start_at"]&.to_i || 0
  count = 0
  destroyed = 0
  puts "# jobs: #{Delayed::Job.count}"
  Delayed::Job.find_each(start: start_at) do |job|
    job.set_signature
    # Destroy jobs that fail to save with their recomputed signature.
    unless job.save
      job.destroy
      destroyed += 1
    end
    count += 1
    if count % 50 == 0
      puts "processed #{count}, destroyed #{destroyed} - last id: #{job.id}"
    end
  end
end
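
# Re-run failed jobs (2 or fewer attempts so far) inline, one at a time.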
task workoff_failed_jobs: %i[environment set_ar_stdout set_logger_stdout] do
  worker = Delayed::Worker.new
  Delayed::Job
    .where("last_error is not null and attempts <= 2")
    .find_each(batch_size: 1) { |job| worker.run(job) }
end
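
# Reverse the row order of a CSV file, preserving the header row.
# Usage ("posts.csv" is illustrative):
#   rake reverse_csv file=posts.csv   # writes rev_posts.csv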
task :reverse_csv do
  require "csv" # stdlib; this task does not depend on :environment
  file = ENV["file"] || raise("need 'file' (file path)")
  in_csv = CSV.parse(File.open(file, "r"), headers: true)
  out_csv =
    CSV.new(
      File.open("rev_" + file, "w"),
      write_headers: true,
      headers: in_csv.headers,
    )
  # CSV::Row#fields yields the row's values in header order.
  in_csv.reverse_each { |row| out_csv << row.fields }
  out_csv.close
end