Files
redux-scraper/rake/posts.rake
2025-08-05 18:52:50 +00:00

45 lines
1.5 KiB
Ruby

# typed: true
# frozen_string_literal: true
# Sorbet: declare that top-level `self` in this file is both Rake::DSL and
# Object -- presumably so the `namespace`/`desc`/`task` calls below type-check
# under `typed: true`.
T.bind(self, T.all(Rake::DSL, Object))
namespace :posts do
desc "Find 404 post files with mismatched normalized URLs"
# Reports post files that ended in "terminal_error" with a 404 response and
# whose stored URL is not in Addressable's normalized form.
#
# One line is printed per candidate, in the form
#   <post param> ::::: <post file param> ::::: <url_str>
# The " ::::: " separator and field order are relied upon by the
# `redownload_404_post_files_with_mismatched_normalized_urls` task, which
# reads the second field back as the post file id.
task find_404_post_files_with_mismatched_normalized_urls: :environment do
  # Only rows whose URL contains a character outside \x00-\x7F are considered;
  # the log entry and the post's files are eager-loaded for the checks below.
  candidates =
    Domain::PostFile
      .where(state: "terminal_error")
      .where("url_str ~ '[^\\x00-\\x7F]'")
      .includes(:log_entry, post: :files)

  candidates.find_each(batch_size: 32, order: :desc) do |post_file|
    # A missing log entry and a non-404 status are both "not interesting".
    next unless post_file.log_entry&.status_code == 404
    next if post_file.url_str.blank?

    uri = Addressable::URI.parse(post_file.url_str)
    # Already normalized: re-requesting would hit the exact same URL.
    next if uri.to_s == uri.normalize.to_s

    # `post` may be absent; treat that as "no sibling files".
    sibling_files = post_file.post&.files || []
    # The post already has a successfully downloaded file.
    next if sibling_files.any?(&:state_ok?)
    # Skip posts that have a furarchiver.net file. This is a literal substring
    # test: the previous /furarchiver.net/ pattern left the dot unescaped and
    # therefore matched any character in that position.
    next if sibling_files.any? { |file| file.url_str&.include?("furarchiver.net") }

    puts "#{post_file.post&.to_param} ::::: #{post_file.to_param} ::::: #{post_file.url_str}"
  end
end
desc "Redownload 404 post files with mismatched normalized URLs"
# Re-runs the file scan job for every post file listed in a text file.
#
# Input: the path in ENV["file"]; raises "must provide file" when it is unset.
# Each non-blank line is expected to look like
#   <post param> ::::: <post file id> ::::: <url>
# (the format printed by `find_404_post_files_with_mismatched_normalized_urls`).
# Only the second field is used. Raises "no file id" for a non-blank line
# that has no second field.
task redownload_404_post_files_with_mismatched_normalized_urls: :environment do
  list_path = ENV["file"] || raise("must provide file")

  File.foreach(list_path, chomp: true) do |line|
    # Blank lines (e.g. a trailing newline left by an editor) carry no id and
    # should not abort the run.
    next if line.strip.empty?

    # Split before stripping: a line whose first field is empty starts with
    # the separator's leading space, and stripping the whole line first would
    # destroy that separator and shift the fields.
    _post_param, file_id, _url = line.split(" ::::: ", 3).map(&:strip)
    raise("no file id") if file_id.blank?

    file = Domain::PostFile.find(file_id)
    # Mark the file pending, then run the scan job inline.
    file.state_pending!
    Domain::Fa::Job::ScanFileJob.perform_now(file: file)
  end
end
end