Files
redux-scraper/app/jobs/domain/fa/job/scan_file_job.rb
2025-01-01 03:29:53 +00:00

107 lines
2.9 KiB
Ruby

# typed: false
# Downloads the file referenced by a post's +file_uri+ and stores the HTTP log
# entry of the successful response as the post's +file+.
#
# Keys read from the +args+ hash given to #perform:
# * +:post+            - the post model whose file should be fetched
# * +:fa_id+           - only used when +:post+ is missing, to defer a post scan
# * +:caused_by_entry+ - passed on to deferred jobs and to the HTTP client
# * +:force_scan+      - when truthy, attempt the download even if the post is
#                        already in the "file_error" state
class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
  queue_as :static_file

  # A post is moved to the file_error state once its 404 counter exceeds this.
  MAX_404_COUNT = 2

  # Runs the guard checks in order and, if none of them short-circuits,
  # downloads the file and saves the post.
  def perform(args)
    @post = args[:post]
    @caused_by_entry = args[:caused_by_entry]
    @force_scan = !!args[:force_scan]

    if @post.nil?
      logger.error "no post model - fa_id: #{args[:fa_id]}, enqueue scan"
      if args[:fa_id]
        defer_job(
          Domain::Fa::Job::ScanPostJob,
          { fa_id: args[:fa_id], caused_by_entry: @caused_by_entry },
        )
      end
      return
    end

    logger.prefix = "[fa_id #{@post.fa_id.to_s.bold} / #{@post.state.bold}]"

    if @post.state == "removed" && @post.file_uri.nil?
      logger.error "removed and has no file, skipping"
      return
    end

    unless @post.scanned?
      logger.error "has not been scanned yet, doing so first"
      enqueue_post_scan(@post, @caused_by_entry)
      return
    end

    if @post.have_file?
      logger.warn("already have file")
      return
    end

    file_uri_host = @post.file_uri&.host
    if file_uri_host && unresolvable_host?(file_uri_host)
      logger.error("host is #{file_uri_host}, which will not resolve")
      mark_file_error("#{file_uri_host} is unresolvable")
      return
    end

    # Checked after the host test, so an unresolvable host is always recorded,
    # even for posts that are already in the file_error state.
    if @post.state == "file_error" && !@force_scan
      logger.warn("state == file_error, skipping")
      return
    end

    download_file
    @post.save!
  end

  private

  # True for hosts this job treats as never resolving: exactly "d9.facdn.net",
  # or a host that starts with "d.facdn.net" and whose last dot-separated
  # label is at least 6 characters long (e.g. "d.facdn.netabc").
  #
  # The prefix test runs first so that an empty host string, for which
  # split(".").last is nil, yields false instead of raising.
  def unresolvable_host?(host)
    return true if host == "d9.facdn.net"

    host.start_with?("d.facdn.net") && host.split(".").last.length >= 6
  end

  # Returns the post's state_detail hash, installing an empty hash first when
  # the attribute is nil. The attribute is re-read after the assignment rather
  # than using the value of the `||=` expression, so the returned object is
  # always the one the model itself holds.
  def post_state_detail
    @post.state_detail ||= {}
    @post.state_detail
  end

  # Moves the post into the file_error state, records +message+ under the
  # "file_error" key of state_detail and saves. The key is set in place so
  # other entries already stored there (such as "404_count") are preserved.
  def mark_file_error(message)
    @post.state = :file_error
    post_state_detail["file_error"] = message
    @post.save!
  end

  # Requests the post's file_uri and handles the response:
  # * 404: increments the "404_count" entry in state_detail; once it exceeds
  #   MAX_404_COUNT the post is marked file_error and the method returns.
  # * any other non-200 (and 404s under the limit): defers a forced
  #   ScanPostJob for the post, saves, then either just logs (404 on a post
  #   in the "removed" state) or reports through fatal_error.
  # * 200: assigns the response's log entry to the post's file; the caller is
  #   responsible for saving.
  def download_file
    response =
      http_client.get(@post.file_uri.to_s, caused_by_entry: @caused_by_entry)
    status = response.status_code

    if status == 404
      detail = post_state_detail
      detail["404_count"] = (detail["404_count"] || 0) + 1
      fof_count = detail["404_count"]
      if fof_count > MAX_404_COUNT
        mark_file_error("too many 404s")
        logger.error "too many 404s (#{fof_count.to_s.bold}), aborting"
        return
      end
    end

    if status != 200
      # NOTE(review): presumably the forced post scan is meant to refresh the
      # post's file_uri - confirm against ScanPostJob.
      defer_job(
        Domain::Fa::Job::ScanPostJob,
        { post: @post, caused_by_entry: response.log_entry, force_scan: true },
      )
      err_msg =
        "error downloading - log entry #{response.log_entry.id} / status code #{status}"
      # Persists the updated 404 counter before the error is reported.
      @post.save!
      if status == 404 && @post.state == "removed"
        logger.error(err_msg)
        return
      end
      # NOTE(review): fatal_error is defined outside this file and is assumed
      # to raise; if it returns, execution continues below exactly as before.
      fatal_error(err_msg)
    end

    logger.debug "#{HexUtil.humansize(response.log_entry.response.size)} / #{response.log_entry.content_type} / #{response.log_entry.response_time_ms} ms"
    @post.file = response.log_entry
  end
end