Files
redux-scraper/app/jobs/domain/static_file_job_helper.rb
2025-08-18 23:58:06 +00:00

116 lines
3.2 KiB
Ruby

# typed: strict
# Mixin for scraper jobs that download the static file behind a
# Domain::PostFile and record the outcome on that record.
#
# The including class must descend from Scraper::JobBase (declared via
# `requires_ancestor`). The methods below call `http_client`, `fatal_error`
# and `defer_job`, none of which are defined in this module; `logger`,
# `format_tags`, `make_tag` and `make_arg_tag` are likewise expected to come
# from HasColorLogger or the ancestor.
#
# NOTE(review): the `state_*?` / `state_*!` calls look like ActiveRecord enum
# helpers, in which case each bang call persists the record immediately --
# confirm against Domain::PostFile.
module Domain::StaticFileJobHelper
  extend T::Sig
  extend T::Helpers
  include HasColorLogger
  abstract!
  requires_ancestor { Scraper::JobBase }

  # Threshold that `retry_count` is compared against when deciding whether a
  # file that keeps failing should be moved to the terminal-error state.
  MAX_RETRIES = 3

  # Fetches the file for +post_file+ over HTTP and records the result.
  #
  # No request is made when the record:
  # * is already in the terminal-error state,
  # * is OK and already has a log entry attached,
  # * is in the retryable-error state with retry_count >= MAX_RETRIES
  #   (the record is moved to the terminal-error state), or
  # * has a blank URL (the record is moved to the terminal-error state).
  #
  # Scraper::HttpClient::InvalidURLError and Curl::Err::HostResolutionError
  # raised by the fetch are handled here: the record is moved to the
  # terminal-error state with the exception message stored on it. Any other
  # exception propagates to the caller.
  #
  # The `ensure` clause saves the record on every exit path, including the
  # early returns and a propagating exception.
  sig { params(post_file: Domain::PostFile).void }
  def download_post_file(post_file)
    if post_file.state_terminal_error?
      logger.error(format_tags("terminal error state, skipping"))
      return
    end

    if post_file.state_ok? && post_file.log_entry_id.present?
      logger.warn(format_tags("already downloaded, skipping"))
      return
    end

    if post_file.state_retryable_error? && post_file.retry_count >= MAX_RETRIES
      logger.warn(format_tags("retry count >= max retries, skipping"))
      post_file.state_terminal_error!
      return
    end

    file_url_str = post_file.url_str
    if file_url_str.blank?
      logger.warn(format_tags("no url, skipping"))
      post_file.state_terminal_error!
      return
    end

    # Only the fetch itself is covered by this rescue; errors raised while
    # handling the response are not treated as URL problems.
    begin
      response = http_client.get(file_url_str)
    rescue Scraper::HttpClient::InvalidURLError,
           Curl::Err::HostResolutionError => e
      # Assign the message before the state transition so that, if the
      # transition persists the record, the message is written together with
      # the new state instead of in a later, separate save.
      post_file.error_message = e.message
      post_file.state_terminal_error!
      logger.error(
        format_tags(
          "invalid url, terminal error state",
          make_tag("error_message", e.message),
          make_tag("url", file_url_str),
        ),
      )
      return
    end

    handle_file_download_response(post_file, response)
  ensure
    post_file.save! if post_file
  end

  # Applies an HTTP +response+ to +post_file+: stores the log entry, the
  # response body hash and the status code, then updates the record's state.
  #
  # * 200: state becomes OK and a Domain::PostFileThumbnailJob is deferred.
  # * 404: state becomes terminal-error.
  # * anything else: retry_count is incremented; the record becomes
  #   terminal-error once retry_count exceeds MAX_RETRIES, otherwise it
  #   becomes retryable-error and `fatal_error` is called.
  #
  # On the first attempt (retry_count == 0) a body whose hash equals
  # BlobFile::EMPTY_FILE_SHA256 is treated as status 500 regardless of the
  # status the server actually returned, and 500 is what gets stored in
  # `last_status_code`.
  #
  # The `ensure` clause runs on every exit path: it saves the record, touches
  # the owning post when there is one, and defers the thumbnail job if the
  # 200 branch was taken.
  sig do
    params(
      post_file: Domain::PostFile,
      response: Scraper::HttpClient::Response,
    ).void
  end
  def handle_file_download_response(post_file, response)
    should_enqueue_thumbnail_job = T.let(false, T::Boolean)
    status_code = response.status_code

    if response.log_entry.response_sha256 == BlobFile::EMPTY_FILE_SHA256 &&
         post_file.retry_count == 0
      logger.warn(
        format_tags("empty file, assuming server error and will try again"),
      )
      status_code = 500
    end

    post_file.log_entry = response.log_entry
    post_file.blob_sha256 = response.log_entry.response_sha256
    post_file.last_status_code = status_code

    logger.tagged(make_arg_tag(response.log_entry)) do
      if status_code == 200
        should_enqueue_thumbnail_job = true
        post_file.state_ok!
        logger.info(format_tags("downloaded file"))
      elsif status_code == 404
        post_file.state_terminal_error!
        logger.error(format_tags("404, terminal error state"))
      else
        post_file.retry_count += 1
        # NOTE(review): download_post_file gives up on a retryable-error
        # record once retry_count >= MAX_RETRIES, while this branch only
        # gives up when retry_count > MAX_RETRIES. A failure that brings
        # retry_count to exactly MAX_RETRIES therefore logs "will retry
        # later", yet the next download_post_file call skips the fetch.
        # Confirm whether the two thresholds are meant to differ.
        if post_file.retry_count > MAX_RETRIES
          post_file.state_terminal_error!
          logger.error(
            format_tags(
              make_tag("retry_count", post_file.retry_count),
              "retried too many times, giving up",
            ),
          )
        else
          post_file.state_retryable_error!
          # `fatal_error` is defined outside this module; presumably it
          # raises so the job is marked as failed -- TODO confirm.
          fatal_error(
            format_tags(
              make_tag("retry_count", post_file.retry_count),
              "will retry later",
            ),
          )
        end
      end
    end
  ensure
    post_file.save!
    post = post_file.post
    post.touch if post
    if should_enqueue_thumbnail_job
      defer_job(Domain::PostFileThumbnailJob, { post_file: })
    end
  end
end