Files
redux-scraper/app/jobs/domain/static_file_job_helper.rb
2025-02-25 05:47:44 +00:00

87 lines
2.4 KiB
Ruby

# typed: strict
# Shared logic for jobs that download the static file behind a
# Domain::PostFile and record the outcome on that record.
#
# The including class must descend from Scraper::JobBase (enforced by
# `requires_ancestor`). This module calls `http_client`, `fatal_error`,
# `logger`, `format_tags`, `make_tag` and `make_arg_tag`, none of which are
# defined here -- presumably they come from Scraper::JobBase and
# HasColorLogger; confirm against those definitions.
module Domain::StaticFileJobHelper
  extend T::Sig
  extend T::Helpers
  include HasColorLogger
  abstract!
  requires_ancestor { Scraper::JobBase }

  # Number of failed attempts (responses other than 200 and 404) after which
  # a post file is moved to the terminal error state.
  MAX_RETRIES = 3

  # Downloads the file for `post_file` and records the result on the record.
  #
  # No request is made, and the method returns early, when the record:
  #   * is already in the terminal error state,
  #   * is ok and already has a log entry,
  #   * is in the retryable error state with retry_count >= MAX_RETRIES
  #     (it is then moved to the terminal error state), or
  #   * has a blank URL (it is then moved to the terminal error state).
  #
  # Otherwise a GET is issued and the response is handled as follows:
  #   * 200   -> state ok, retry_count reset to 0
  #   * 404   -> terminal error
  #   * other -> retry_count is incremented; once it reaches MAX_RETRIES the
  #              record becomes a terminal error, otherwise it becomes a
  #              retryable error and `fatal_error` is called.
  #
  # An invalid URL or a host resolution failure is treated as a terminal
  # error and the exception message is stored on the record.
  #
  # The record is saved in the `ensure` clause on every exit path, including
  # the early returns and any exception that propagates out of this method.
  sig { params(post_file: Domain::PostFile).void }
  def download_post_file(post_file)
    if post_file.state_terminal_error?
      logger.error(format_tags("terminal error state, skipping"))
      return
    end

    if post_file.state_ok? && post_file.log_entry_id.present?
      logger.warn(format_tags("already downloaded, skipping"))
      return
    end

    if post_file.state_retryable_error? && post_file.retry_count >= MAX_RETRIES
      logger.warn(format_tags("retry count >= max retries, skipping"))
      post_file.state_terminal_error!
      return
    end

    file_url_str = post_file.url_str
    if file_url_str.blank?
      logger.warn(format_tags("no url, skipping"))
      post_file.state_terminal_error!
      return
    end

    begin
      response = http_client.get(file_url_str)
    rescue Scraper::HttpClient::InvalidURLError,
           Curl::Err::HostResolutionError => e
      # These failures are not retried: the record goes straight to the
      # terminal error state with the exception message attached.
      post_file.state_terminal_error!
      post_file.error_message = e.message
      logger.error(
        format_tags(
          "invalid url, terminal error state",
          make_tag("error_message", e.message),
          make_tag("url", file_url_str),
        ),
      )
      return
    end

    post_file.log_entry = response.log_entry
    post_file.last_status_code = response.status_code

    logger.tagged(make_arg_tag(response.log_entry)) do
      case response.status_code
      when 200
        post_file.state_ok!
        post_file.retry_count = 0
        logger.info(format_tags("downloaded file"))
      when 404
        post_file.state_terminal_error!
        logger.error(format_tags("404, terminal error state"))
      else
        post_file.retry_count += 1
        # Use the same threshold (>=) as the pre-flight guard above. With a
        # strict `>` here, the attempt that brought the count to MAX_RETRIES
        # was marked retryable and logged "will retry later", yet the next
        # run was rejected by the guard without issuing a request.
        # NOTE(review): if the intent is MAX_RETRIES retries *after* the
        # first attempt, relax the guard above to `>` instead -- confirm.
        if post_file.retry_count >= MAX_RETRIES
          post_file.state_terminal_error!
          logger.error(
            format_tags(
              make_tag("retry_count", post_file.retry_count),
              "retried too many times, giving up",
            ),
          )
        else
          post_file.state_retryable_error!
          # NOTE(review): fatal_error presumably raises so that the job is
          # attempted again later; the ensure clause below still saves the
          # record in that case -- confirm against Scraper::JobBase.
          fatal_error(
            format_tags(
              make_tag("retry_count", post_file.retry_count),
              "will retry later",
            ),
          )
        end
      end
    end
  ensure
    # Persist whatever was assigned above (log entry, status code, retry
    # count, error message) regardless of how the method exits.
    post_file.save! if post_file
  end
end