abstraction for http_client

This commit is contained in:
Dylan Knutson
2025-01-28 23:50:12 +00:00
parent de4874c886
commit 6c33c35a12
25 changed files with 139 additions and 126 deletions
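
Job subclasses previously had to thread caused_by_entry: causing_log_entry through every http_client.get / http_client.post call and separately remember to record self.first_log_entry ||= response.log_entry. Scraper::JobBase now exposes a WrappedHttpClient whose get/post inject the job's causing log entry and record the first log entry in a single around_request hook, so call sites shrink to a plain URL (plus use_http_cache where a job opts in). A representative call site, taken from the diff below:

    # before
    response =
      http_client.get(
        "https://e621.net/posts.json",
        caused_by_entry: causing_log_entry,
      )
    self.first_log_entry ||= response.log_entry

    # after
    response = http_client.get("https://e621.net/posts.json")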

View File

@@ -4,13 +4,8 @@ class Domain::E621::Job::PostsIndexJob < Domain::E621::Job::Base
   sig { override.params(args: T::Hash[Symbol, T.untyped]).void }
   def perform(args)
-    response =
-      http_client.get(
-        "https://e621.net/posts.json",
-        caused_by_entry: causing_log_entry,
-      )
+    response = http_client.get("https://e621.net/posts.json")
     log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
     if response.status_code != 200
       fatal_error(

View File

@@ -21,8 +21,7 @@ class Domain::E621::Job::ScanPostFavsJob < Domain::E621::Job::Base
       logger.info("requesting page #{page}")
       url =
         "https://e621.net/posts/#{post.e621_id}/favorites?limit=#{MAX_USERS_PER_PAGE}&page=#{page}"
-      response = http_client.get(url, caused_by_entry: causing_log_entry)
-      self.first_log_entry ||= response.log_entry
+      response = http_client.get(url)
       if response.status_code != 200
         fatal_error("#{response.status_code} - bailing")
       end

View File

@@ -20,13 +20,8 @@ class Domain::E621::Job::ScanPostJob < Domain::E621::Job::Base
     end
     logger.info("Scanning post #{post.e621_id}")
-    response =
-      http_client.get(
-        "https://e621.net/posts/#{post.e621_id}.json",
-        caused_by_entry: causing_log_entry,
-      )
+    response = http_client.get("https://e621.net/posts/#{post.e621_id}.json")
     log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
     if response.status_code != 200
       post.state_detail["scan_log_entry_id"] = log_entry.id
       post.state = :scan_error

View File

@@ -37,9 +37,7 @@ class Domain::E621::Job::ScanUserFavsJob < Domain::E621::Job::Base
     else
       limiter = "(none)"
     end
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
+    response = http_client.get(url)
     if response.status_code == 403 &&
          response.body.include?("This users favorites are hidden")

View File

@@ -12,9 +12,7 @@ class Domain::E621::Job::ScanUsersJob < Domain::E621::Job::Base
     url = "https://e621.net/users.json?limit=320"
     url += "&page=b#{after}" if after
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
+    response = http_client.get(url)
     if response.status_code != 200
       fatal_error(

View File

@@ -24,8 +24,7 @@ class Domain::E621::Job::StaticFileJob < Domain::E621::Job::Base
       end
     end
-    response = http_client.get(file_url_str, caused_by_entry: causing_log_entry)
-    self.first_log_entry ||= response.log_entry
+    response = http_client.get(file_url_str)
     if response.status_code != 200
       post.state = :file_error

View File

@@ -38,9 +38,7 @@ class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
       url = "https://www.furaffinity.net/browse/#{@page_number}/"
     end
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
+    response = http_client.get(url)
     if response.status_code != 200
       fatal_error(

View File

@@ -119,13 +119,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
       else
         "https://www.furaffinity.net/favorites/#{user.url_name}/"
       end
-    response =
-      http_client.get(
-        url,
-        caused_by_entry: causing_log_entry,
-        use_http_cache: @use_http_cache,
-      )
-    self.first_log_entry ||= response.log_entry
+    response = http_client.get(url, use_http_cache: @use_http_cache)
     if response.status_code != 200
       fatal_error(
         "http #{response.status_code.to_s.red.bold}, " +

View File

@@ -24,9 +24,7 @@ class Domain::Fa::Job::HomePageJob < Domain::Fa::Job::Base
   def scan_home_page
     url = "https://www.furaffinity.net/"
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
+    response = http_client.get(url)
     if response.status_code != 200
       fatal_error(

View File

@@ -63,9 +63,7 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
       return
     end
-    response =
-      http_client.get(post.file_uri.to_s, caused_by_entry: causing_log_entry)
-    self.first_log_entry ||= response.log_entry
+    response = http_client.get(post.file_uri.to_s)
     if response.status_code == 404
       post.state_detail["404_count"] ||= 0

View File

@@ -59,11 +59,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
   sig { params(post: Domain::Fa::Post).void }
   def scan_post(post)
     response =
-      http_client.get(
-        "https://www.furaffinity.net/view/#{post.fa_id}/",
-        caused_by_entry: causing_log_entry,
-      )
-    self.first_log_entry ||= response.log_entry
+      http_client.get("https://www.furaffinity.net/view/#{post.fa_id}/")
     if response.status_code != 200
       fatal_error(
         "error scanning fa_id #{post.fa_id}: #{response.status_code}, log entry #{response.log_entry.id}",

View File

@@ -12,11 +12,7 @@ class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
     end
     response =
-      http_client.get(
-        "https://a.furaffinity.net/0/#{user.url_name}.gif",
-        caused_by_entry: @caused_by_entry,
-      )
-    self.first_log_entry ||= response.log_entry
+      http_client.get("https://a.furaffinity.net/0/#{user.url_name}.gif")
     avatar.state_detail["log_entries"] ||= [avatar.log_entry&.id].compact
     avatar.state_detail["log_entries"] << response.log_entry.id

View File

@@ -76,8 +76,7 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
     else
       "https://www.furaffinity.net/watchlist/by/#{user.url_name}/"
     end
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    self.first_log_entry ||= response.log_entry
+    response = http_client.get(url)
     if response.status_code != 200
       fatal_error(
         "http #{response.status_code.to_s.red.bold}, " +

View File

@@ -86,7 +86,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
       folder_href = "/" + folder_href unless folder_href.start_with?("/")
       page_url =
         "https://www.furaffinity.net#{folder_href}#{page_number}?perpage=72"
-      response = http_client.get(page_url, caused_by_entry: causing_log_entry)
+      response = http_client.get(page_url)
       log_entry = response.log_entry
       if response.status_code == 200

View File

@@ -28,11 +28,7 @@ module Domain::Fa::Job
     end
     response =
-      http_client.get(
-        "https://www.furaffinity.net/user/#{user.url_name}/",
-        caused_by_entry: @caused_by_entry,
-      )
-    @log_entry = response.log_entry
+      http_client.get("https://www.furaffinity.net/user/#{user.url_name}/")
     ret, opts =
       Domain::Fa::Job::ScanUserUtils.check_disabled_or_not_found(

View File

@@ -17,10 +17,7 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
     end
     response =
-      http_client.get(
-        "https://www.furaffinity.net/user/#{user.url_name}/",
-        caused_by_entry: causing_log_entry,
-      )
+      http_client.get("https://www.furaffinity.net/user/#{user.url_name}/")
     ret, opts =
       Domain::Fa::Job::ScanUserUtils.check_disabled_or_not_found(user, response)

View File

@@ -1,9 +1,22 @@
-# typed: true
+# typed: strict
 class Domain::Inkbunny::Job::Base < Scraper::JobBase
+  extend T::Sig
+
   discard_on ActiveJob::DeserializationError
   queue_as :inkbunny
+
+  sig { override.returns(Symbol) }
   def self.http_factory_method
     :get_inkbunny_http_client
   end
+
+  sig { returns(T.nilable(Domain::Inkbunny::User)) }
+  def user_from_args
+    T.cast(arguments[0][:user], T.nilable(Domain::Inkbunny::User))
+  end
+
+  sig { returns(Domain::Inkbunny::User) }
+  def user_from_args!
+    user_from_args || raise("user must exist")
+  end
 end

View File

@@ -1,11 +1,11 @@
-# typed: true
+# typed: strict
 module Domain::Inkbunny::Job
   class FileJob < Base
     queue_as :static_file
+
+    sig { override.params(args: T.untyped).void }
     def perform(args)
       file = args[:file] || fatal_error("file is required")
-      caused_by_entry = args[:caused_by_entry]
       logger.prefix =
         proc do
           "[#{file.id.to_s.bold} / " + "#{file.ib_file_id.to_s.bold} / " +
@@ -21,7 +21,7 @@ module Domain::Inkbunny::Job
         end
       end
-      response = http_client.get(url_str, caused_by_entry: caused_by_entry)
+      response = http_client.get(url_str)
       if response.status_code != 200
         file.state = :error

View File

@@ -16,9 +16,7 @@ module Domain::Inkbunny::Job
       end
       url = ApiSearchPageProcessor.build_api_search_url(rid: rid, page: page)
-      response = http_client.post(url, caused_by_entry: causing_log_entry)
-      log_entry = response.log_entry
-      self.first_log_entry ||= log_entry
+      response = http_client.post(url)
       if response.status_code != 200
         fatal_error("api_search failed: #{response.status_code}")
@@ -27,7 +25,7 @@ module Domain::Inkbunny::Job
       result =
         processor.process!(
           JSON.parse(response.body),
-          caused_by_entry: log_entry,
+          caused_by_entry: response.log_entry,
         )
       num_new_posts = T.cast(result[:num_new_posts], Integer)
       logger.info(

View File

@@ -28,9 +28,7 @@ module Domain::Inkbunny::Job
         rid: rid,
         page: page,
       )
-      response = http_client.post(url, caused_by_entry: causing_log_entry)
-      log_entry = response.log_entry
-      self.first_log_entry ||= log_entry
+      response = http_client.post(url)
       pool.deep_update_log_entry = causing_log_entry
       if response.status_code != 200
@@ -40,7 +38,7 @@ module Domain::Inkbunny::Job
       result =
         processor.process!(
           JSON.parse(response.body),
-          caused_by_entry: log_entry,
+          caused_by_entry: response.log_entry,
         )
       rid ||= T.cast(result[:rid], String)

View File

@@ -58,9 +58,7 @@ class Domain::Inkbunny::Job::UpdatePostsJob < Domain::Inkbunny::Job::Base
         pools_to_update
       )
       url = build_api_submissions_url(ib_post_ids_chunk)
-      response = http_client.get(url, caused_by_entry: causing_log_entry)
-      log_entry = response.log_entry
-      self.first_log_entry ||= log_entry
+      response = http_client.get(url)
       if response.status_code != 200
         fatal_error("api_submissions failed: #{response.status_code}")
       end
@@ -72,7 +70,7 @@ class Domain::Inkbunny::Job::UpdatePostsJob < Domain::Inkbunny::Job::Base
       Domain::Inkbunny::Post.transaction do
         deep_update_post_from_submission_json(
           submission_json,
-          log_entry,
+          response.log_entry,
           missing_pool_post_ib_ids,
           pools_to_update,
         )

View File

@@ -1,49 +1,53 @@
-# typed: true
+# typed: strict
 module Domain::Inkbunny::Job
   class UserAvatarJob < Base
     queue_as :static_file
-    sig { params(args: T.untyped).void }
-    def initialize(*args)
-      super(*T.unsafe(args))
-    end
+
+    sig { override.params(args: T.untyped).void }
     def perform(args)
-      @user = args[:user] || raise("user must exist")
-      @caused_by_entry = args[:caused_by_entry]
+      user = user_from_args!
       logger.prefix =
-        proc do
-          "[user #{@user.name.to_s.bold} / #{@user.ib_user_id.to_s.bold}]"
-        end
+        proc { "[user #{user.name.to_s.bold} / #{user.ib_user_id.to_s.bold}]" }
-      if @user.avatar_url_str.blank?
+      avatar_url_str = user.avatar_url_str
+      if avatar_url_str.blank?
         logger.warn("user has no avatar_url_str")
         return
       end
-      response =
-        http_client.get(@user.avatar_url_str, caused_by_entry: @caused_by_entry)
+      response = http_client.get(avatar_url_str)
       self.first_log_entry ||= response.log_entry
-      @user.avatar_state_detail ||= {}
-      @user.avatar_state_detail["log_entries"] ||= [
-        @user.avatar_file_log_entry_id,
+      user.avatar_state_detail ||= {}
+      user.avatar_state_detail["log_entries"] ||= [
+        user.avatar_file_log_entry_id,
       ].compact
-      @user.avatar_state_detail["log_entries"] << response.log_entry.id
-      @user.avatar_log_entry = response.log_entry
+      user.avatar_state_detail["log_entries"] << response.log_entry.id
+      user.avatar_log_entry = response.log_entry
       case response.status_code
       when 200
-        @user.avatar_state = :ok
-        @user.avatar_state_detail.delete("download_error")
-        @user.avatar_downloaded_at = response.log_entry.created_at
-        @user.avatar_file_sha256 = response.log_entry.response_sha256
+        user.avatar_state = :ok
+        user.avatar_state_detail.delete("download_error")
+        user.avatar_downloaded_at = response.log_entry.created_at
+        user.avatar_file_sha256 = response.log_entry.response_sha256
         logger.info("downloaded avatar")
       when 404
-        @user.avatar_state = :not_found
+        user.avatar_state = :not_found
        logger.info("avatar 404")
       else
-        @user.avatar_state = :error
-        @user.avatar_state_detail[
+        user.avatar_state = :error
+        user.avatar_state_detail[
           "download_error"
         ] = "http status #{response.status_code}"
-        if @user.avatar_file_sha256.blank?
-          @user.avatar_downloaded_at = response.log_entry.created_at
+        if user.avatar_file_sha256.blank?
+          user.avatar_downloaded_at = response.log_entry.created_at
           logger.info("avatar error, and no previous file")
         else
          logger.info("avatar error, keeping previous file")
@@ -53,7 +57,7 @@ module Domain::Inkbunny::Job
         )
       end
     ensure
-      @user.save! if @user
+      user.save! if user
     end
   end
 end

View File

@@ -1,10 +1,9 @@
-# typed: false
+# typed: true
 module Domain::Inkbunny::Job
   class UserGalleryJob < Base
     def perform(args)
-      user = args[:user] || raise("user must exist")
-      caused_by_entry = args[:caused_by_entry]
-      logger.prefix = "[#{user.name.bold} / #{user.ib_user_id.to_s.bold}]"
+      user = user_from_args!
+      logger.prefix = "[#{user.name&.bold} / #{user.ib_user_id.to_s.bold}]"
       if user.scanned_gallery_at&.after?(1.week.ago)
         logger.warn(
@@ -14,13 +13,12 @@ module Domain::Inkbunny::Job
       end
       processor = ApiSearchPageProcessor.new
-      first_log_entry = nil
-      rid = nil
-      page = 1
-      loop_count = 0
-      max_loop_count = 2000
+      rid = T.let(nil, T.nilable(String))
+      page = T.let(1, Integer)
+      loop_count = T.let(0, Integer)
+      max_loop_count = T.let(2000, Integer)
-      while true
+      loop do
        loop_count += 1
        raise("loop_count: #{loop_count}") if loop_count > max_loop_count
@@ -31,12 +29,7 @@ module Domain::Inkbunny::Job
           page: page,
         )
-        response =
-          http_client.post(
-            url,
-            caused_by_entry: first_log_entry || caused_by_entry,
-          )
-        first_log_entry ||= response.log_entry
+        response = http_client.post(url)
         if response.status_code != 200
           fatal_error("api_search failed: #{response.status_code}")
         end
@@ -61,22 +54,18 @@ module Domain::Inkbunny::Job
           logger.info("[no new posts, stopping]")
           break
         end
-        rid = result[:rid] || raise("no rid")
-        break if result[:num_pages] <= page
+        rid = T.cast(result[:rid], String)
+        break if T.cast(result[:num_pages], Integer) <= page
         page += 1
       end
       logger.info("[total new posts: #{result[:num_total_new_posts]}]")
       user.scanned_gallery_at = Time.current
       user.save!
       if processor.changed_posts.any?
         defer_job(
           Domain::Inkbunny::Job::UpdatePostsJob,
-          {
-            ib_post_ids: processor.changed_posts.map(&:ib_post_id),
-            caused_by_entry: first_log_entry,
-          },
+          { ib_post_ids: processor.changed_posts.map(&:ib_post_id) },
         )
       end
     end

View File

@@ -15,8 +15,7 @@ class Domain::Twitter::Job::MediaJob < Domain::Twitter::Job::TwitterJobBase
       return
     end
-    response =
-      http_client.get(@media.url_str, caused_by_entry: @caused_by_entry)
+    response = http_client.get(@media.url_str)
     logger.debug "#{HexUtil.humansize(T.must(response.log_entry.response&.size))} / " +
       "#{response.log_entry.content_type} / " +

View File

@@ -11,6 +11,63 @@ class Scraper::JobBase < ApplicationJob
   DeferredJob = Struct.new(:job_class, :params, :set_args)

+  class WrappedHttpClient
+    extend T::Sig
+
+    sig { params(job: Scraper::JobBase, http_client: Scraper::HttpClient).void }
+    def initialize(job, http_client)
+      @job = job
+      @http_client = http_client
+    end
+
+    sig do
+      params(url: String, use_http_cache: T::Boolean).returns(
+        Scraper::HttpClient::Response,
+      )
+    end
+    def get(url, use_http_cache: false)
+      around_request(
+        proc do
+          @http_client.get(
+            url,
+            caused_by_entry: @job.causing_log_entry,
+            use_http_cache: use_http_cache,
+          )
+        end,
+      )
+    end
+
+    sig do
+      params(url: String, use_http_cache: T::Boolean).returns(
+        Scraper::HttpClient::Response,
+      )
+    end
+    def post(url, use_http_cache: false)
+      around_request(
+        proc do
+          @http_client.post(
+            url,
+            caused_by_entry: @job.causing_log_entry,
+            use_http_cache: use_http_cache,
+          )
+        end,
+      )
+    end
+
+    private
+
+    sig do
+      params(proc: T.proc.returns(Scraper::HttpClient::Response)).returns(
+        Scraper::HttpClient::Response,
+      )
+    end
+    def around_request(proc)
+      response = proc.call
+      @job.first_log_entry ||= response.log_entry
+      response
+    end
+  end
+
   sig { params(args: T.untyped).void }
   def initialize(*args)
     super(*T.unsafe(args))
@@ -24,9 +81,10 @@ class Scraper::JobBase < ApplicationJob
   def self.http_factory_method
   end

-  sig { returns(Scraper::HttpClient) }
+  sig { returns(WrappedHttpClient) }
   def http_client
     @http_client ||= Scraper::ClientFactory.send(self.class.http_factory_method)
+    WrappedHttpClient.new(self, @http_client)
   end

   sig { returns(Scraper::GalleryDlClient) }
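
Note that http_client memoizes only the underlying Scraper::HttpClient; a fresh WrappedHttpClient is constructed on each call, which is cheap since it only captures the job and the client. A minimal job written against the new interface might look like this (hypothetical job, factory method, and URL, for illustration only):

    # typed: strict
    class Domain::Example::Job::PingJob < Scraper::JobBase
      extend T::Sig

      sig { override.returns(Symbol) }
      def self.http_factory_method
        :get_example_http_client # assumed factory on Scraper::ClientFactory
      end

      sig { override.params(args: T::Hash[Symbol, T.untyped]).void }
      def perform(args)
        # caused_by_entry and first_log_entry bookkeeping happen inside
        # WrappedHttpClient#around_request; the job only checks the result.
        response = http_client.get("https://example.com/health.json")
        fatal_error("http #{response.status_code}") if response.status_code != 200
      end
    end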