abstraction for http_client
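
Wrap the raw Scraper::HttpClient in a per-job WrappedHttpClient: every
request now automatically carries caused_by_entry: from the job's
causing_log_entry, and around_request records the job's first_log_entry,
so individual jobs no longer repeat that bookkeeping at each call site.

Sketch of the call-site change (names as in the diff below):

    # before: each job threaded the causal log entry through every
    # request and tracked the first log entry itself
    response = http_client.get(url, caused_by_entry: causing_log_entry)
    self.first_log_entry ||= response.log_entry

    # after: WrappedHttpClient#get injects caused_by_entry from
    # @job.causing_log_entry and sets @job.first_log_entry
    response = http_client.get(url)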
@@ -4,13 +4,8 @@ class Domain::E621::Job::PostsIndexJob < Domain::E621::Job::Base
 
   sig { override.params(args: T::Hash[Symbol, T.untyped]).void }
   def perform(args)
-    response =
-      http_client.get(
-        "https://e621.net/posts.json",
-        caused_by_entry: causing_log_entry,
-      )
+    response = http_client.get("https://e621.net/posts.json")
     log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
 
     if response.status_code != 200
       fatal_error(
@@ -21,8 +21,7 @@ class Domain::E621::Job::ScanPostFavsJob < Domain::E621::Job::Base
     logger.info("requesting page #{page}")
     url =
       "https://e621.net/posts/#{post.e621_id}/favorites?limit=#{MAX_USERS_PER_PAGE}&page=#{page}"
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    self.first_log_entry ||= response.log_entry
+    response = http_client.get(url)
     if response.status_code != 200
       fatal_error("#{response.status_code} - bailing")
     end
@@ -20,13 +20,8 @@ class Domain::E621::Job::ScanPostJob < Domain::E621::Job::Base
     end
 
     logger.info("Scanning post #{post.e621_id}")
-    response =
-      http_client.get(
-        "https://e621.net/posts/#{post.e621_id}.json",
-        caused_by_entry: causing_log_entry,
-      )
+    response = http_client.get("https://e621.net/posts/#{post.e621_id}.json")
     log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
     if response.status_code != 200
       post.state_detail["scan_log_entry_id"] = log_entry.id
       post.state = :scan_error
@@ -37,9 +37,7 @@ class Domain::E621::Job::ScanUserFavsJob < Domain::E621::Job::Base
     else
       limiter = "(none)"
     end
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
+    response = http_client.get(url)
 
     if response.status_code == 403 &&
          response.body.include?("This users favorites are hidden")
@@ -12,9 +12,7 @@ class Domain::E621::Job::ScanUsersJob < Domain::E621::Job::Base
 
     url = "https://e621.net/users.json?limit=320"
     url += "&page=b#{after}" if after
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
+    response = http_client.get(url)
 
     if response.status_code != 200
       fatal_error(
@@ -24,8 +24,7 @@ class Domain::E621::Job::StaticFileJob < Domain::E621::Job::Base
      end
    end
 
-    response = http_client.get(file_url_str, caused_by_entry: causing_log_entry)
-    self.first_log_entry ||= response.log_entry
+    response = http_client.get(file_url_str)
 
     if response.status_code != 200
       post.state = :file_error
@@ -38,9 +38,7 @@ class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
       url = "https://www.furaffinity.net/browse/#{@page_number}/"
     end
 
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
+    response = http_client.get(url)
 
     if response.status_code != 200
       fatal_error(
@@ -119,13 +119,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
       else
         "https://www.furaffinity.net/favorites/#{user.url_name}/"
       end
-    response =
-      http_client.get(
-        url,
-        caused_by_entry: causing_log_entry,
-        use_http_cache: @use_http_cache,
-      )
-    self.first_log_entry ||= response.log_entry
+    response = http_client.get(url, use_http_cache: @use_http_cache)
     if response.status_code != 200
       fatal_error(
         "http #{response.status_code.to_s.red.bold}, " +
@@ -24,9 +24,7 @@ class Domain::Fa::Job::HomePageJob < Domain::Fa::Job::Base
 
   def scan_home_page
     url = "https://www.furaffinity.net/"
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    log_entry = response.log_entry
-    self.first_log_entry ||= log_entry
+    response = http_client.get(url)
 
     if response.status_code != 200
       fatal_error(
@@ -63,9 +63,7 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
       return
     end
 
-    response =
-      http_client.get(post.file_uri.to_s, caused_by_entry: causing_log_entry)
-    self.first_log_entry ||= response.log_entry
+    response = http_client.get(post.file_uri.to_s)
 
     if response.status_code == 404
       post.state_detail["404_count"] ||= 0
@@ -59,11 +59,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
   sig { params(post: Domain::Fa::Post).void }
   def scan_post(post)
     response =
-      http_client.get(
-        "https://www.furaffinity.net/view/#{post.fa_id}/",
-        caused_by_entry: causing_log_entry,
-      )
-    self.first_log_entry ||= response.log_entry
+      http_client.get("https://www.furaffinity.net/view/#{post.fa_id}/")
     if response.status_code != 200
       fatal_error(
         "error scanning fa_id #{post.fa_id}: #{response.status_code}, log entry #{response.log_entry.id}",
@@ -12,11 +12,7 @@ class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
     end
 
     response =
-      http_client.get(
-        "https://a.furaffinity.net/0/#{user.url_name}.gif",
-        caused_by_entry: @caused_by_entry,
-      )
-    self.first_log_entry ||= response.log_entry
+      http_client.get("https://a.furaffinity.net/0/#{user.url_name}.gif")
 
     avatar.state_detail["log_entries"] ||= [avatar.log_entry&.id].compact
     avatar.state_detail["log_entries"] << response.log_entry.id
@@ -76,8 +76,7 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
       else
         "https://www.furaffinity.net/watchlist/by/#{user.url_name}/"
       end
-    response = http_client.get(url, caused_by_entry: causing_log_entry)
-    self.first_log_entry ||= response.log_entry
+    response = http_client.get(url)
     if response.status_code != 200
       fatal_error(
         "http #{response.status_code.to_s.red.bold}, " +
@@ -86,7 +86,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
     folder_href = "/" + folder_href unless folder_href.start_with?("/")
     page_url =
       "https://www.furaffinity.net#{folder_href}#{page_number}?perpage=72"
-    response = http_client.get(page_url, caused_by_entry: causing_log_entry)
+    response = http_client.get(page_url)
     log_entry = response.log_entry
 
     if response.status_code == 200
@@ -28,11 +28,7 @@ module Domain::Fa::Job
     end
 
     response =
-      http_client.get(
-        "https://www.furaffinity.net/user/#{user.url_name}/",
-        caused_by_entry: @caused_by_entry,
-      )
-    @log_entry = response.log_entry
+      http_client.get("https://www.furaffinity.net/user/#{user.url_name}/")
 
     ret, opts =
       Domain::Fa::Job::ScanUserUtils.check_disabled_or_not_found(
@@ -17,10 +17,7 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
     end
 
     response =
-      http_client.get(
-        "https://www.furaffinity.net/user/#{user.url_name}/",
-        caused_by_entry: causing_log_entry,
-      )
+      http_client.get("https://www.furaffinity.net/user/#{user.url_name}/")
 
     ret, opts =
       Domain::Fa::Job::ScanUserUtils.check_disabled_or_not_found(user, response)
@@ -1,9 +1,22 @@
-# typed: true
+# typed: strict
 class Domain::Inkbunny::Job::Base < Scraper::JobBase
+  extend T::Sig
+
   discard_on ActiveJob::DeserializationError
   queue_as :inkbunny
 
+  sig { override.returns(Symbol) }
   def self.http_factory_method
     :get_inkbunny_http_client
   end
+
+  sig { returns(T.nilable(Domain::Inkbunny::User)) }
+  def user_from_args
+    T.cast(arguments[0][:user], T.nilable(Domain::Inkbunny::User))
+  end
+
+  sig { returns(Domain::Inkbunny::User) }
+  def user_from_args!
+    user_from_args || raise("user must exist")
+  end
 end
@@ -1,11 +1,11 @@
-# typed: true
+# typed: strict
 module Domain::Inkbunny::Job
   class FileJob < Base
    queue_as :static_file
 
+    sig { override.params(args: T.untyped).void }
    def perform(args)
      file = args[:file] || fatal_error("file is required")
-      caused_by_entry = args[:caused_by_entry]
      logger.prefix =
        proc do
          "[#{file.id.to_s.bold} / " + "#{file.ib_file_id.to_s.bold} / " +
@@ -21,7 +21,7 @@ module Domain::Inkbunny::Job
        end
      end
 
-      response = http_client.get(url_str, caused_by_entry: caused_by_entry)
+      response = http_client.get(url_str)
 
      if response.status_code != 200
        file.state = :error
@@ -16,9 +16,7 @@ module Domain::Inkbunny::Job
      end
 
      url = ApiSearchPageProcessor.build_api_search_url(rid: rid, page: page)
-      response = http_client.post(url, caused_by_entry: causing_log_entry)
-      log_entry = response.log_entry
-      self.first_log_entry ||= log_entry
+      response = http_client.post(url)
 
      if response.status_code != 200
        fatal_error("api_search failed: #{response.status_code}")
@@ -27,7 +25,7 @@ module Domain::Inkbunny::Job
      result =
        processor.process!(
          JSON.parse(response.body),
-          caused_by_entry: log_entry,
+          caused_by_entry: response.log_entry,
        )
      num_new_posts = T.cast(result[:num_new_posts], Integer)
      logger.info(
@@ -28,9 +28,7 @@ module Domain::Inkbunny::Job
        rid: rid,
        page: page,
      )
-      response = http_client.post(url, caused_by_entry: causing_log_entry)
-      log_entry = response.log_entry
-      self.first_log_entry ||= log_entry
+      response = http_client.post(url)
      pool.deep_update_log_entry = causing_log_entry
 
      if response.status_code != 200
@@ -40,7 +38,7 @@ module Domain::Inkbunny::Job
      result =
        processor.process!(
          JSON.parse(response.body),
-          caused_by_entry: log_entry,
+          caused_by_entry: response.log_entry,
        )
 
      rid ||= T.cast(result[:rid], String)
@@ -58,9 +58,7 @@ class Domain::Inkbunny::Job::UpdatePostsJob < Domain::Inkbunny::Job::Base
        pools_to_update
      )
      url = build_api_submissions_url(ib_post_ids_chunk)
-      response = http_client.get(url, caused_by_entry: causing_log_entry)
-      log_entry = response.log_entry
-      self.first_log_entry ||= log_entry
+      response = http_client.get(url)
      if response.status_code != 200
        fatal_error("api_submissions failed: #{response.status_code}")
      end
@@ -72,7 +70,7 @@ class Domain::Inkbunny::Job::UpdatePostsJob < Domain::Inkbunny::Job::Base
      Domain::Inkbunny::Post.transaction do
        deep_update_post_from_submission_json(
          submission_json,
-          log_entry,
+          response.log_entry,
          missing_pool_post_ib_ids,
          pools_to_update,
        )
@@ -1,49 +1,53 @@
-# typed: true
+# typed: strict
 module Domain::Inkbunny::Job
   class UserAvatarJob < Base
     queue_as :static_file
 
+    sig { params(args: T.untyped).void }
+    def initialize(*args)
+      super(*T.unsafe(args))
+    end
+
+    sig { override.params(args: T.untyped).void }
     def perform(args)
-      @user = args[:user] || raise("user must exist")
-      @caused_by_entry = args[:caused_by_entry]
+      user = user_from_args!
 
       logger.prefix =
-        proc do
-          "[user #{@user.name.to_s.bold} / #{@user.ib_user_id.to_s.bold}]"
-        end
+        proc { "[user #{user.name.to_s.bold} / #{user.ib_user_id.to_s.bold}]" }
 
-      if @user.avatar_url_str.blank?
+      avatar_url_str = user.avatar_url_str
+      if avatar_url_str.blank?
        logger.warn("user has no avatar_url_str")
        return
      end
 
-      response =
-        http_client.get(@user.avatar_url_str, caused_by_entry: @caused_by_entry)
+      response = http_client.get(avatar_url_str)
+      self.first_log_entry ||= response.log_entry
 
-      @user.avatar_state_detail ||= {}
-      @user.avatar_state_detail["log_entries"] ||= [
-        @user.avatar_file_log_entry_id,
+      user.avatar_state_detail ||= {}
+      user.avatar_state_detail["log_entries"] ||= [
+        user.avatar_file_log_entry_id,
      ].compact
-      @user.avatar_state_detail["log_entries"] << response.log_entry.id
-      @user.avatar_log_entry = response.log_entry
+      user.avatar_state_detail["log_entries"] << response.log_entry.id
+      user.avatar_log_entry = response.log_entry
 
      case response.status_code
      when 200
-        @user.avatar_state = :ok
-        @user.avatar_state_detail.delete("download_error")
-        @user.avatar_downloaded_at = response.log_entry.created_at
-        @user.avatar_file_sha256 = response.log_entry.response_sha256
+        user.avatar_state = :ok
+        user.avatar_state_detail.delete("download_error")
+        user.avatar_downloaded_at = response.log_entry.created_at
+        user.avatar_file_sha256 = response.log_entry.response_sha256
        logger.info("downloaded avatar")
      when 404
-        @user.avatar_state = :not_found
+        user.avatar_state = :not_found
        logger.info("avatar 404")
      else
-        @user.avatar_state = :error
-        @user.avatar_state_detail[
+        user.avatar_state = :error
+        user.avatar_state_detail[
          "download_error"
        ] = "http status #{response.status_code}"
-        if @user.avatar_file_sha256.blank?
-          @user.avatar_downloaded_at = response.log_entry.created_at
+        if user.avatar_file_sha256.blank?
+          user.avatar_downloaded_at = response.log_entry.created_at
          logger.info("avatar error, and no previous file")
        else
          logger.info("avatar error, keeping previous file")
@@ -53,7 +57,7 @@ module Domain::Inkbunny::Job
        )
      end
    ensure
-      @user.save! if @user
+      user.save! if user
    end
  end
 end
@@ -1,10 +1,9 @@
-# typed: false
+# typed: true
 module Domain::Inkbunny::Job
   class UserGalleryJob < Base
    def perform(args)
-      user = args[:user] || raise("user must exist")
-      caused_by_entry = args[:caused_by_entry]
-      logger.prefix = "[#{user.name.bold} / #{user.ib_user_id.to_s.bold}]"
+      user = user_from_args!
+      logger.prefix = "[#{user.name&.bold} / #{user.ib_user_id.to_s.bold}]"
 
      if user.scanned_gallery_at&.after?(1.week.ago)
        logger.warn(
@@ -14,13 +13,12 @@ module Domain::Inkbunny::Job
      end
 
      processor = ApiSearchPageProcessor.new
-      first_log_entry = nil
-      rid = nil
-      page = 1
-      loop_count = 0
-      max_loop_count = 2000
+      rid = T.let(nil, T.nilable(String))
+      page = T.let(1, Integer)
+      loop_count = T.let(0, Integer)
+      max_loop_count = T.let(2000, Integer)
 
-      while true
+      loop do
        loop_count += 1
        raise("loop_count: #{loop_count}") if loop_count > max_loop_count
@@ -31,12 +29,7 @@ module Domain::Inkbunny::Job
          page: page,
        )
 
-        response =
-          http_client.post(
-            url,
-            caused_by_entry: first_log_entry || caused_by_entry,
-          )
-        first_log_entry ||= response.log_entry
+        response = http_client.post(url)
        if response.status_code != 200
          fatal_error("api_search failed: #{response.status_code}")
        end
@@ -61,22 +54,18 @@ module Domain::Inkbunny::Job
          logger.info("[no new posts, stopping]")
          break
        end
-        rid = result[:rid] || raise("no rid")
-        break if result[:num_pages] <= page
+        rid = T.cast(result[:rid], String)
+        break if T.cast(result[:num_pages], Integer) <= page
        page += 1
      end
 
      logger.info("[total new posts: #{result[:num_total_new_posts]}]")
      user.scanned_gallery_at = Time.current
      user.save!
 
      if processor.changed_posts.any?
        defer_job(
          Domain::Inkbunny::Job::UpdatePostsJob,
-          {
-            ib_post_ids: processor.changed_posts.map(&:ib_post_id),
-            caused_by_entry: first_log_entry,
-          },
+          { ib_post_ids: processor.changed_posts.map(&:ib_post_id) },
        )
      end
    end
@@ -15,8 +15,7 @@ class Domain::Twitter::Job::MediaJob < Domain::Twitter::Job::TwitterJobBase
      return
    end
 
-    response =
-      http_client.get(@media.url_str, caused_by_entry: @caused_by_entry)
+    response = http_client.get(@media.url_str)
 
    logger.debug "#{HexUtil.humansize(T.must(response.log_entry.response&.size))} / " +
                   "#{response.log_entry.content_type} / " +
@@ -11,6 +11,63 @@ class Scraper::JobBase < ApplicationJob
 
   DeferredJob = Struct.new(:job_class, :params, :set_args)
 
+  class WrappedHttpClient
+    extend T::Sig
+
+    sig { params(job: Scraper::JobBase, http_client: Scraper::HttpClient).void }
+    def initialize(job, http_client)
+      @job = job
+      @http_client = http_client
+    end
+
+    sig do
+      params(url: String, use_http_cache: T::Boolean).returns(
+        Scraper::HttpClient::Response,
+      )
+    end
+    def get(url, use_http_cache: false)
+      around_request(
+        proc do
+          @http_client.get(
+            url,
+            caused_by_entry: @job.causing_log_entry,
+            use_http_cache: use_http_cache,
+          )
+        end,
+      )
+    end
+
+    sig do
+      params(url: String, use_http_cache: T::Boolean).returns(
+        Scraper::HttpClient::Response,
+      )
+    end
+    def post(url, use_http_cache: false)
+      around_request(
+        proc do
+          @http_client.post(
+            url,
+            caused_by_entry: @job.causing_log_entry,
+            use_http_cache: use_http_cache,
+          )
+        end,
+      )
+    end
+
+    private
+
+    sig do
+      params(proc: T.proc.returns(Scraper::HttpClient::Response)).returns(
+        Scraper::HttpClient::Response,
+      )
+    end
+    def around_request(proc)
+      response = proc.call
+      @job.first_log_entry ||= response.log_entry
+      response
+    end
+  end
+
   sig { params(args: T.untyped).void }
   def initialize(*args)
     super(*T.unsafe(args))
@@ -24,9 +81,10 @@ class Scraper::JobBase < ApplicationJob
   def self.http_factory_method
   end
 
-  sig { returns(Scraper::HttpClient) }
+  sig { returns(WrappedHttpClient) }
   def http_client
     @http_client ||= Scraper::ClientFactory.send(self.class.http_factory_method)
+    WrappedHttpClient.new(self, @http_client)
   end
 
   sig { returns(Scraper::GalleryDlClient) }