better behavior for users that are not found
@@ -378,6 +378,7 @@ class Domain::Fa::Job::Base < Scraper::JobBase
return nil unless page.probably_user_page?

user_page = page.user_page
user.state_ok!
user.name = user_page.name
user.registered_at = user_page.registered_since
user.num_pageviews = user_page.num_pageviews
@@ -439,15 +440,9 @@ class Domain::Fa::Job::Base < Scraper::JobBase
end

FoundLink = Scraper::LinkFinder::FoundLink
class JobDef < T::ImmutableStruct
  include T::Struct::ActsAsComparable

  const :job, Domain
end
sig do
  params(log_entry: HttpLogEntry, suppress_jobs: T::Array[T.untyped]).void
end
def enqueue_jobs_from_found_links(log_entry, suppress_jobs: [])
sig { params(log_entry: HttpLogEntry).void }
def enqueue_jobs_from_found_links(log_entry)
  return if skip_enqueue_found_links?

  logger.tagged("link-finder") do
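
A minimal sketch of how call sites adapt to this signature change, assembled from the ScanPostJob hunk further down (an illustration, not a line from the commit):

# before: suppression was passed inline to the link-finder enqueue
enqueue_jobs_from_found_links(
  log_entry,
  suppress_jobs: [{ job: self.class, fa_id: post.fa_id }],
)

# after: suppressions are registered on the job first, then links are enqueued
suppress_deferred_job(Domain::Fa::Job::ScanPostJob, { fa_id: post.fa_id })
enqueue_jobs_from_found_links(log_entry)
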
@@ -541,6 +536,7 @@ class Domain::Fa::Job::Base < Scraper::JobBase
  [
    /User ".+" was not found in our database\./,
    /The username ".+" could not be found\./,
    %r{This user cannot be found.<br/><br/>},
  ],
  T::Array[Regexp],
)
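
Assuming this array is the NOT_FOUND_PAGE_PATTERNS constant referenced in the next hunk, a minimal sketch of how the newly added pattern matches the fixture page added in this commit (path taken from the fixture file below):

body = File.read(
  "test/fixtures/files/domain/fa/user_page/user_page_onefatpokemon_not_found.html",
)
NOT_FOUND_PAGE_PATTERNS.any? { |pattern| body =~ pattern }
# => true, via the new %r{This user cannot be found.<br/><br/>} pattern
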
@@ -572,12 +568,22 @@ class Domain::Fa::Job::Base < Scraper::JobBase
)
end

suppress_user_jobs =
  Kernel.lambda do |user|
    suppress_deferred_job(Domain::Fa::Job::UserPageJob, { user: })
    suppress_deferred_job(Domain::Fa::Job::FavsJob, { user: })
    suppress_deferred_job(Domain::Fa::Job::UserGalleryJob, { user: })
    suppress_deferred_job(Domain::Fa::Job::UserFollowsJob, { user: })
  end

if DISABLED_PAGE_PATTERNS.any? { |pattern| response.body =~ pattern }
  user.state_account_disabled!
  user.is_disabled = true
  suppress_user_jobs.call(user)
  true
elsif NOT_FOUND_PAGE_PATTERNS.any? { |pattern| response.body =~ pattern }
  user.state_error!
  suppress_user_jobs.call(user)
  true
else
  false
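
The { user: } arguments above use Ruby 3.1 hash value omission; a minimal illustration (user stands in for the lambda's block parameter):

suppress_deferred_job(Domain::Fa::Job::FavsJob, { user: })
# is equivalent to
suppress_deferred_job(Domain::Fa::Job::FavsJob, { user: user })
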
@@ -31,11 +31,10 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
enqueue_user_scan(creator, at_most_one_scan: true)
end

if first_log_entry&.status_code == 200
  enqueue_jobs_from_found_links(
    T.must(first_log_entry),
    suppress_jobs: [{ job: self.class, fa_id: post.fa_id }],
  )
if (fle = first_log_entry) && (fle.status_code == 200)
  suppress_deferred_job(Domain::Fa::Job::ScanPostJob, { fa_id: post.fa_id })
  suppress_deferred_job(Domain::Fa::Job::ScanPostJob, { post: })
  enqueue_jobs_from_found_links(fle)
end

logger.info format_tags("finished post scan")
@@ -43,10 +43,8 @@ module Domain::Fa::Job
user.save! if user

if response && response.status_code == 200 && user.present?
  enqueue_jobs_from_found_links(
    response.log_entry,
    suppress_jobs: [{ job: self.class, url_name: user.url_name }],
  )
  suppress_deferred_job(Domain::Fa::Job::UserIncrementalJob, { user: })
  enqueue_jobs_from_found_links(response.log_entry)
end
end
@@ -22,6 +22,11 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
page = update_user_from_user_page(user, response)
user_page = page&.user_page

if user.state_error? && user_page.nil?
  logger.info("page error / user not found")
  return
end

if user.state_ok? && user_page
  check_skip_gallery_scan(user)
  check_skip_favs_scan(user, user_page)
@@ -34,10 +39,8 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
ensure
  user.save! if user
  if user && response && (response.status_code == 200)
    enqueue_jobs_from_found_links(
      response.log_entry,
      suppress_jobs: [{ job: self.class, url_name: user.url_name }],
    )
    logger.info("enqueueing link finder jobs")
    enqueue_jobs_from_found_links(response.log_entry)
  end
end
@@ -63,6 +63,7 @@ class Scraper::JobBase < ApplicationJob
def initialize(*args)
  super(*T.unsafe(args))
  @deferred_jobs = T.let(Set.new, T::Set[DeferredJob])
  @suppressed_jobs = T.let(Set.new, T::Set[SuppressedJob])
  @http_client = T.let(nil, T.nilable(Scraper::HttpClient))
  @gallery_dl_client = T.let(nil, T.nilable(Scraper::GalleryDlClient))
  @first_log_entry = T.let(nil, T.nilable(HttpLogEntry))
@@ -220,10 +221,41 @@ class Scraper::JobBase < ApplicationJob
  !!@deferred_jobs.add?(DeferredJob.new(job_class:, params:, set_args:))
end

sig do
  params(
    job_class: T.class_of(Scraper::JobBase),
    params: T::Hash[Symbol, T.untyped],
  ).void
end
def suppress_deferred_job(job_class, params)
  ignore_args = job_class.gather_ignore_signature_args
  params_cleared =
    params.reject { |key, value| ignore_args.include?(key.to_sym) }
  !!@suppressed_jobs.add?(
    SuppressedJob.new(job_class:, params: params_cleared),
  )
end

sig { void }
def enqueue_deferred_jobs!
  jobs_to_enqueue =
    @deferred_jobs.filter_map do |deferred_job|
      if @suppressed_jobs.any? { |suppressed_job|
           if suppressed_job.matches?(deferred_job)
             logger.info(
               "suppressing deferred job #{deferred_job.job_class.name} with params #{deferred_job.describe_params}",
             )
             true
           end
         }
        nil
      else
        deferred_job
      end
    end

  GoodJob::Bulk.enqueue do
    @deferred_jobs.each do |deferred_job|
    jobs_to_enqueue.each do |deferred_job|
      args =
        deferred_job.params.merge(
          {
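
A minimal sketch of the intended flow, assuming the job registers suppressions before its deferred jobs are flushed (the method that defers jobs is not shown in this hunk, so only the suppression side appears):

# during the job: mark a would-be follow-up as suppressed
suppress_deferred_job(Domain::Fa::Job::UserPageJob, { user: user })

# later: enqueue_deferred_jobs! drops every deferred job matched by a
# SuppressedJob, logs "suppressing deferred job ...", and bulk-enqueues
# the remainder via GoodJob::Bulk.enqueue
enqueue_deferred_jobs!
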
@@ -1,9 +1,40 @@
# typed: strict

class DeferredJob < T::ImmutableStruct
  extend T::Sig
  include T::Struct::ActsAsComparable

  const :job_class, T.class_of(Scraper::JobBase)
  const :params, T::Hash[Symbol, T.untyped]
  const :set_args, T::Hash[Symbol, T.untyped]

  sig { returns(String) }
  def describe_params
    params
      .filter_map do |key, value|
        if value.nil?
          nil
        elsif value.is_a?(Domain::User) || value.is_a?(Domain::Post)
          "#{key}=#{value.to_param}"
        else
          "#{key}=#{value}"
        end
      end
      .map { |param| "[#{param}]" }
      .join("")
  end
end

class SuppressedJob < T::ImmutableStruct
  extend T::Sig
  include T::Struct::ActsAsComparable

  const :job_class, T.class_of(Scraper::JobBase)
  const :params, T::Hash[Symbol, T.untyped]

  sig { params(job: DeferredJob).returns(T::Boolean) }
  def matches?(job)
    job.job_class == job_class &&
      params.all? { |key, value| job.params[key] == value }
  end
end
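
A minimal sketch of the subset semantics of SuppressedJob#matches? (values below are illustrative, not from the commit): every param on the suppression must equal the deferred job's value for that key, while the deferred job may carry extra params.

deferred = DeferredJob.new(
  job_class: Domain::Fa::Job::UserPageJob,
  params: { user: user, force: true },  # "force" is a made-up extra key
  set_args: {},
)

SuppressedJob.new(
  job_class: Domain::Fa::Job::UserPageJob,
  params: { user: user },
).matches?(deferred)
# => true: same job_class and { user: } is a subset of the deferred params

SuppressedJob.new(
  job_class: Domain::Fa::Job::FavsJob,
  params: { user: user },
).matches?(deferred)
# => false: the job_class differs
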
@@ -375,4 +375,34 @@ describe Domain::Fa::Job::UserPageJob do
      )
    end
  end

  context "user not found" do
    let(:client_mock_config) do
      [
        {
          uri: "https://www.furaffinity.net/user/onefatpokemon/",
          status_code: 200,
          content_type: "text/html",
          contents:
            SpecUtil.read_fixture_file(
              "domain/fa/user_page/user_page_onefatpokemon_not_found.html",
            ),
        },
      ]
    end

    it "does not enqueue a user page job" do
      perform_now({ url_name: "onefatpokemon" })
      expect(
        SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob),
      ).to be_empty
    end

    it "marks the user as error" do
      perform_now({ url_name: "onefatpokemon" })
      user = Domain::User::FaUser.find_by(url_name: "onefatpokemon")
      expect(user).to_not be_nil
      expect(user.state).to eq("error")
    end
  end
end
@@ -109,7 +109,7 @@ RSpec.describe Scraper::FaHttpClientConfig do
it "returns the configured rate limits" do
  config = described_class.new
  expect(config.ratelimit).to eq(
    [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1]],
    [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1.5]],
  )
end
end
test/fixtures/files/domain/fa/user_page/user_page_onefatpokemon_not_found.html (new file, vendored, 25 lines)
@@ -0,0 +1,25 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- -->
<html>
<head>
<title>System Error</title>

<link href="/themes/beta/img/favicon.ico"/>
<link type="text/css" rel="stylesheet" href="/themes/beta/css/ui_theme_dark.css?u=2025011001" />
</head>

<body>

<section style="margin: 30px auto; max-width: 800px;">
<div class="section-header">
<h2>System Error</h2>
</div>
<div class="section-body">
This user cannot be found.<br/><br/>Here are a few suggestions to help you out:<br/>• Check that the username is spelled correctly.<br/>• Try to do what you were doing again, but take out any odd symbols, spaces, and underscores.<br/> <br>
<div class="alignright"><a class="button standard" href="javascript:history.go(-1)">Click here to go back</a></div>
</div>
</section>

</body>
</html>