better behavior for users that are not found

This commit is contained in:
Dylan Knutson
2025-02-28 21:51:29 +00:00
parent 74bafc027a
commit 7437586dda
9 changed files with 147 additions and 23 deletions

View File

@@ -378,6 +378,7 @@ class Domain::Fa::Job::Base < Scraper::JobBase
return nil unless page.probably_user_page?
user_page = page.user_page
user.state_ok!
user.name = user_page.name
user.registered_at = user_page.registered_since
user.num_pageviews = user_page.num_pageviews
@@ -439,15 +440,9 @@ class Domain::Fa::Job::Base < Scraper::JobBase
end
FoundLink = Scraper::LinkFinder::FoundLink
class JobDef < T::ImmutableStruct
include T::Struct::ActsAsComparable
const :job, Domain
end
sig do
params(log_entry: HttpLogEntry, suppress_jobs: T::Array[T.untyped]).void
end
def enqueue_jobs_from_found_links(log_entry, suppress_jobs: [])
sig { params(log_entry: HttpLogEntry).void }
def enqueue_jobs_from_found_links(log_entry)
return if skip_enqueue_found_links?
logger.tagged("link-finder") do
@@ -541,6 +536,7 @@ class Domain::Fa::Job::Base < Scraper::JobBase
[
/User ".+" was not found in our database\./,
/The username ".+" could not be found\./,
%r{This user cannot be found.<br/><br/>},
],
T::Array[Regexp],
)
@@ -572,12 +568,22 @@ class Domain::Fa::Job::Base < Scraper::JobBase
)
end
suppress_user_jobs =
Kernel.lambda do |user|
suppress_deferred_job(Domain::Fa::Job::UserPageJob, { user: })
suppress_deferred_job(Domain::Fa::Job::FavsJob, { user: })
suppress_deferred_job(Domain::Fa::Job::UserGalleryJob, { user: })
suppress_deferred_job(Domain::Fa::Job::UserFollowsJob, { user: })
end
if DISABLED_PAGE_PATTERNS.any? { |pattern| response.body =~ pattern }
user.state_account_disabled!
user.is_disabled = true
suppress_user_jobs.call(user)
true
elsif NOT_FOUND_PAGE_PATTERNS.any? { |pattern| response.body =~ pattern }
user.state_error!
suppress_user_jobs.call(user)
true
else
false

View File

@@ -31,11 +31,10 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
enqueue_user_scan(creator, at_most_one_scan: true)
end
if first_log_entry&.status_code == 200
enqueue_jobs_from_found_links(
T.must(first_log_entry),
suppress_jobs: [{ job: self.class, fa_id: post.fa_id }],
)
if (fle = first_log_entry) && (fle.status_code == 200)
suppress_deferred_job(Domain::Fa::Job::ScanPostJob, { fa_id: post.fa_id })
suppress_deferred_job(Domain::Fa::Job::ScanPostJob, { post: })
enqueue_jobs_from_found_links(fle)
end
logger.info format_tags("finished post scan")

View File

@@ -43,10 +43,8 @@ module Domain::Fa::Job
user.save! if user
if response && response.status_code == 200 && user.present?
enqueue_jobs_from_found_links(
response.log_entry,
suppress_jobs: [{ job: self.class, url_name: user.url_name }],
)
suppress_deferred_job(Domain::Fa::Job::UserIncrementalJob, { user: })
enqueue_jobs_from_found_links(response.log_entry)
end
end

View File

@@ -22,6 +22,11 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
page = update_user_from_user_page(user, response)
user_page = page&.user_page
if user.state_error? && user_page.nil?
logger.info("page error / user not found")
return
end
if user.state_ok? && user_page
check_skip_gallery_scan(user)
check_skip_favs_scan(user, user_page)
@@ -34,10 +39,8 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
ensure
user.save! if user
if user && response && (response.status_code == 200)
enqueue_jobs_from_found_links(
response.log_entry,
suppress_jobs: [{ job: self.class, url_name: user.url_name }],
)
logger.info("enqueueing link finder jobs")
enqueue_jobs_from_found_links(response.log_entry)
end
end

View File

@@ -63,6 +63,7 @@ class Scraper::JobBase < ApplicationJob
def initialize(*args)
super(*T.unsafe(args))
@deferred_jobs = T.let(Set.new, T::Set[DeferredJob])
@suppressed_jobs = T.let(Set.new, T::Set[SuppressedJob])
@http_client = T.let(nil, T.nilable(Scraper::HttpClient))
@gallery_dl_client = T.let(nil, T.nilable(Scraper::GalleryDlClient))
@first_log_entry = T.let(nil, T.nilable(HttpLogEntry))
@@ -220,10 +221,41 @@ class Scraper::JobBase < ApplicationJob
!!@deferred_jobs.add?(DeferredJob.new(job_class:, params:, set_args:))
end
# Registers a suppression for `job_class` with the given `params`, storing it
# in `@suppressed_jobs`. Any param whose key appears in
# `job_class.gather_ignore_signature_args` is dropped before the suppression
# is recorded, so only the remaining params take part in matching.
sig do
  params(
    job_class: T.class_of(Scraper::JobBase),
    params: T::Hash[Symbol, T.untyped],
  ).void
end
def suppress_deferred_job(job_class, params)
  ignored_keys = job_class.gather_ignore_signature_args
  significant_params =
    params.select { |key, _value| !ignored_keys.include?(key.to_sym) }
  suppression = SuppressedJob.new(job_class:, params: significant_params)
  # `Set#add?` returns nil when an equal suppression is already present.
  !!@suppressed_jobs.add?(suppression)
end
sig { void }
def enqueue_deferred_jobs!
jobs_to_enqueue =
@deferred_jobs.filter_map do |deferred_job|
if @suppressed_jobs.any? { |suppressed_job|
if suppressed_job.matches?(deferred_job)
logger.info(
"suppressing deferred job #{deferred_job.job_class.name} with params #{deferred_job.describe_params}",
)
true
end
}
nil
else
deferred_job
end
end
GoodJob::Bulk.enqueue do
@deferred_jobs.each do |deferred_job|
jobs_to_enqueue.each do |deferred_job|
args =
deferred_job.params.merge(
{

View File

@@ -1,9 +1,40 @@
# typed: strict
# Immutable value object describing a job to enqueue later: the job class,
# the params to pass to it, and the `set_args` hash carried alongside.
class DeferredJob < T::ImmutableStruct
  extend T::Sig
  include T::Struct::ActsAsComparable

  const :job_class, T.class_of(Scraper::JobBase)
  const :params, T::Hash[Symbol, T.untyped]
  const :set_args, T::Hash[Symbol, T.untyped]

  # Renders the params as a log-friendly string of the form
  # "[key=value][key=value]". Params with nil values are omitted, and
  # `Domain::User` / `Domain::Post` values are rendered via `to_param`.
  sig { returns(String) }
  def describe_params
    rendered_pairs =
      params.compact.map do |key, value|
        is_model = value.is_a?(Domain::User) || value.is_a?(Domain::Post)
        shown = is_model ? value.to_param : value
        "[#{key}=#{shown}]"
      end
    rendered_pairs.join("")
  end
end
# Immutable value object describing deferred jobs that must not be enqueued:
# a job class plus the subset of params a deferred job has to carry for the
# suppression to apply.
class SuppressedJob < T::ImmutableStruct
  extend T::Sig
  include T::Struct::ActsAsComparable

  const :job_class, T.class_of(Scraper::JobBase)
  const :params, T::Hash[Symbol, T.untyped]

  # Returns true when `job` has the same job class and every param of this
  # suppression is present in `job.params` with an equal value.
  #
  # A key must actually exist on the deferred job to match. Without the
  # `key?` check, a suppression param whose value is nil would compare equal
  # to a missing key (`nil == nil`) and suppress unrelated jobs of the class.
  #
  # NOTE: a suppression with empty `params` matches every deferred job of
  # `job_class`, because `all?` is vacuously true on an empty hash.
  sig { params(job: DeferredJob).returns(T::Boolean) }
  def matches?(job)
    return false unless job.job_class == job_class

    job_params = job.params
    params.all? do |key, value|
      job_params.key?(key) && job_params[key] == value
    end
  end
end

View File

@@ -375,4 +375,34 @@ describe Domain::Fa::Job::UserPageJob do
)
end
end
# The mocked response is an HTTP 200 whose body is the "user not found"
# fixture page, so the job has to detect the condition from the body.
context "user not found" do
  let(:client_mock_config) do
    fixture_path =
      "domain/fa/user_page/user_page_onefatpokemon_not_found.html"
    [
      {
        uri: "https://www.furaffinity.net/user/onefatpokemon/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file(fixture_path),
      },
    ]
  end

  # Both examples inspect the state left behind by the same job run.
  before { perform_now({ url_name: "onefatpokemon" }) }

  it "does not enqueue a user page job" do
    enqueued = SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob)
    expect(enqueued).to be_empty
  end

  it "marks the user as error" do
    user = Domain::User::FaUser.find_by(url_name: "onefatpokemon")
    expect(user).not_to be_nil
    expect(user.state).to eq("error")
  end
end
end

View File

@@ -109,7 +109,7 @@ RSpec.describe Scraper::FaHttpClientConfig do
it "returns the configured rate limits" do
config = described_class.new
expect(config.ratelimit).to eq(
[["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1]],
[["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1.5]],
)
end
end

View File

@@ -0,0 +1,25 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- -->
<html>
<head>
<title>System Error</title>
<link href="/themes/beta/img/favicon.ico"/>
<link type="text/css" rel="stylesheet" href="/themes/beta/css/ui_theme_dark.css?u=2025011001" />
</head>
<body>
<section style="margin: 30px auto; max-width: 800px;">
<div class="section-header">
<h2>System Error</h2>
</div>
<div class="section-body">
This user cannot be found.<br/><br/>Here are a few suggestions to help you out:<br/>&bull; Check that the username is spelled correctly.<br/>&bull; Try to do what you were doing again, but take out any odd symbols, spaces, and underscores.<br/> <br>
<div class="alignright"><a class="button standard" href="javascript:history.go(-1)">Click here to go back</a></div>
</div>
</section>
</body>
</html>