comprehensive tests for browse page job

This commit is contained in:
Dylan Knutson
2023-03-26 09:16:37 +09:00
parent deb7415650
commit 48c99c6fb0
6 changed files with 276 additions and 65 deletions

View File

@@ -49,7 +49,7 @@ class Domain::Fa::Job::FaJobBase < Scraper::JobBase
listing_page_stats.new_seen += 1 if post.new_record?
listing_page_stats.total_seen += 1
update_post_from_listings_page(job_type, post, submission, caused_by_entry)
update_and_save_post_from_listings_page(job_type, post, submission, caused_by_entry)
enqueue_user_scan(
post.creator,
caused_by_entry,
@@ -77,7 +77,7 @@ class Domain::Fa::Job::FaJobBase < Scraper::JobBase
listing_page_stats
end
def update_post_from_listings_page(job_type, post, submission, caused_by_entry)
def update_and_save_post_from_listings_page(job_type, post, submission, caused_by_entry)
if job_type == :browse_page
post.log_entry_detail["first_browse_page_id"] ||= caused_by_entry.id
elsif job_type == :gallery_page

View File

@@ -22,6 +22,10 @@ class ColorLogger
end
Logger.new(sink).tap do |logger|
if Rails.env.test?
logger.level = :error
end
def logger.prefix=(p)
@logger_prefix = p
end

View File

@@ -1,4 +1,6 @@
class Scraper::HttpFactory
@http_clients = Concurrent::ThreadLocalVar.new() { {} }
# Class-level writer for the mock HTTP client; stores it in the
# @http_client_mock class instance variable.
class << self
  attr_writer :http_client_mock
end
@@ -7,24 +9,19 @@ class Scraper::HttpFactory
if Rails.env.test?
@http_client_mock || raise("no mock set yet")
else
get_or_create_tvar_http_client(:fa, Scraper::FaHttpClientConfig.new)
get_or_create_client(:fa, Scraper::FaHttpClientConfig.new)
end
end
# Placeholder for a gallery-dl client factory.
#
# @raise [RuntimeError] always, with the message "not implemented yet"
def self.new_gallery_dl_client
  raise RuntimeError, "not implemented yet"
end
def self.get_or_create_tvar_http_client(key, config)
attr_name = :"#{self.class.name.underscore}_#{key}_klass_http_client_tvar"
thread = Thread.current
if thread.thread_variable?(attr_name)
thread.thread_variable_get(attr_name)
else
proxy_config = Rails.application.config.x.proxy || raise("no proxy config")
performer = Scraper::HttpPerformer.new(proxy_config[:name], proxy_config[:http])
http_client = Scraper::HttpClient.new(config, performer)
thread.thread_variable_set(attr_name, http_client)
http_client
end
# Returns the HTTP client stored under `key` in the current value of
# @http_clients, building and storing it on first use.
#
# @param key [Symbol] identifier for the client (e.g. :fa)
# @param config [Object] client configuration handed to Scraper::HttpClient
# @return [Scraper::HttpClient]
# @raise [RuntimeError] "no proxy config" when no proxy is configured
def self.get_or_create_client(key, config)
  clients = @http_clients.value
  cached = clients[key]
  return cached if cached

  proxy_config = Rails.application.config.x.proxy
  raise("no proxy config") unless proxy_config

  performer = Scraper::HttpPerformer.new(proxy_config[:name], proxy_config[:http])
  clients[key] = Scraper::HttpClient.new(config, performer)
end
end

View File

@@ -19,10 +19,15 @@ module ReduxScraper
config.good_job.inline_execution_respects_schedule = true
config.good_job.execution_mode = :async
config.good_job.active_record_parent_class = "ReduxApplicationRecord"
config.good_job.preserve_job_records = false
config.good_job.retry_on_unhandled_error = true
config.good_job.smaller_number_is_higher_priority = true
# job cleanup config - retain the last 24 hours of jobs
config.good_job.preserve_job_records = true
config.good_job.cleanup_preserved_jobs_before_seconds_ago = 1.day
config.good_job.cleanup_interval_jobs = 100_000
config.good_job.cleanup_interval_seconds = 4.hours
# Configuration for the application, engines, and railties goes here.
#
# These settings can be overridden in specific environments using the files

View File

@@ -21,58 +21,242 @@ describe Domain::Fa::Job::BrowsePageJob do
end.to change { GoodJob::Job.count }.by(1)
end
it "works for a browse page with no submissions" do
SpecUtil.init_http_client_mock(
@http_client_mock, [
{
uri: "https://www.furaffinity.net/browse/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_no_submissions.html"),
caused_by_entry: nil,
},
]
)
context "with no posts found on page" do
before do
ActiveJob::Base.queue_adapter = :test
Subject.new.perform({})
@log_entries = SpecUtil.init_http_client_mock(
@http_client_mock, [
{
uri: "https://www.furaffinity.net/browse/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_no_submissions.html"),
caused_by_entry: nil,
},
]
)
end
it "requests only one page" do
Subject.new.perform({})
end
end
it "requests the next page if a submission is found" do
SpecUtil.init_http_client_mock(
@http_client_mock, [
{
uri: "https://www.furaffinity.net/browse/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_one_submission.html"),
caused_by_entry: nil,
},
{
uri: "https://www.furaffinity.net/browse/2/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_no_submissions.html"),
caused_by_entry: 0,
},
]
)
# Provides `user` and `post` as callables that look the record up again on
# every `.call`, and asserts that neither record exists before each example.
shared_context "user and post" do
  let(:user) do
    -> { Domain::Fa::User.find_by(url_name: "ruby69r") }
  end

  let(:post) do
    -> { Domain::Fa::Post.find_by(fa_id: 51509268) }
  end

  before do
    expect(post.call).to be_nil
    expect(user.call).to be_nil
  end
end
expect do
Subject.new.perform({})
end.to(
change { Domain::Fa::Post.count }.by(1).and(
change { Domain::Fa::User.count }.by(1)
# Shared examples asserting whether a ScanPostJob was enqueued.
# Pass `true` to expect exactly one job whose args reference `post` and
# `@log_entries[0]`; pass `false` to expect no such job at all.
shared_examples "enqueue post scan" do |does_enqueue|
  if does_enqueue
    it "enqueues post scan job" do
      expected_args = [{ post: post.call, caused_by_entry: @log_entries[0] }]
      enqueued = SpecUtil.enqueued_jobs(Domain::Fa::Job::ScanPostJob)
      expect(enqueued).to match([including(args: expected_args)])
    end
  else
    it "does not enqueue post scan job" do
      enqueued = SpecUtil.enqueued_jobs(Domain::Fa::Job::ScanPostJob)
      expect(enqueued).to eq([])
    end
  end
end
# Shared examples asserting whether a ScanFileJob was enqueued.
# Pass `true` to expect exactly one job whose args reference `post` and
# `@log_entries[0]`; pass `false` to expect no such job at all.
shared_examples "enqueue file scan" do |does_enqueue|
  if does_enqueue
    it "enqueues file scan job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::ScanFileJob)).to match([
        including(args: [{
          post: post.call,
          caused_by_entry: @log_entries[0],
        }]),
      ])
    end
  else
    # The description previously read "post scan job" (copy-paste slip),
    # which made failures of this example point at the wrong job type.
    it "does not enqueue file scan job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::ScanFileJob)).to eq([])
    end
  end
end
# Shared examples asserting whether a UserPageJob was enqueued.
# Pass `true` to expect exactly one job whose args reference `user` and
# `@log_entries[0]`; pass `false` to expect no such job at all.
shared_examples "enqueue user page scan" do |does_enqueue|
  if does_enqueue
    it "enqueues user page job" do
      expected_args = [{ user: user.call, caused_by_entry: @log_entries[0] }]
      enqueued = SpecUtil.enqueued_jobs(Domain::Fa::Job::UserPageJob)
      expect(enqueued).to match([including(args: expected_args)])
    end
  else
    it "does not enqueue user page job" do
      enqueued = SpecUtil.enqueued_jobs(Domain::Fa::Job::UserPageJob)
      expect(enqueued).to eq([])
    end
  end
end
# Shared examples asserting whether a UserGalleryJob was enqueued.
# Pass `true` to expect exactly one job whose args reference `user` and
# `@log_entries[0]`; pass `false` to expect no such job at all.
shared_examples "enqueue user gallery scan" do |does_enqueue|
  if does_enqueue
    it "enqueues user gallery job" do
      expected_args = [{ user: user.call, caused_by_entry: @log_entries[0] }]
      enqueued = SpecUtil.enqueued_jobs(Domain::Fa::Job::UserGalleryJob)
      expect(enqueued).to match([including(args: expected_args)])
    end
  else
    it "does not enqueue user gallery job" do
      enqueued = SpecUtil.enqueued_jobs(Domain::Fa::Job::UserGalleryJob)
      expect(enqueued).to eq([])
    end
  end
end
context "with one unseen post" do
include_context "user and post"
before do
ActiveJob::Base.queue_adapter = :test
@log_entries = SpecUtil.init_http_client_mock(
@http_client_mock, [
{
uri: "https://www.furaffinity.net/browse/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_one_submission.html"),
caused_by_entry_idx: nil,
},
{
uri: "https://www.furaffinity.net/browse/2/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_no_submissions.html"),
caused_by_entry_idx: 0,
},
]
)
)
end
creator = Domain::Fa::User.find_by(url_name: "ruby69r")
refute_nil creator
expect(creator.name).to eq("Ruby_69r")
# Running the job against the mocked browse pages adds exactly one post row.
it "creates a new post" do
  run_job = -> { Subject.new.perform({}) }
  expect(&run_job).to change { Domain::Fa::Post.count }.by(1)
end
post = Domain::Fa::Post.find_by(fa_id: 51509268)
refute_nil post
expect(post.state).to eq("ok")
expect(post.title).to eq("reminder YCH AUCTION")
expect(post.creator).to eq(creator)
# Running the job against the mocked browse pages adds exactly one user row.
# This example previously counted Domain::Fa::Post (copy-paste from the
# "creates a new post" example), so user creation was never verified.
it "creates a new user" do
  expect {
    Subject.new.perform({})
  }.to change {
    Domain::Fa::User.count
  }.by(1)
end
# After the job runs, the post looked up by `post` has the expected state,
# title and creator.
it "creates a post with the right attributes" do
  Subject.new.perform({})

  created_post = post.call
  expect(created_post.state).to eq("ok")
  expect(created_post.title).to eq("reminder YCH AUCTION")
  expect(created_post.creator).to eq(user.call)
end
# After the job runs, the user looked up by `user` carries the display name
# "Ruby_69r".
it "creates a user with the right attributes" do
  Subject.new.perform({})

  created_user = user.call
  expect(created_user.name).to eq("Ruby_69r")
end
end
# Scenarios where the post and its creator already exist in the database
# before the browse page is fetched. Each nested context adjusts the
# pre-existing records, runs the job once, and then checks which follow-up
# jobs were enqueued via the shared examples.
context "with one seen post" do
# The shared context's `before` (asserting both records are absent) is
# declared ahead of the `before` below, so it should run before the records
# are created here — NOTE(review): relies on RSpec hook declaration order.
include_context "user and post"
before do
# SpecUtil.enqueued_jobs raises unless the :test adapter is active.
ActiveJob::Base.queue_adapter = :test
# NOTE(review): the seeded name is "Ruby_68r" while the unseen-post spec
# expects "Ruby_69r" — confirm the difference is intentional.
creator = Domain::Fa::User.create!({
url_name: "ruby69r",
name: "Ruby_68r",
})
Domain::Fa::Post.create!({
fa_id: 51509268,
creator: creator,
})
# Only the first browse page is mocked in this context.
@log_entries = SpecUtil.init_http_client_mock(
@http_client_mock, [
{
uri: "https://www.furaffinity.net/browse/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_one_submission.html"),
caused_by_entry_idx: nil,
},
]
)
end
# Baseline: records exist but carry no scan data.
context "and nothing yet scanned" do
before { described_class.new.perform({}) }
include_examples "enqueue post scan", true
include_examples "enqueue file scan", false
include_examples "enqueue user page scan", true
include_examples "enqueue user gallery scan", true
end
# The post has a file URL but no attached file.
# Presumably a set file_url_str is what marks the post page as scanned —
# TODO confirm against the job implementation.
context "and post page scanned" do
before do
found_post = post.call
found_post.file_url_str = "http://www.example.com/img.jpg"
found_post.save!
described_class.new.perform({})
end
include_examples "enqueue post scan", false
include_examples "enqueue file scan", true
include_examples "enqueue user page scan", true
include_examples "enqueue user gallery scan", true
end
# The post has both a file URL and a saved HTTP log entry attached as its
# file, so neither a post scan nor a file scan is expected.
context "and post file scanned" do
before do
found_post = post.call
found_post.file_url_str = "http://www.example.com/foo.txt"
file = SpecUtil.build_http_log_entry(uri: found_post.file_uri)
file.save!
found_post.file = file
found_post.save!
described_class.new.perform({})
end
include_examples "enqueue post scan", false
include_examples "enqueue file scan", false
include_examples "enqueue user page scan", true
include_examples "enqueue user gallery scan", true
end
# The creator's gallery was scanned an hour ago; only the gallery job is
# expected to be skipped.
context "and user gallery already scanned" do
before do
creator = user.call
creator.scanned_gallery_at = 1.hour.ago
creator.save!
described_class.new.perform({})
end
include_examples "enqueue post scan", true
include_examples "enqueue file scan", false
include_examples "enqueue user page scan", true
include_examples "enqueue user gallery scan", false
end
# The creator's page was scanned an hour ago; only the user page job is
# expected to be skipped.
context "and user page already scanned" do
before do
creator = user.call
creator.scanned_page_at = 1.hour.ago
creator.save!
described_class.new.perform({})
end
include_examples "enqueue post scan", true
include_examples "enqueue file scan", false
include_examples "enqueue user page scan", false
include_examples "enqueue user gallery scan", true
end
end
end

View File

@@ -51,7 +51,7 @@ class SpecUtil
end
def self.build_http_log_entry(
uri: "http://example.com",
uri: "http://example.com/",
status_code: 200,
content_type: "text/plain",
contents: nil
@@ -106,8 +106,8 @@ class SpecUtil
log_entries << log_entry
caused_by_entry = nil
if request[:caused_by_entry]
caused_by_entry = log_entries[request[:caused_by_entry]]
if request[:caused_by_entry_idx]
caused_by_entry = log_entries[request[:caused_by_entry_idx]]
end
expect(http_client_mock).to(receive(:get).
@@ -121,9 +121,30 @@ class SpecUtil
log_entry
)))
end
log_entries
end
# Reads a fixture file and returns its contents.
#
# @param path [String] path relative to test/fixtures/files under Rails.root
# @return [String] the file contents
def self.read_fixture_file(path)
  fixture_path = Rails.root.join("test/fixtures/files", path)
  File.read(fixture_path)
end
# Lists jobs enqueued on the ActiveJob test adapter, optionally restricted
# to one job class, with their arguments deserialized.
#
# @param job_klass [Class, nil] when given, only jobs whose :job equals it
# @return [Array<Hash>] hashes with :job, :queue, :priority and :args
# @raise [StandardError] when the active queue adapter is not the TestAdapter
def self.enqueued_jobs(job_klass = nil)
  adapter = ::ActiveJob::Base.queue_adapter
  unless ::ActiveJob::QueueAdapters::TestAdapter === adapter
    raise StandardError, "set `ActiveJob::Base.queue_adapter = :test`"
  end

  selected = adapter.enqueued_jobs.select do |job|
    !job_klass || job[:job] == job_klass
  end

  selected.map do |job|
    summary = job.slice(:job, :queue, :priority)
    summary.merge(args: ::ActiveJob::Arguments.deserialize(job[:args]))
  end
end
end