comprehensive tests for browse page job
This commit is contained in:
@@ -49,7 +49,7 @@ class Domain::Fa::Job::FaJobBase < Scraper::JobBase
|
||||
listing_page_stats.new_seen += 1 if post.new_record?
|
||||
listing_page_stats.total_seen += 1
|
||||
|
||||
update_post_from_listings_page(job_type, post, submission, caused_by_entry)
|
||||
update_and_save_post_from_listings_page(job_type, post, submission, caused_by_entry)
|
||||
enqueue_user_scan(
|
||||
post.creator,
|
||||
caused_by_entry,
|
||||
@@ -77,7 +77,7 @@ class Domain::Fa::Job::FaJobBase < Scraper::JobBase
|
||||
listing_page_stats
|
||||
end
|
||||
|
||||
def update_post_from_listings_page(job_type, post, submission, caused_by_entry)
|
||||
def update_and_save_post_from_listings_page(job_type, post, submission, caused_by_entry)
|
||||
if job_type == :browse_page
|
||||
post.log_entry_detail["first_browse_page_id"] ||= caused_by_entry.id
|
||||
elsif job_type == :gallery_page
|
||||
|
||||
@@ -22,6 +22,10 @@ class ColorLogger
|
||||
end
|
||||
|
||||
Logger.new(sink).tap do |logger|
|
||||
if Rails.env.test?
|
||||
logger.level = :error
|
||||
end
|
||||
|
||||
def logger.prefix=(p)
|
||||
@logger_prefix = p
|
||||
end
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
class Scraper::HttpFactory
|
||||
@http_clients = Concurrent::ThreadLocalVar.new() { {} }
|
||||
|
||||
# Stores the HTTP client double on the class.
#
# The value is kept in the class-level `@http_client_mock` instance
# variable, which the factory hands out instead of a real client when
# `Rails.env.test?` is true.
#
# @param mock [Object] the object to hand out in place of a real client
def self.http_client_mock=(mock)
  @http_client_mock = mock
end
|
||||
@@ -7,24 +9,19 @@ class Scraper::HttpFactory
|
||||
if Rails.env.test?
|
||||
@http_client_mock || raise("no mock set yet")
|
||||
else
|
||||
get_or_create_tvar_http_client(:fa, Scraper::FaHttpClientConfig.new)
|
||||
get_or_create_client(:fa, Scraper::FaHttpClientConfig.new)
|
||||
end
|
||||
end
|
||||
|
||||
# Placeholder factory method: no client is built yet, every call raises.
#
# @raise [RuntimeError] always, with the message "not implemented yet"
def self.new_gallery_dl_client
  raise("not implemented yet")
end
|
||||
|
||||
def self.get_or_create_tvar_http_client(key, config)
|
||||
attr_name = :"#{self.class.name.underscore}_#{key}_klass_http_client_tvar"
|
||||
thread = Thread.current
|
||||
if thread.thread_variable?(attr_name)
|
||||
thread.thread_variable_get(attr_name)
|
||||
else
|
||||
proxy_config = Rails.application.config.x.proxy || raise("no proxy config")
|
||||
performer = Scraper::HttpPerformer.new(proxy_config[:name], proxy_config[:http])
|
||||
http_client = Scraper::HttpClient.new(config, performer)
|
||||
thread.thread_variable_set(attr_name, http_client)
|
||||
http_client
|
||||
end
|
||||
# Returns the HTTP client cached under +key+, building it on first use.
#
# The cache is the Hash exposed by `@http_clients.value`. Only +key+ takes
# part in the lookup: once a client exists for a key, a later call with a
# different +config+ still returns the cached client.
#
# @param key [Object] cache key (callers in this class pass a Symbol such as :fa)
# @param config [Object] client configuration handed to Scraper::HttpClient.new
# @return [Scraper::HttpClient] the cached or newly built client
# @raise [RuntimeError] "no proxy config" when
#   Rails.application.config.x.proxy is nil/false; nothing is cached then
def self.get_or_create_client(key, config)
  @http_clients.value[key] ||= begin
    proxy_config = Rails.application.config.x.proxy || raise("no proxy config")
    performer = Scraper::HttpPerformer.new(proxy_config[:name], proxy_config[:http])
    Scraper::HttpClient.new(config, performer)
  end
end
|
||||
end
|
||||
|
||||
@@ -19,10 +19,15 @@ module ReduxScraper
|
||||
config.good_job.inline_execution_respects_schedule = true
|
||||
config.good_job.execution_mode = :async
|
||||
config.good_job.active_record_parent_class = "ReduxApplicationRecord"
|
||||
config.good_job.preserve_job_records = false
|
||||
config.good_job.retry_on_unhandled_error = true
|
||||
config.good_job.smaller_number_is_higher_priority = true
|
||||
|
||||
# job cleanup config - retain the last 24 hours of jobs
|
||||
config.good_job.preserve_job_records = true
|
||||
config.good_job.cleanup_preserved_jobs_before_seconds_ago = 1.day
|
||||
config.good_job.cleanup_interval_jobs = 100_000
|
||||
config.good_job.cleanup_interval_seconds = 4.hours
|
||||
|
||||
# Configuration for the application, engines, and railties goes here.
|
||||
#
|
||||
# These settings can be overridden in specific environments using the files
|
||||
|
||||
@@ -21,58 +21,242 @@ describe Domain::Fa::Job::BrowsePageJob do
|
||||
end.to change { GoodJob::Job.count }.by(1)
|
||||
end
|
||||
|
||||
it "works for a browse page with no submissions" do
|
||||
SpecUtil.init_http_client_mock(
|
||||
@http_client_mock, [
|
||||
{
|
||||
uri: "https://www.furaffinity.net/browse/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_no_submissions.html"),
|
||||
caused_by_entry: nil,
|
||||
},
|
||||
]
|
||||
)
|
||||
context "with no posts found on page" do
|
||||
before do
|
||||
ActiveJob::Base.queue_adapter = :test
|
||||
|
||||
Subject.new.perform({})
|
||||
@log_entries = SpecUtil.init_http_client_mock(
|
||||
@http_client_mock, [
|
||||
{
|
||||
uri: "https://www.furaffinity.net/browse/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_no_submissions.html"),
|
||||
caused_by_entry: nil,
|
||||
},
|
||||
]
|
||||
)
|
||||
end
|
||||
|
||||
it "requests only one page" do
|
||||
Subject.new.perform({})
|
||||
end
|
||||
end
|
||||
|
||||
it "requests the next page if a submission is found" do
|
||||
SpecUtil.init_http_client_mock(
|
||||
@http_client_mock, [
|
||||
{
|
||||
uri: "https://www.furaffinity.net/browse/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_one_submission.html"),
|
||||
caused_by_entry: nil,
|
||||
},
|
||||
{
|
||||
uri: "https://www.furaffinity.net/browse/2/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_no_submissions.html"),
|
||||
caused_by_entry: 0,
|
||||
},
|
||||
]
|
||||
)
|
||||
shared_context "user and post" do
|
||||
let(:user) { proc { Domain::Fa::User.find_by(url_name: "ruby69r") } }
|
||||
let(:post) { proc { Domain::Fa::Post.find_by(fa_id: 51509268) } }
|
||||
before do
|
||||
expect(post.call).to be_nil
|
||||
expect(user.call).to be_nil
|
||||
end
|
||||
end
|
||||
|
||||
expect do
|
||||
Subject.new.perform({})
|
||||
end.to(
|
||||
change { Domain::Fa::Post.count }.by(1).and(
|
||||
change { Domain::Fa::User.count }.by(1)
|
||||
# Shared example: depending on +does_enqueue+, asserts that exactly one
# ScanPostJob was enqueued for the post under test (caused by the first
# mocked log entry), or that none was enqueued at all.
shared_examples "enqueue post scan" do |does_enqueue|
  if does_enqueue
    it "enqueues post scan job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::ScanPostJob)).to match([
        including(args: [{
          post: post.call,
          caused_by_entry: @log_entries[0],
        }]),
      ])
    end
  else
    it "does not enqueue post scan job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::ScanPostJob)).to eq([])
    end
  end
end
|
||||
|
||||
# Shared example: depending on +does_enqueue+, asserts that exactly one
# ScanFileJob was enqueued for the post under test (caused by the first
# mocked log entry), or that none was enqueued at all.
shared_examples "enqueue file scan" do |does_enqueue|
  if does_enqueue
    it "enqueues file scan job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::ScanFileJob)).to match([
        including(args: [{
          post: post.call,
          caused_by_entry: @log_entries[0],
        }]),
      ])
    end
  else
    # Title previously read "post scan job" although ScanFileJob is checked.
    it "does not enqueue file scan job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::ScanFileJob)).to eq([])
    end
  end
end
|
||||
|
||||
# Shared example: depending on +does_enqueue+, asserts that exactly one
# UserPageJob was enqueued for the user under test (caused by the first
# mocked log entry), or that none was enqueued at all.
shared_examples "enqueue user page scan" do |does_enqueue|
  if does_enqueue
    it "enqueues user page job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::UserPageJob)).to match([
        including(args: [{
          user: user.call,
          caused_by_entry: @log_entries[0],
        }]),
      ])
    end
  else
    it "does not enqueue user page job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::UserPageJob)).to eq([])
    end
  end
end
|
||||
|
||||
# Shared example: depending on +does_enqueue+, asserts that exactly one
# UserGalleryJob was enqueued for the user under test (caused by the first
# mocked log entry), or that none was enqueued at all.
shared_examples "enqueue user gallery scan" do |does_enqueue|
  if does_enqueue
    it "enqueues user gallery job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::UserGalleryJob)).to match([
        including(args: [{
          user: user.call,
          caused_by_entry: @log_entries[0],
        }]),
      ])
    end
  else
    it "does not enqueue user gallery job" do
      expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::UserGalleryJob)).to eq([])
    end
  end
end
|
||||
|
||||
context "with one unseen post" do
|
||||
include_context "user and post"
|
||||
before do
|
||||
ActiveJob::Base.queue_adapter = :test
|
||||
@log_entries = SpecUtil.init_http_client_mock(
|
||||
@http_client_mock, [
|
||||
{
|
||||
uri: "https://www.furaffinity.net/browse/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_one_submission.html"),
|
||||
caused_by_entry_idx: nil,
|
||||
},
|
||||
{
|
||||
uri: "https://www.furaffinity.net/browse/2/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_no_submissions.html"),
|
||||
caused_by_entry_idx: 0,
|
||||
},
|
||||
]
|
||||
)
|
||||
)
|
||||
end
|
||||
|
||||
creator = Domain::Fa::User.find_by(url_name: "ruby69r")
|
||||
refute_nil creator
|
||||
expect(creator.name).to eq("Ruby_69r")
|
||||
it "creates a new post" do
|
||||
expect {
|
||||
Subject.new.perform({})
|
||||
}.to change {
|
||||
Domain::Fa::Post.count
|
||||
}.by(1)
|
||||
end
|
||||
|
||||
post = Domain::Fa::Post.find_by(fa_id: 51509268)
|
||||
refute_nil post
|
||||
expect(post.state).to eq("ok")
|
||||
expect(post.title).to eq("reminder YCH AUCTION")
|
||||
expect(post.creator).to eq(creator)
|
||||
# Running the job against the one-submission browse page must add exactly
# one user record. Counts users: the example previously counted posts,
# duplicating "creates a new post".
it "creates a new user" do
  expect {
    Subject.new.perform({})
  }.to change {
    Domain::Fa::User.count
  }.by(1)
end
|
||||
|
||||
it "creates a post with the right attributes" do
|
||||
Subject.new.perform({})
|
||||
expect(post.call.state).to eq("ok")
|
||||
expect(post.call.title).to eq("reminder YCH AUCTION")
|
||||
expect(post.call.creator).to eq(user.call)
|
||||
end
|
||||
|
||||
it "creates a user with the right attributes" do
|
||||
Subject.new.perform({})
|
||||
expect(user.call.name).to eq("Ruby_69r")
|
||||
end
|
||||
end
|
||||
|
||||
context "with one seen post" do
|
||||
include_context "user and post"
|
||||
before do
|
||||
ActiveJob::Base.queue_adapter = :test
|
||||
|
||||
creator = Domain::Fa::User.create!({
|
||||
url_name: "ruby69r",
|
||||
name: "Ruby_68r",
|
||||
})
|
||||
Domain::Fa::Post.create!({
|
||||
fa_id: 51509268,
|
||||
creator: creator,
|
||||
})
|
||||
|
||||
@log_entries = SpecUtil.init_http_client_mock(
|
||||
@http_client_mock, [
|
||||
{
|
||||
uri: "https://www.furaffinity.net/browse/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_one_submission.html"),
|
||||
caused_by_entry_idx: nil,
|
||||
},
|
||||
]
|
||||
)
|
||||
end
|
||||
|
||||
context "and nothing yet scanned" do
|
||||
before { described_class.new.perform({}) }
|
||||
include_examples "enqueue post scan", true
|
||||
include_examples "enqueue file scan", false
|
||||
include_examples "enqueue user page scan", true
|
||||
include_examples "enqueue user gallery scan", true
|
||||
end
|
||||
|
||||
context "and post page scanned" do
|
||||
before do
|
||||
found_post = post.call
|
||||
found_post.file_url_str = "http://www.example.com/img.jpg"
|
||||
found_post.save!
|
||||
described_class.new.perform({})
|
||||
end
|
||||
|
||||
include_examples "enqueue post scan", false
|
||||
include_examples "enqueue file scan", true
|
||||
include_examples "enqueue user page scan", true
|
||||
include_examples "enqueue user gallery scan", true
|
||||
end
|
||||
|
||||
context "and post file scanned" do
|
||||
before do
|
||||
found_post = post.call
|
||||
found_post.file_url_str = "http://www.example.com/foo.txt"
|
||||
file = SpecUtil.build_http_log_entry(uri: found_post.file_uri)
|
||||
file.save!
|
||||
found_post.file = file
|
||||
found_post.save!
|
||||
described_class.new.perform({})
|
||||
end
|
||||
|
||||
include_examples "enqueue post scan", false
|
||||
include_examples "enqueue file scan", false
|
||||
include_examples "enqueue user page scan", true
|
||||
include_examples "enqueue user gallery scan", true
|
||||
end
|
||||
|
||||
context "and user gallery already scanned" do
|
||||
before do
|
||||
creator = user.call
|
||||
creator.scanned_gallery_at = 1.hour.ago
|
||||
creator.save!
|
||||
described_class.new.perform({})
|
||||
end
|
||||
|
||||
include_examples "enqueue post scan", true
|
||||
include_examples "enqueue file scan", false
|
||||
include_examples "enqueue user page scan", true
|
||||
include_examples "enqueue user gallery scan", false
|
||||
end
|
||||
|
||||
context "and user page already scanned" do
|
||||
before do
|
||||
creator = user.call
|
||||
creator.scanned_page_at = 1.hour.ago
|
||||
creator.save!
|
||||
described_class.new.perform({})
|
||||
end
|
||||
|
||||
include_examples "enqueue post scan", true
|
||||
include_examples "enqueue file scan", false
|
||||
include_examples "enqueue user page scan", false
|
||||
include_examples "enqueue user gallery scan", true
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -51,7 +51,7 @@ class SpecUtil
|
||||
end
|
||||
|
||||
def self.build_http_log_entry(
|
||||
uri: "http://example.com",
|
||||
uri: "http://example.com/",
|
||||
status_code: 200,
|
||||
content_type: "text/plain",
|
||||
contents: nil
|
||||
@@ -106,8 +106,8 @@ class SpecUtil
|
||||
log_entries << log_entry
|
||||
|
||||
caused_by_entry = nil
|
||||
if request[:caused_by_entry]
|
||||
caused_by_entry = log_entries[request[:caused_by_entry]]
|
||||
if request[:caused_by_entry_idx]
|
||||
caused_by_entry = log_entries[request[:caused_by_entry_idx]]
|
||||
end
|
||||
|
||||
expect(http_client_mock).to(receive(:get).
|
||||
@@ -121,9 +121,30 @@ class SpecUtil
|
||||
log_entry
|
||||
)))
|
||||
end
|
||||
|
||||
log_entries
|
||||
end
|
||||
|
||||
# Reads a fixture file and returns its contents.
#
# @param path [String] path relative to "test/fixtures/files" under Rails.root
# @return [String] the file contents as returned by File.read
def self.read_fixture_file(path)
  File.read(Rails.root.join("test/fixtures/files", path))
end
|
||||
|
||||
# Lists the jobs recorded by ActiveJob's test queue adapter.
#
# Each returned hash carries only the :job, :queue and :priority entries of
# the recorded job plus :args, which holds the job's arguments after
# ::ActiveJob::Arguments.deserialize.
#
# @param job_klass [Class, nil] when given, only jobs whose :job entry
#   equals this class are returned; when nil, all jobs are returned
# @return [Array<Hash>] one hash per matching enqueued job
# @raise [StandardError] when the current queue adapter is not a
#   ::ActiveJob::QueueAdapters::TestAdapter
def self.enqueued_jobs(job_klass = nil)
  adapter = ::ActiveJob::Base.queue_adapter
  unless ::ActiveJob::QueueAdapters::TestAdapter === adapter
    raise StandardError, "set `ActiveJob::Base.queue_adapter = :test`"
  end

  jobs = adapter.enqueued_jobs
  jobs = jobs.select { |job| job[:job] == job_klass } if job_klass

  jobs.map do |job|
    job.slice(:job, :queue, :priority).merge(
      args: ::ActiveJob::Arguments.deserialize(job[:args])
    )
  end
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user