# Files
# redux-scraper/spec/jobs/domain/fa/job/scan_post_job_spec.rb
# 2025-09-07 18:27:22 +00:00

# 329 lines
# 10 KiB
# Ruby

# typed: false
require "rails_helper"
describe Domain::Fa::Job::ScanPostJob do
# Double standing in for ::Scraper::HttpClient in every example.
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }

# Installs the double on Scraper::ClientFactory and programs it from
# `client_mock_config`, which every context below defines. The helper's
# return value is kept in @log_entries; examples compare its first element
# against what the job recorded.
before do
  Scraper::ClientFactory.http_client_mock = http_client_mock
  @log_entries =
    HttpClientMockHelpers.init_http_client_mock(http_client_mock, client_mock_config)
end

# Fixture HTML used as the response body in the "when post is removed" context.
let(:post_404_fixture) { SpecUtil.read_fixture_file("domain/fa/submission/submission_404_page.html") }
# Eagerly persists an FaPost carrying only fa_id 59_714_213, exposed as `post`.
shared_context "post 59_714_213 already exists" do
  let!(:post) do
    Domain::Post::FaPost.create!(fa_id: 59_714_213)
  end
end
# Provides `post_fixture` (HTML for submission 59714213) and a
# `client_mock_config` containing one mocked response for its view URL.
shared_context "post with no keywords fixture" do
  let(:post_fixture) { SpecUtil.read_fixture_file("domain/fa/submission/submission_page_59714213.html") }

  let(:client_mock_config) do
    submission_response = {
      uri: "https://www.furaffinity.net/view/59714213/",
      requested_at: Time.parse("Jan 31, 2025 16:17 UTC"),
      status_code: 200,
      content_type: "text/html",
      contents: post_fixture,
    }
    [submission_response]
  end
end
# Provides `post_fixture` (HTML for submission 59723907) and a
# `client_mock_config` containing one mocked response for its view URL.
shared_context "post with keywords fixture" do
  let(:post_fixture) { SpecUtil.read_fixture_file("domain/fa/submission/submission_page_59723907.html") }

  let(:client_mock_config) do
    submission_response = {
      uri: "https://www.furaffinity.net/view/59723907/",
      status_code: 200,
      content_type: "text/html",
      requested_at: Time.parse("Feb 1, 2025 15:45 UTC"),
      contents: post_fixture,
    }
    [submission_response]
  end
end
# Eagerly persists the FaUser "-creeps", exposed as `creator`.
shared_context "creator is creeps" do
  let!(:creator) { Domain::User::FaUser.create!(name: "-creeps", url_name: "-creeps") }
end
# Eagerly persists the FaUser "lizardlars", exposed as `creator`.
shared_context "creator is lizardlars" do
  let!(:creator) { Domain::User::FaUser.create!(name: "lizardlars", url_name: "lizardlars") }
end
# No FaUser is seeded here: scanning the post is expected to create the
# creator record and enqueue a UserPageJob for it.
context "when the creator does not exist" do
  include_context "post with no keywords fixture"

  it "creates a new creator" do
    expect { perform_now({ fa_id: 59_714_213 }) }.to change(Domain::User::FaUser, :count).by(1)
  end

  it "enqueues a user page job" do
    # Precondition: the creator is absent before the job runs.
    expect(Domain::User::FaUser.find_by(url_name: "-creeps")).to be_nil

    perform_now({ fa_id: 59_714_213 })

    creator = Domain::User::FaUser.find_by(url_name: "-creeps")
    expect(creator).not_to be_nil

    user_page_job_args = SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob)
    expect(user_page_job_args).to include(hash_including({ user: creator }))
  end
end
# Scan of submission 59714213 with its creator ("-creeps") already persisted.
context "when scanning a post" do
  include_context "post with no keywords fixture"
  include_context "creator is creeps"

  context "and the post does not exist" do
    it "creates a new post with correct attributes" do
      perform_now({ fa_id: 59_714_213 })

      post = Domain::Post::FaPost.find_by(fa_id: 59_714_213)
      expect(post).not_to be_nil

      # Plain scalar attributes parsed from the fixture page.
      expect(post).to have_attributes(
        state: "ok",
        title: "Slayer",
        category: "All",
        theme: "All",
        species: "Unspecified / Any",
        gender: "Any",
        num_favorites: 0,
        num_comments: 0,
        num_views: 3,
      )
      expect(post.last_submission_log_entry).to eq(@log_entries.first)
      expect(post.description).to include("My newest oil painting")

      expect(post.creator).not_to be_nil
      expect(post.creator).to have_attributes(url_name: "-creeps", name: "-creeps")

      expect(post.file.url_str).to eq(
        "https://d.furaffinity.net/art/-creeps/1738343855/1738343855.-creeps_slayerlr.jpg",
      )
      expect(post.posted_at).to be_within(1.second).of(Time.parse("Jan 31, 2025 12:17 PM -08:00"))
      expect(post.scanned_at).to be_within(3.seconds).of(Time.now)
    end
  end

  context "and the post exists but has not been scanned yet" do
    include_context "post 59_714_213 already exists"

    it "updates the post attributes" do
      post.update!(title: "old title")

      title_updated = change(post, :title).from("old title").to("Slayer")
      scan_time_set = change(post, :scanned_at).from(nil).to(be_within(3.seconds).of(Time.now))
      state_untouched = not_change(post, :state)

      expect do
        perform_now({ fa_id: 59_714_213 })
        post.reload
      end.to title_updated.and(scan_time_set).and(state_untouched)
    end
  end

  it "enqueues a file scan job" do
    perform_now({ fa_id: 59_714_213 })

    post = Domain::Post::FaPost.find_by(fa_id: 59_714_213)
    expected_args = [{ file: post.file, caused_by_entry: @log_entries.first }]
    expect(SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanFileJob)).to match(expected_args)
  end
end
# Scans submission 62198493 using a fixture whose file name dates it
# 2025-09-07. No post or creator is seeded in this context, so everything
# asserted below must have been created by the job itself.
context "when scanning a post from the September 2025 update" do
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/view/62198493/",
        status_code: 200,
        content_type: "text/html",
        contents:
          SpecUtil.read_fixture_file(
            "domain/fa/submission/submission_page_62198493_2025-09-07.html",
          ),
      },
    ]
  end

  it "updates the post attributes" do
    perform_now({ fa_id: 62_198_493 })

    post = Domain::Post::FaPost.find_by(fa_id: 62_198_493)
    # Fail with an expectation error (not NoMethodError on nil) if the job
    # did not create the post.
    expect(post).not_to be_nil
    expect(post.state).to eq("ok")
    expect(post.title).to eq("harlan")

    # Require the creator to exist before comparing: matching post.creator
    # against a bare find_by would pass as nil == nil if the job never
    # created or linked the user.
    creator = Domain::User::FaUser.find_by(url_name: "charkiexd")
    expect(creator).not_to be_nil
    expect(post.creator).to eq(creator)

    expect(post.posted_at).to be_within(1.second).of(
      Time.parse("Sep 6, 2025 9:07:01 PM -07:00"),
    )
  end
end
# The post already has a file and a scanned_at of one day ago; no mocked
# responses are configured for this context.
context "when scanning an already scanned post" do
  include_context "post 59_714_213 already exists"
  include_context "creator is creeps"

  let(:client_mock_config) { [] }

  before do
    post.scanned_at = 1.day.ago
    post.build_file(url_str: "https://example.com/image.jpg")
    post.file.save!
    post.save!
  end

  it "does not perform a new scan" do
    # by_at_most(1.second) accepts an unchanged timestamp, whereas a fresh
    # scan would move scanned_at forward by roughly a day.
    expect do
      perform_now({ post: post })
      post.reload
    end.to change(post, :scanned_at).by_at_most(1.second)
  end
end
# Scan of submission 59723907, whose fixture page carries keywords; the
# creator ("lizardlars") is already persisted.
context "the post has keywords" do
  include_context "post with keywords fixture"
  include_context "creator is lizardlars"

  it "updates the post attributes" do
    perform_now({ fa_id: 59_723_907 })

    post = Domain::Post::FaPost.find_by(fa_id: 59_723_907)
    expect(post).to have_attributes(
      state: "ok",
      title: "Oh No! Stuck With Her Step Bro | Chapter 2 | Page 8",
    )
    expect(post.creator).to eq(creator)
    expect(post.posted_at).to be_within(1.second).of(Time.parse("Feb 1, 2025 07:15 AM -08:00"))
    expect(post.keywords).to match(array_including("wolfgang", "kiss"))
    expect(post.scanned_at).to be_within(3.seconds).of(Time.now)
  end
end
# The view URL answers with status 200 but the body is the 404 fixture page;
# the job is expected to flag the existing post as removed.
context "when post is removed" do
  include_context "post 59_714_213 already exists"

  let(:client_mock_config) do
    not_found_response = {
      uri: "https://www.furaffinity.net/view/59714213/",
      status_code: 200,
      content_type: "text/html",
      contents: post_404_fixture,
    }
    [not_found_response]
  end

  it "marks the post as removed" do
    perform_now({ post: post })

    expect(post.reload.state).to eq("removed")
  end
end
# Table-driven checks of described_class.uri_same_with_normalized_facdn_host?.
# Per the result table below, two URLs compare equal only when their paths
# match and both hosts are one of d.facdn.net / d.furaffinity.net.
describe "#uri_same_with_normalized_facdn_host?" do
  let(:client_mock_config) { [] }

  # Runs the comparison for each scheme combination. Expects host1, host2,
  # path1 and path2 to be supplied by the including context.
  shared_examples "has result" do |result|
    {
      "both have schema" => ["https://", "https://"],
      "both missing schema" => ["//", "//"],
      "one has schema" => ["https://", "//"],
    }.each do |label, (prefix1, prefix2)|
      it "is #{result}, #{label}" do
        url1 = "#{prefix1}#{host1}#{path1}"
        url2 = "#{prefix2}#{host2}#{path2}"

        outcome = described_class.uri_same_with_normalized_facdn_host?(url1, url2)
        expect(outcome).to eq(result)
      end
    end
  end

  # Host pairings, each exposed as host1 / host2.
  {
    "host: different cdn hosts" => ["d.facdn.net", "d.furaffinity.net"],
    "host: both hosts are d.facdn.net" => ["d.facdn.net", "d.facdn.net"],
    "host: both hosts are d.furaffinity.net" => ["d.furaffinity.net", "d.furaffinity.net"],
    "host: one domain is not a cdn" => ["d.facdn.net", "example.com"],
  }.each do |context_name, (first_host, second_host)|
    shared_context context_name do
      let(:host1) { first_host }
      let(:host2) { second_host }
    end
  end

  # Path pairings, each exposed as path1 / path2.
  {
    "paths: are the same" => [
      "/art/user/1234567890/image.jpg",
      "/art/user/1234567890/image.jpg",
    ],
    "paths: are different" => [
      "/art/user/1234567890/image.jpg",
      "/art/user/1234567890/some_other_image.jpg",
    ],
  }.each do |context_name, (first_path, second_path)|
    shared_context context_name do
      let(:path1) { first_path }
      let(:path2) { second_path }
    end
  end

  # [host context, path context, expected result]
  expectations = [
    ["host: different cdn hosts", "paths: are the same", true],
    ["host: both hosts are d.facdn.net", "paths: are the same", true],
    ["host: both hosts are d.furaffinity.net", "paths: are the same", true],
    ["host: one domain is not a cdn", "paths: are the same", false],
    ["host: different cdn hosts", "paths: are different", false],
    ["host: both hosts are d.facdn.net", "paths: are different", false],
    ["host: both hosts are d.furaffinity.net", "paths: are different", false],
    ["host: one domain is not a cdn", "paths: are different", false],
  ]

  expectations.each do |host_context, path_context, expected|
    context "#{host_context} and #{path_context}" do
      include_context host_context
      include_context path_context
      include_examples "has result", expected
    end
  end
end
end