329 lines
10 KiB
Ruby
329 lines
10 KiB
Ruby
# typed: false
|
|
require "rails_helper"
|
|
|
|
describe Domain::Fa::Job::ScanPostJob do
|
|
# Test double standing in for the scraper's HTTP client.
let(:http_client_mock) do
  instance_double("::Scraper::HttpClient")
end
|
|
# Hands the mock client to the client factory and initialises it from the
# context's `client_mock_config`. The helper's return value is kept in
# @log_entries because examples compare its first element against what the
# job recorded (presumably one log entry per configured response -- confirm
# against HttpClientMockHelpers).
before do
  client = http_client_mock
  Scraper::ClientFactory.http_client_mock = client
  @log_entries =
    HttpClientMockHelpers.init_http_client_mock(client, client_mock_config)
end
|
|
|
|
# Contents of the submission 404 page fixture.
let(:post_404_fixture) do
  fixture_path = "domain/fa/submission/submission_404_page.html"
  SpecUtil.read_fixture_file(fixture_path)
end
|
|
|
|
# Eagerly (`let!`) persists an FaPost for fa_id 59714213 with no other
# attributes set.
shared_context "post 59_714_213 already exists" do
  let!(:post) do
    Domain::Post::FaPost.create!(fa_id: 59_714_213)
  end
end
|
|
|
|
# Provides the page fixture for submission 59714213 and a single mock
# response that returns it for the submission's /view/ URL.
shared_context "post with no keywords fixture" do
  let(:post_fixture) do
    SpecUtil.read_fixture_file(
      "domain/fa/submission/submission_page_59714213.html",
    )
  end

  let(:client_mock_config) do
    view_response = {
      uri: "https://www.furaffinity.net/view/59714213/",
      requested_at: Time.parse("Jan 31, 2025 16:17 UTC"),
      status_code: 200,
      content_type: "text/html",
      contents: post_fixture,
    }

    [view_response]
  end
end
|
|
|
|
# Provides the page fixture for submission 59723907 and a single mock
# response that returns it for the submission's /view/ URL.
shared_context "post with keywords fixture" do
  let(:post_fixture) do
    SpecUtil.read_fixture_file(
      "domain/fa/submission/submission_page_59723907.html",
    )
  end

  let(:client_mock_config) do
    view_response = {
      uri: "https://www.furaffinity.net/view/59723907/",
      status_code: 200,
      content_type: "text/html",
      requested_at: Time.parse("Feb 1, 2025 15:45 UTC"),
      contents: post_fixture,
    }

    [view_response]
  end
end
|
|
|
|
# Eagerly persists the FaUser "-creeps" so it exists before the job runs.
shared_context "creator is creeps" do
  let!(:creator) do
    Domain::User::FaUser.create!(
      name: "-creeps",
      url_name: "-creeps",
    )
  end
end
|
|
|
|
# Eagerly persists the FaUser "lizardlars" so it exists before the job runs.
shared_context "creator is lizardlars" do
  let!(:creator) do
    Domain::User::FaUser.create!(
      name: "lizardlars",
      url_name: "lizardlars",
    )
  end
end
|
|
|
|
# No FaUser is created up front here, so scanning the post is expected to
# create the creator record and enqueue a user page job for it.
context "when the creator does not exist" do
  include_context "post with no keywords fixture"

  it "creates a new creator" do
    expect { perform_now({ fa_id: 59_714_213 }) }.to change(
      Domain::User::FaUser,
      :count,
    ).by(1)
  end

  it "enqueues a user page job" do
    find_creator = -> { Domain::User::FaUser.find_by(url_name: "-creeps") }

    # Precondition: the creator must not exist before the scan.
    expect(find_creator.call).to be_nil

    perform_now({ fa_id: 59_714_213 })

    creator = find_creator.call
    expect(creator).not_to be_nil

    user_page_job_args =
      SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob)
    expect(user_page_job_args).to include(hash_including({ user: creator }))
  end
end
|
|
|
|
# Scans submission 59714213 with its creator ("-creeps") already persisted.
context "when scanning a post" do
  include_context "post with no keywords fixture"
  include_context "creator is creeps"

  context "and the post does not exist" do
    it "creates a new post with correct attributes" do
      perform_now({ fa_id: 59_714_213 })

      post = Domain::Post::FaPost.find_by(fa_id: 59_714_213)
      expect(post).not_to be_nil
      expect(post.state).to eq("ok")
      expect(post.last_submission_log_entry).to eq(@log_entries.first)
      expect(post.title).to eq("Slayer")

      expect(post.creator).not_to be_nil
      expect(post.creator.url_name).to eq("-creeps")
      expect(post.creator.name).to eq("-creeps")

      expect(post.description).to include("My newest oil painting")
      expect(post.category).to eq("All")
      expect(post.theme).to eq("All")
      expect(post.species).to eq("Unspecified / Any")
      expect(post.gender).to eq("Any")

      expect(post.num_favorites).to eq(0)
      expect(post.num_comments).to eq(0)
      expect(post.num_views).to eq(3)

      expected_file_url =
        "https://d.furaffinity.net/art/-creeps/1738343855/1738343855.-creeps_slayerlr.jpg"
      expect(post.file.url_str).to eq(expected_file_url)

      expected_posted_at = Time.parse("Jan 31, 2025 12:17 PM -08:00")
      expect(post.posted_at).to be_within(1.second).of(expected_posted_at)
      expect(post.scanned_at).to be_within(3.seconds).of(Time.now)
    end
  end

  context "and the post exists but has not been scanned yet" do
    include_context "post 59_714_213 already exists"

    it "updates the post attributes" do
      post.update!(title: "old title")

      # The three expectations are composed so they are all checked around a
      # single run of the job.
      title_updated = change(post, :title).from("old title").to("Slayer")
      scanned_at_set =
        change(post, :scanned_at).from(nil).to(
          be_within(3.seconds).of(Time.now),
        )
      state_unchanged = not_change(post, :state)

      expect {
        perform_now({ fa_id: 59_714_213 })
        post.reload
      }.to title_updated.and(scanned_at_set).and(state_unchanged)
    end
  end

  it "enqueues a file scan job" do
    perform_now({ fa_id: 59_714_213 })

    post = Domain::Post::FaPost.find_by(fa_id: 59_714_213)
    scan_file_job_args =
      SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanFileJob)
    expect(scan_file_job_args).to match(
      [{ file: post.file, caused_by_entry: @log_entries.first }],
    )
  end
end
|
|
|
|
# Scans submission 62198493 using a page fixture captured on 2025-09-07.
#
# No post or creator is created up front in this context, so both records are
# asserted to exist before their attributes are compared. Without the creator
# check, `expect(post.creator).to eq(FaUser.find_by(...))` would pass
# vacuously whenever both sides are nil.
context "when scanning a post from the September 2025 update" do
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/view/62198493/",
        status_code: 200,
        content_type: "text/html",
        contents:
          SpecUtil.read_fixture_file(
            "domain/fa/submission/submission_page_62198493_2025-09-07.html",
          ),
      },
    ]
  end

  it "updates the post attributes" do
    perform_now({ fa_id: 62_198_493 })

    post = Domain::Post::FaPost.find_by(fa_id: 62_198_493)
    # Fail with a readable message instead of a NoMethodError on nil.
    expect(post).not_to be_nil
    expect(post.state).to eq("ok")
    expect(post.title).to eq("harlan")

    # Guard against a nil == nil match: the creator must actually exist.
    creator = Domain::User::FaUser.find_by(url_name: "charkiexd")
    expect(creator).not_to be_nil
    expect(post.creator).to eq(creator)

    expect(post.posted_at).to be_within(1.second).of(
      Time.parse("Sep 6, 2025 9:07:01 PM -07:00"),
    )
  end
end
|
|
|
|
# The post already has a file and a scanned_at of one day ago, and the mock
# client is configured with no responses.
context "when scanning an already scanned post" do
  include_context "post 59_714_213 already exists"
  include_context "creator is creeps"

  let(:client_mock_config) do
    []
  end

  before do
    post.build_file(url_str: "https://example.com/image.jpg")
    post.scanned_at = 1.day.ago
    post.file.save!
    post.save!
  end

  it "does not perform a new scan" do
    # Allows a sub-second difference after reload; a fresh scan would move
    # scanned_at forward by roughly a day.
    expect {
      perform_now({ post: post })
      post.reload
    }.to change { post.scanned_at }.by_at_most(1.second)
  end
end
|
|
|
|
# Scans submission 59723907, whose fixture page carries keywords, with the
# creator "lizardlars" already persisted.
context "the post has keywords" do
  include_context "post with keywords fixture"
  include_context "creator is lizardlars"

  it "updates the post attributes" do
    fa_id = 59_723_907
    perform_now({ fa_id: fa_id })

    post = Domain::Post::FaPost.find_by(fa_id: fa_id)
    expect(post.state).to eq("ok")

    expected_title = "Oh No! Stuck With Her Step Bro | Chapter 2 | Page 8"
    expect(post.title).to eq(expected_title)
    expect(post.creator).to eq(creator)

    expected_posted_at = Time.parse("Feb 1, 2025 07:15 AM -08:00")
    expect(post.posted_at).to be_within(1.second).of(expected_posted_at)

    expect(post.keywords).to match(array_including("wolfgang", "kiss"))
    expect(post.scanned_at).to be_within(3.seconds).of(Time.now)
  end
end
|
|
|
|
# The /view/ URL answers with HTTP 200 but the body is the 404 page fixture;
# the job is expected to mark the existing post as removed.
context "when post is removed" do
  include_context "post 59_714_213 already exists"

  let(:client_mock_config) do
    not_found_response = {
      uri: "https://www.furaffinity.net/view/59714213/",
      status_code: 200,
      content_type: "text/html",
      contents: post_404_fixture,
    }

    [not_found_response]
  end

  it "marks the post as removed" do
    perform_now({ post: post })

    post.reload
    expect(post.state).to eq("removed")
  end
end
|
|
|
|
# Table-driven examples for `uri_same_with_normalized_facdn_host?`. Each
# host/path combination is checked three ways: both URLs prefixed with
# "https:", both scheme-relative ("//host/path"), and one of each.
describe "#uri_same_with_normalized_facdn_host?" do
  let(:client_mock_config) do
    []
  end

  shared_examples "has result" do |result|
    # description suffix => [prefix for url1, prefix for url2]
    {
      "both have schema" => ["https:", "https:"],
      "both missing schema" => ["", ""],
      "one has schema" => ["https:", ""],
    }.each do |label, (prefix1, prefix2)|
      it "is #{result}, #{label}" do
        url1 = "#{prefix1}//#{host1}#{path1}"
        url2 = "#{prefix2}//#{host2}#{path2}"
        expect(
          described_class.uri_same_with_normalized_facdn_host?(url1, url2),
        ).to eq(result)
      end
    end
  end

  # shared context name => [host1, host2]
  {
    "host: different cdn hosts" => ["d.facdn.net", "d.furaffinity.net"],
    "host: both hosts are d.facdn.net" => ["d.facdn.net", "d.facdn.net"],
    "host: both hosts are d.furaffinity.net" => [
      "d.furaffinity.net",
      "d.furaffinity.net",
    ],
    "host: one domain is not a cdn" => ["d.facdn.net", "example.com"],
  }.each do |context_name, (first_host, second_host)|
    shared_context context_name do
      let(:host1) { first_host }
      let(:host2) { second_host }
    end
  end

  # shared context name => [path1, path2]
  {
    "paths: are the same" => [
      "/art/user/1234567890/image.jpg",
      "/art/user/1234567890/image.jpg",
    ],
    "paths: are different" => [
      "/art/user/1234567890/image.jpg",
      "/art/user/1234567890/some_other_image.jpg",
    ],
  }.each do |context_name, (first_path, second_path)|
    shared_context context_name do
      let(:path1) { first_path }
      let(:path2) { second_path }
    end
  end

  # [host context, path context, expected result]
  expectations = [
    ["host: different cdn hosts", "paths: are the same", true],
    ["host: both hosts are d.facdn.net", "paths: are the same", true],
    ["host: both hosts are d.furaffinity.net", "paths: are the same", true],
    ["host: one domain is not a cdn", "paths: are the same", false],
    ["host: different cdn hosts", "paths: are different", false],
    ["host: both hosts are d.facdn.net", "paths: are different", false],
    ["host: both hosts are d.furaffinity.net", "paths: are different", false],
    ["host: one domain is not a cdn", "paths: are different", false],
  ]

  expectations.each do |host_context, path_context, result|
    context "#{host_context} and #{path_context}" do
      include_context host_context
      include_context path_context
      include_examples "has result", result
    end
  end
end
|
|
end
|