fuzzysearch: handle files with no sha256

This commit is contained in:
Dylan Knutson
2025-07-25 04:20:40 +00:00
parent dca8ba4566
commit f11a5782e1
7 changed files with 104 additions and 83 deletions

View File

@@ -45,7 +45,7 @@ class Domain::Fa::Job::ScanFuzzysearchJob < Domain::Fa::Job::Base
post.fuzzysearch_checked_at = Time.now post.fuzzysearch_checked_at = Time.now
if response.is_a?(HttpLogEntry) if response.is_a?(HttpLogEntry)
post.fuzzysearch_entry = response post.fuzzysearch_entry = response
logger.error("fuzzysearch query failed") logger.error("fuzzysearch query failed or returned no results")
return return
end end

View File

@@ -6,11 +6,18 @@ class Scraper::ClientFactory
@gallery_dl_clients = Concurrent::ThreadLocalVar.new(nil) @gallery_dl_clients = Concurrent::ThreadLocalVar.new(nil)
# for testing only # for testing only
sig { params(mock: T.nilable(Scraper::HttpClient)).void }
def self.http_client_mock=(mock) def self.http_client_mock=(mock)
raise unless Rails.env.test? raise unless Rails.env.test?
@http_client_mock = mock @http_client_mock = mock
end end
# Test-only reader for the HTTP client mock stored by `http_client_mock=`.
# Returns the stored mock (possibly nil); outside the Rails test environment
# it raises via a bare `raise`, mirroring the guard used by the setter.
sig { returns(T.nilable(Scraper::HttpClient)) }
def self.http_client_mock
  return @http_client_mock if Rails.env.test?

  raise
end
def self.gallery_dl_client_mock=(mock) def self.gallery_dl_client_mock=(mock)
raise unless Rails.env.test? raise unless Rails.env.test?
@gallery_dl_client_mock = mock @gallery_dl_client_mock = mock

View File

@@ -23,7 +23,7 @@ class Scraper::FuzzysearchApiClient
const :artist_name, String const :artist_name, String
const :deleted, T::Boolean const :deleted, T::Boolean
const :file_url, String const :file_url, String
const :file_sha256, String const :file_sha256, T.nilable(String)
const :tags, T::Array[String] const :tags, T::Array[String]
end end
@@ -33,12 +33,20 @@ class Scraper::FuzzysearchApiClient
url.query_values = { search: fa_id.to_s } url.query_values = { search: fa_id.to_s }
response = @http_client.get(url) response = @http_client.get(url)
return :rate_limit_exceeded if response.status_code == 429 if response.status_code == 429
logger.error(
format_tags(
make_tag("status_code", response.status_code),
"fuzzysearch rate limit exceeded",
),
)
return :rate_limit_exceeded
end
if response.status_code != 200 if response.status_code != 200
logger.error( logger.error(
format_tags( format_tags(
make_tag("status_code", response.status_code), make_tag("status_code", response.status_code),
make_tag("uri", url.to_s),
"fuzzysearch query failed", "fuzzysearch query failed",
), ),
) )

View File

@@ -1,5 +1,6 @@
# typed: true # typed: true
class HttpClientMockHelpers class HttpClientMockHelpers
extend T::Sig
include HasColorLogger include HasColorLogger
extend FactoryBot::Syntax::Methods extend FactoryBot::Syntax::Methods
extend RSpec::Mocks::ExampleMethods extend RSpec::Mocks::ExampleMethods
@@ -41,6 +42,13 @@ class HttpClientMockHelpers
) )
end end
sig do
params(
http_client_mock: Scraper::HttpClient,
requests: T::Array[T.untyped],
any_order: T::Boolean,
).returns(T::Array[HttpLogEntry])
end
def self.init_http_client_mock(http_client_mock, requests, any_order: false) def self.init_http_client_mock(http_client_mock, requests, any_order: false)
if any_order if any_order
init_any_order(http_client_mock, requests) init_any_order(http_client_mock, requests)
@@ -49,6 +57,19 @@ class HttpClientMockHelpers
end end
end end
# Registers the expected `requests` on the mock currently held by
# Scraper::ClientFactory, so callers do not have to pass the mock explicitly.
#
# requests  - Array of request descriptions, forwarded untouched.
# any_order - when true, dispatches to init_any_order; otherwise to
#             init_ordered (the default).
#
# Returns whatever the chosen initializer returns (declared as an Array of
# HttpLogEntry by the sig).
sig do
  params(requests: T::Array[T.untyped], any_order: T::Boolean).returns(
    T::Array[HttpLogEntry],
  )
end
def self.init_with(requests, any_order: false)
  factory_mock = Scraper::ClientFactory.http_client_mock
  return init_any_order(factory_mock, requests) if any_order

  init_ordered(factory_mock, requests)
end
def self.init_ordered(http_client_mock, requests) def self.init_ordered(http_client_mock, requests)
log_entries = [] log_entries = []

View File

@@ -2,33 +2,20 @@
require "rails_helper" require "rails_helper"
describe Domain::Fa::Job::ScanFuzzysearchJob do describe Domain::Fa::Job::ScanFuzzysearchJob do
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
before { Scraper::ClientFactory.http_client_mock = http_client_mock }
let(:client_mock_config) { [] }
let!(:log_entries) do let!(:log_entries) do
HttpClientMockHelpers.init_http_client_mock( HttpClientMockHelpers.init_with(
http_client_mock, [
client_mock_config, {
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code:,
content_type: "application/json",
contents: File.read("test/fixtures/files/fuzzysearch/#{fa_id}.json"),
},
],
) )
end end
let(:fuzzysearch_response_51015903) do
JSON.parse(File.read("test/fixtures/files/fuzzysearch/51015903.json"))
end
let(:fuzzysearch_response_21275696) do
JSON.parse(File.read("test/fixtures/files/fuzzysearch/21275696.json"))
end
let(:fuzzysearch_response_53068507) do
JSON.parse(File.read("test/fixtures/files/fuzzysearch/53068507.json"))
end
let(:fuzzysearch_response_61665194) do
JSON.parse(File.read("test/fixtures/files/fuzzysearch/61665194.json"))
end
describe "post was marked removed" do describe "post was marked removed" do
let(:post) do let(:post) do
create( create(
@@ -41,17 +28,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
context "and fuzzysearch has post info" do context "and fuzzysearch has post info" do
let(:fa_id) { 51_015_903 } let(:fa_id) { 51_015_903 }
let(:client_mock_config) do let(:status_code) { 200 }
[
{
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code: 200,
content_type: "application/json",
contents: fuzzysearch_response_51015903.to_json,
},
]
end
it "updates the post" do it "updates the post" do
perform_now({ post: }) perform_now({ post: })
@@ -83,6 +60,14 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
expect(post.creator.url_name).to eq("crimetxt") expect(post.creator.url_name).to eq("crimetxt")
end end
it "sets the file sha256" do
perform_now({ post: })
post.reload
expect(post.fuzzysearch_json["sha256"]).to eq(
"d488dabd8eb22398a228fb662eb520bb4daaac3a9ab0dc9be8b8c5e1b9522efb",
)
end
it "enqueues a fur archiver post file job" do it "enqueues a fur archiver post file job" do
perform_now({ post: }) perform_now({ post: })
post.reload post.reload
@@ -110,17 +95,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
context "and fuzzysearch has no post info" do context "and fuzzysearch has no post info" do
let(:fa_id) { 21_275_696 } let(:fa_id) { 21_275_696 }
let(:client_mock_config) do let(:status_code) { 200 }
[
{
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code: 200,
content_type: "application/json",
contents: fuzzysearch_response_21275696.to_json,
},
]
end
it "does not set the creator" do it "does not set the creator" do
perform_now({ post: }) perform_now({ post: })
@@ -137,17 +112,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
context "and the artist name has capitalizations" do context "and the artist name has capitalizations" do
let(:fa_id) { 53_068_507 } let(:fa_id) { 53_068_507 }
let(:client_mock_config) do let(:status_code) { 200 }
[
{
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code: 200,
content_type: "application/json",
contents: fuzzysearch_response_53068507.to_json,
},
]
end
it "sets the creator" do it "sets the creator" do
perform_now({ post: }) perform_now({ post: })
@@ -161,17 +126,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
context "and the post has a story url" do context "and the post has a story url" do
let(:fa_id) { 61_665_194 } let(:fa_id) { 61_665_194 }
let(:client_mock_config) do let(:status_code) { 200 }
[
{
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code: 200,
content_type: "application/json",
contents: fuzzysearch_response_61665194.to_json,
},
]
end
it "does not change the post state" do it "does not change the post state" do
perform_now({ post: }) perform_now({ post: })
@@ -186,7 +141,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
expect(post.creator.url_name).to eq("irontankris") expect(post.creator.url_name).to eq("irontankris")
end end
it "updates keywords", quiet: false do it "updates keywords" do
post.keywords = [] post.keywords = []
post.save! post.save!
perform_now({ post: }) perform_now({ post: })
@@ -194,5 +149,23 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
expect(post.keywords).to include("female", "mlp", "little", "anthro") expect(post.keywords).to include("female", "mlp", "little", "anthro")
end end
end end
context "the post has no sha256" do
  # The mocked response body is read from
  # test/fixtures/files/fuzzysearch/<fa_id>.json by the shared log_entries setup.
  let(:status_code) { 200 }
  let(:fa_id) { 61_429_615 }

  # Both examples inspect the post after the job has run, so run it once per
  # example here instead of repeating the two calls in each body.
  before do
    perform_now({ post: })
    post.reload
  end

  it "sets the artist" do
    expect(post.creator).to be_present
    expect(post.creator.url_name).to eq("622000")
  end

  it "does not set the sha256" do
    expect(post.fuzzysearch_json["sha256"]).to be_nil
  end
end
end end
end end

View File

@@ -44,17 +44,11 @@ RSpec.configure do |config|
end end
end end
# this breaks parallel tests because it's not thread safe # Reset http client mock before each test
# config.before(:all) do config.before(:each) do
# # safeguard against running this test in a non-test environment Scraper::ClientFactory.http_client_mock =
# root_dir = T.unsafe(self).instance_double("::Scraper::HttpClient")
# File.absolute_path(Rails.application.config_for("blob_file_location")) end
# if root_dir.match?(%r{^#{Rails.root}/tmp})
# FileUtils.rm_rf(root_dir)
# else
# raise "blob_file_location is not in the tmp directory"
# end
# end
# rspec-expectations config goes here. You can use an alternate # rspec-expectations config goes here. You can use an alternate
# assertion/expectation library such as wrong or the stdlib/minitest # assertion/expectation library such as wrong or the stdlib/minitest

View File

@@ -0,0 +1,18 @@
[
{
"id": 61429615,
"file_id": 1751417221,
"artist": "622000",
"hash": null,
"hash_str": null,
"url": "https://d.furaffinity.net/art/622000/1751417221/1751417221.622000_1000007071.png",
"filename": "1751417221.622000_1000007071.png",
"rating": "general",
"posted_at": "2025-07-02T00:47:00Z",
"file_size": null,
"sha256": null,
"updated_at": null,
"deleted": false,
"tags": ["oc", "male", "beastmen"]
}
]