fuzzysearch handle no sha256 for file

This commit is contained in:
Dylan Knutson
2025-07-25 04:20:40 +00:00
parent dca8ba4566
commit f11a5782e1
7 changed files with 104 additions and 83 deletions

View File

@@ -45,7 +45,7 @@ class Domain::Fa::Job::ScanFuzzysearchJob < Domain::Fa::Job::Base
post.fuzzysearch_checked_at = Time.now
if response.is_a?(HttpLogEntry)
post.fuzzysearch_entry = response
logger.error("fuzzysearch query failed")
logger.error("fuzzysearch query failed or returned no results")
return
end

View File

@@ -6,11 +6,18 @@ class Scraper::ClientFactory
@gallery_dl_clients = Concurrent::ThreadLocalVar.new(nil)
# for testing only
sig { params(mock: T.nilable(Scraper::HttpClient)).void }
# Stores +mock+ (a Scraper::HttpClient double, or nil) in the class-level
# @http_client_mock slot. Test-only hook.
#
# Raises RuntimeError when Rails.env.test? is false.
def self.http_client_mock=(mock)
  # An argument-less `raise` reports only "unhandled exception" and, when
  # reached inside a `rescue`, re-raises whatever unrelated error is current.
  # Always raise a fresh RuntimeError that names the actual problem.
  raise "http_client_mock= may only be used in the test environment" unless Rails.env.test?

  @http_client_mock = mock
end
sig { returns(T.nilable(Scraper::HttpClient)) }
# Returns the value held in the class-level @http_client_mock slot (nil when
# nothing has been stored). Test-only hook.
#
# Raises RuntimeError when Rails.env.test? is false.
def self.http_client_mock
  # An argument-less `raise` reports only "unhandled exception" and, when
  # reached inside a `rescue`, re-raises whatever unrelated error is current.
  # Always raise a fresh RuntimeError that names the actual problem.
  raise "http_client_mock may only be used in the test environment" unless Rails.env.test?

  @http_client_mock
end
def self.gallery_dl_client_mock=(mock)
raise unless Rails.env.test?
@gallery_dl_client_mock = mock

View File

@@ -23,7 +23,7 @@ class Scraper::FuzzysearchApiClient
const :artist_name, String
const :deleted, T::Boolean
const :file_url, String
const :file_sha256, String
const :file_sha256, T.nilable(String)
const :tags, T::Array[String]
end
@@ -33,12 +33,20 @@ class Scraper::FuzzysearchApiClient
url.query_values = { search: fa_id.to_s }
response = @http_client.get(url)
return :rate_limit_exceeded if response.status_code == 429
if response.status_code == 429
logger.error(
format_tags(
make_tag("status_code", response.status_code),
"fuzzysearch rate limit exceeded",
),
)
return :rate_limit_exceeded
end
if response.status_code != 200
logger.error(
format_tags(
make_tag("status_code", response.status_code),
make_tag("uri", url.to_s),
"fuzzysearch query failed",
),
)

View File

@@ -1,5 +1,6 @@
# typed: true
class HttpClientMockHelpers
extend T::Sig
include HasColorLogger
extend FactoryBot::Syntax::Methods
extend RSpec::Mocks::ExampleMethods
@@ -41,6 +42,13 @@ class HttpClientMockHelpers
)
end
sig do
params(
http_client_mock: Scraper::HttpClient,
requests: T::Array[T.untyped],
any_order: T::Boolean,
).returns(T::Array[HttpLogEntry])
end
def self.init_http_client_mock(http_client_mock, requests, any_order: false)
if any_order
init_any_order(http_client_mock, requests)
@@ -49,6 +57,19 @@ class HttpClientMockHelpers
end
end
sig do
params(requests: T::Array[T.untyped], any_order: T::Boolean).returns(
T::Array[HttpLogEntry],
)
end
# Convenience wrapper that reads the mock from
# Scraper::ClientFactory.http_client_mock instead of taking it as an argument.
#
# requests  - request descriptions, forwarded unchanged.
# any_order - when true, dispatches to init_any_order; otherwise init_ordered.
#
# Returns whatever the chosen initializer returns.
def self.init_with(requests, any_order: false)
  client_mock = Scraper::ClientFactory.http_client_mock
  return init_any_order(client_mock, requests) if any_order

  init_ordered(client_mock, requests)
end
def self.init_ordered(http_client_mock, requests)
log_entries = []

View File

@@ -2,33 +2,20 @@
require "rails_helper"
describe Domain::Fa::Job::ScanFuzzysearchJob do
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
before { Scraper::ClientFactory.http_client_mock = http_client_mock }
let(:client_mock_config) { [] }
let!(:log_entries) do
HttpClientMockHelpers.init_http_client_mock(
http_client_mock,
client_mock_config,
HttpClientMockHelpers.init_with(
[
{
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code:,
content_type: "application/json",
contents: File.read("test/fixtures/files/fuzzysearch/#{fa_id}.json"),
},
],
)
end
let(:fuzzysearch_response_51015903) do
JSON.parse(File.read("test/fixtures/files/fuzzysearch/51015903.json"))
end
let(:fuzzysearch_response_21275696) do
JSON.parse(File.read("test/fixtures/files/fuzzysearch/21275696.json"))
end
let(:fuzzysearch_response_53068507) do
JSON.parse(File.read("test/fixtures/files/fuzzysearch/53068507.json"))
end
let(:fuzzysearch_response_61665194) do
JSON.parse(File.read("test/fixtures/files/fuzzysearch/61665194.json"))
end
describe "post was marked removed" do
let(:post) do
create(
@@ -41,17 +28,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
context "and fuzzysearch has post info" do
let(:fa_id) { 51_015_903 }
let(:client_mock_config) do
[
{
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code: 200,
content_type: "application/json",
contents: fuzzysearch_response_51015903.to_json,
},
]
end
let(:status_code) { 200 }
it "updates the post" do
perform_now({ post: })
@@ -83,6 +60,14 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
expect(post.creator.url_name).to eq("crimetxt")
end
it "sets the file sha256" do
perform_now({ post: })
post.reload
expect(post.fuzzysearch_json["sha256"]).to eq(
"d488dabd8eb22398a228fb662eb520bb4daaac3a9ab0dc9be8b8c5e1b9522efb",
)
end
it "enqueues a fur archiver post file job" do
perform_now({ post: })
post.reload
@@ -110,17 +95,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
context "and fuzzysearch has no post info" do
let(:fa_id) { 21_275_696 }
let(:client_mock_config) do
[
{
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code: 200,
content_type: "application/json",
contents: fuzzysearch_response_21275696.to_json,
},
]
end
let(:status_code) { 200 }
it "does not set the creator" do
perform_now({ post: })
@@ -137,17 +112,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
context "and the artist name has capitalizations" do
let(:fa_id) { 53_068_507 }
let(:client_mock_config) do
[
{
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code: 200,
content_type: "application/json",
contents: fuzzysearch_response_53068507.to_json,
},
]
end
let(:status_code) { 200 }
it "sets the creator" do
perform_now({ post: })
@@ -161,17 +126,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
context "and the post has a story url" do
let(:fa_id) { 61_665_194 }
let(:client_mock_config) do
[
{
uri:
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
status_code: 200,
content_type: "application/json",
contents: fuzzysearch_response_61665194.to_json,
},
]
end
let(:status_code) { 200 }
it "does not change the post state" do
perform_now({ post: })
@@ -186,7 +141,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
expect(post.creator.url_name).to eq("irontankris")
end
it "updates keywords", quiet: false do
it "updates keywords" do
post.keywords = []
post.save!
perform_now({ post: })
@@ -194,5 +149,23 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
expect(post.keywords).to include("female", "mlp", "little", "anthro")
end
end
context "the post has no sha256" do
let(:fa_id) { 61_429_615 }
let(:status_code) { 200 }
it "sets the artist" do
perform_now({ post: })
post.reload
expect(post.creator).to be_present
expect(post.creator.url_name).to eq("622000")
end
it "does not set the sha256" do
perform_now({ post: })
post.reload
expect(post.fuzzysearch_json["sha256"]).to be_nil
end
end
end
end

View File

@@ -44,17 +44,11 @@ RSpec.configure do |config|
end
end
# this breaks parallel tests because it's not thread safe
# config.before(:all) do
# # safeguard against running this test in a non-test environment
# root_dir =
# File.absolute_path(Rails.application.config_for("blob_file_location"))
# if root_dir.match?(%r{^#{Rails.root}/tmp})
# FileUtils.rm_rf(root_dir)
# else
# raise "blob_file_location is not in the tmp directory"
# end
# end
# Reset http client mock before each test
config.before(:each) do
Scraper::ClientFactory.http_client_mock =
T.unsafe(self).instance_double("::Scraper::HttpClient")
end
# rspec-expectations config goes here. You can use an alternate
# assertion/expectation library such as wrong or the stdlib/minitest

View File

@@ -0,0 +1,18 @@
[
{
"id": 61429615,
"file_id": 1751417221,
"artist": "622000",
"hash": null,
"hash_str": null,
"url": "https://d.furaffinity.net/art/622000/1751417221/1751417221.622000_1000007071.png",
"filename": "1751417221.622000_1000007071.png",
"rating": "general",
"posted_at": "2025-07-02T00:47:00Z",
"file_size": null,
"sha256": null,
"updated_at": null,
"deleted": false,
"tags": ["oc", "male", "beastmen"]
}
]