fuzzysearch handle no sha256 for file
This commit is contained in:
@@ -45,7 +45,7 @@ class Domain::Fa::Job::ScanFuzzysearchJob < Domain::Fa::Job::Base
|
|||||||
post.fuzzysearch_checked_at = Time.now
|
post.fuzzysearch_checked_at = Time.now
|
||||||
if response.is_a?(HttpLogEntry)
|
if response.is_a?(HttpLogEntry)
|
||||||
post.fuzzysearch_entry = response
|
post.fuzzysearch_entry = response
|
||||||
logger.error("fuzzysearch query failed")
|
logger.error("fuzzysearch query failed or returned no results")
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -6,11 +6,18 @@ class Scraper::ClientFactory
|
|||||||
@gallery_dl_clients = Concurrent::ThreadLocalVar.new(nil)
|
@gallery_dl_clients = Concurrent::ThreadLocalVar.new(nil)
|
||||||
|
|
||||||
# for testing only
|
# for testing only
|
||||||
|
sig { params(mock: T.nilable(Scraper::HttpClient)).void }
|
||||||
def self.http_client_mock=(mock)
|
def self.http_client_mock=(mock)
|
||||||
raise unless Rails.env.test?
|
raise unless Rails.env.test?
|
||||||
@http_client_mock = mock
|
@http_client_mock = mock
|
||||||
end
|
end
|
||||||
|
|
||||||
|
sig { returns(T.nilable(Scraper::HttpClient)) }
|
||||||
|
def self.http_client_mock
|
||||||
|
raise unless Rails.env.test?
|
||||||
|
@http_client_mock
|
||||||
|
end
|
||||||
|
|
||||||
def self.gallery_dl_client_mock=(mock)
|
def self.gallery_dl_client_mock=(mock)
|
||||||
raise unless Rails.env.test?
|
raise unless Rails.env.test?
|
||||||
@gallery_dl_client_mock = mock
|
@gallery_dl_client_mock = mock
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class Scraper::FuzzysearchApiClient
|
|||||||
const :artist_name, String
|
const :artist_name, String
|
||||||
const :deleted, T::Boolean
|
const :deleted, T::Boolean
|
||||||
const :file_url, String
|
const :file_url, String
|
||||||
const :file_sha256, String
|
const :file_sha256, T.nilable(String)
|
||||||
const :tags, T::Array[String]
|
const :tags, T::Array[String]
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -33,12 +33,20 @@ class Scraper::FuzzysearchApiClient
|
|||||||
url.query_values = { search: fa_id.to_s }
|
url.query_values = { search: fa_id.to_s }
|
||||||
response = @http_client.get(url)
|
response = @http_client.get(url)
|
||||||
|
|
||||||
return :rate_limit_exceeded if response.status_code == 429
|
if response.status_code == 429
|
||||||
|
logger.error(
|
||||||
|
format_tags(
|
||||||
|
make_tag("status_code", response.status_code),
|
||||||
|
"fuzzysearch rate limit exceeded",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return :rate_limit_exceeded
|
||||||
|
end
|
||||||
|
|
||||||
if response.status_code != 200
|
if response.status_code != 200
|
||||||
logger.error(
|
logger.error(
|
||||||
format_tags(
|
format_tags(
|
||||||
make_tag("status_code", response.status_code),
|
make_tag("status_code", response.status_code),
|
||||||
make_tag("uri", url.to_s),
|
|
||||||
"fuzzysearch query failed",
|
"fuzzysearch query failed",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
# typed: true
|
# typed: true
|
||||||
class HttpClientMockHelpers
|
class HttpClientMockHelpers
|
||||||
|
extend T::Sig
|
||||||
include HasColorLogger
|
include HasColorLogger
|
||||||
extend FactoryBot::Syntax::Methods
|
extend FactoryBot::Syntax::Methods
|
||||||
extend RSpec::Mocks::ExampleMethods
|
extend RSpec::Mocks::ExampleMethods
|
||||||
@@ -41,6 +42,13 @@ class HttpClientMockHelpers
|
|||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
sig do
|
||||||
|
params(
|
||||||
|
http_client_mock: Scraper::HttpClient,
|
||||||
|
requests: T::Array[T.untyped],
|
||||||
|
any_order: T::Boolean,
|
||||||
|
).returns(T::Array[HttpLogEntry])
|
||||||
|
end
|
||||||
def self.init_http_client_mock(http_client_mock, requests, any_order: false)
|
def self.init_http_client_mock(http_client_mock, requests, any_order: false)
|
||||||
if any_order
|
if any_order
|
||||||
init_any_order(http_client_mock, requests)
|
init_any_order(http_client_mock, requests)
|
||||||
@@ -49,6 +57,19 @@ class HttpClientMockHelpers
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
sig do
|
||||||
|
params(requests: T::Array[T.untyped], any_order: T::Boolean).returns(
|
||||||
|
T::Array[HttpLogEntry],
|
||||||
|
)
|
||||||
|
end
|
||||||
|
def self.init_with(requests, any_order: false)
|
||||||
|
if any_order
|
||||||
|
init_any_order(Scraper::ClientFactory.http_client_mock, requests)
|
||||||
|
else
|
||||||
|
init_ordered(Scraper::ClientFactory.http_client_mock, requests)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def self.init_ordered(http_client_mock, requests)
|
def self.init_ordered(http_client_mock, requests)
|
||||||
log_entries = []
|
log_entries = []
|
||||||
|
|
||||||
|
|||||||
@@ -2,33 +2,20 @@
|
|||||||
require "rails_helper"
|
require "rails_helper"
|
||||||
|
|
||||||
describe Domain::Fa::Job::ScanFuzzysearchJob do
|
describe Domain::Fa::Job::ScanFuzzysearchJob do
|
||||||
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
|
|
||||||
before { Scraper::ClientFactory.http_client_mock = http_client_mock }
|
|
||||||
|
|
||||||
let(:client_mock_config) { [] }
|
|
||||||
let!(:log_entries) do
|
let!(:log_entries) do
|
||||||
HttpClientMockHelpers.init_http_client_mock(
|
HttpClientMockHelpers.init_with(
|
||||||
http_client_mock,
|
[
|
||||||
client_mock_config,
|
{
|
||||||
|
uri:
|
||||||
|
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
|
||||||
|
status_code:,
|
||||||
|
content_type: "application/json",
|
||||||
|
contents: File.read("test/fixtures/files/fuzzysearch/#{fa_id}.json"),
|
||||||
|
},
|
||||||
|
],
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
let(:fuzzysearch_response_51015903) do
|
|
||||||
JSON.parse(File.read("test/fixtures/files/fuzzysearch/51015903.json"))
|
|
||||||
end
|
|
||||||
|
|
||||||
let(:fuzzysearch_response_21275696) do
|
|
||||||
JSON.parse(File.read("test/fixtures/files/fuzzysearch/21275696.json"))
|
|
||||||
end
|
|
||||||
|
|
||||||
let(:fuzzysearch_response_53068507) do
|
|
||||||
JSON.parse(File.read("test/fixtures/files/fuzzysearch/53068507.json"))
|
|
||||||
end
|
|
||||||
|
|
||||||
let(:fuzzysearch_response_61665194) do
|
|
||||||
JSON.parse(File.read("test/fixtures/files/fuzzysearch/61665194.json"))
|
|
||||||
end
|
|
||||||
|
|
||||||
describe "post was marked removed" do
|
describe "post was marked removed" do
|
||||||
let(:post) do
|
let(:post) do
|
||||||
create(
|
create(
|
||||||
@@ -41,17 +28,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
|
|||||||
|
|
||||||
context "and fuzzysearch has post info" do
|
context "and fuzzysearch has post info" do
|
||||||
let(:fa_id) { 51_015_903 }
|
let(:fa_id) { 51_015_903 }
|
||||||
let(:client_mock_config) do
|
let(:status_code) { 200 }
|
||||||
[
|
|
||||||
{
|
|
||||||
uri:
|
|
||||||
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
|
|
||||||
status_code: 200,
|
|
||||||
content_type: "application/json",
|
|
||||||
contents: fuzzysearch_response_51015903.to_json,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
end
|
|
||||||
|
|
||||||
it "updates the post" do
|
it "updates the post" do
|
||||||
perform_now({ post: })
|
perform_now({ post: })
|
||||||
@@ -83,6 +60,14 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
|
|||||||
expect(post.creator.url_name).to eq("crimetxt")
|
expect(post.creator.url_name).to eq("crimetxt")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "sets the file sha256" do
|
||||||
|
perform_now({ post: })
|
||||||
|
post.reload
|
||||||
|
expect(post.fuzzysearch_json["sha256"]).to eq(
|
||||||
|
"d488dabd8eb22398a228fb662eb520bb4daaac3a9ab0dc9be8b8c5e1b9522efb",
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
it "enqueues a fur archiver post file job" do
|
it "enqueues a fur archiver post file job" do
|
||||||
perform_now({ post: })
|
perform_now({ post: })
|
||||||
post.reload
|
post.reload
|
||||||
@@ -110,17 +95,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
|
|||||||
|
|
||||||
context "and fuzzysearch has no post info" do
|
context "and fuzzysearch has no post info" do
|
||||||
let(:fa_id) { 21_275_696 }
|
let(:fa_id) { 21_275_696 }
|
||||||
let(:client_mock_config) do
|
let(:status_code) { 200 }
|
||||||
[
|
|
||||||
{
|
|
||||||
uri:
|
|
||||||
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
|
|
||||||
status_code: 200,
|
|
||||||
content_type: "application/json",
|
|
||||||
contents: fuzzysearch_response_21275696.to_json,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
end
|
|
||||||
|
|
||||||
it "does not set the creator" do
|
it "does not set the creator" do
|
||||||
perform_now({ post: })
|
perform_now({ post: })
|
||||||
@@ -137,17 +112,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
|
|||||||
|
|
||||||
context "and the artist name has capitalizations" do
|
context "and the artist name has capitalizations" do
|
||||||
let(:fa_id) { 53_068_507 }
|
let(:fa_id) { 53_068_507 }
|
||||||
let(:client_mock_config) do
|
let(:status_code) { 200 }
|
||||||
[
|
|
||||||
{
|
|
||||||
uri:
|
|
||||||
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
|
|
||||||
status_code: 200,
|
|
||||||
content_type: "application/json",
|
|
||||||
contents: fuzzysearch_response_53068507.to_json,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
end
|
|
||||||
|
|
||||||
it "sets the creator" do
|
it "sets the creator" do
|
||||||
perform_now({ post: })
|
perform_now({ post: })
|
||||||
@@ -161,17 +126,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
|
|||||||
|
|
||||||
context "and the post has a story url" do
|
context "and the post has a story url" do
|
||||||
let(:fa_id) { 61_665_194 }
|
let(:fa_id) { 61_665_194 }
|
||||||
let(:client_mock_config) do
|
let(:status_code) { 200 }
|
||||||
[
|
|
||||||
{
|
|
||||||
uri:
|
|
||||||
"https://api-next.fuzzysearch.net/v1/file/furaffinity?search=#{fa_id}",
|
|
||||||
status_code: 200,
|
|
||||||
content_type: "application/json",
|
|
||||||
contents: fuzzysearch_response_61665194.to_json,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
end
|
|
||||||
|
|
||||||
it "does not change the post state" do
|
it "does not change the post state" do
|
||||||
perform_now({ post: })
|
perform_now({ post: })
|
||||||
@@ -186,7 +141,7 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
|
|||||||
expect(post.creator.url_name).to eq("irontankris")
|
expect(post.creator.url_name).to eq("irontankris")
|
||||||
end
|
end
|
||||||
|
|
||||||
it "updates keywords", quiet: false do
|
it "updates keywords" do
|
||||||
post.keywords = []
|
post.keywords = []
|
||||||
post.save!
|
post.save!
|
||||||
perform_now({ post: })
|
perform_now({ post: })
|
||||||
@@ -194,5 +149,23 @@ describe Domain::Fa::Job::ScanFuzzysearchJob do
|
|||||||
expect(post.keywords).to include("female", "mlp", "little", "anthro")
|
expect(post.keywords).to include("female", "mlp", "little", "anthro")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "the post has no sha256" do
|
||||||
|
let(:fa_id) { 61_429_615 }
|
||||||
|
let(:status_code) { 200 }
|
||||||
|
|
||||||
|
it "sets the artist" do
|
||||||
|
perform_now({ post: })
|
||||||
|
post.reload
|
||||||
|
expect(post.creator).to be_present
|
||||||
|
expect(post.creator.url_name).to eq("622000")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not set the sha256" do
|
||||||
|
perform_now({ post: })
|
||||||
|
post.reload
|
||||||
|
expect(post.fuzzysearch_json["sha256"]).to be_nil
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -44,17 +44,11 @@ RSpec.configure do |config|
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# this breaks parallel tests because it's not thread safe
|
# Reset http client mock before each test
|
||||||
# config.before(:all) do
|
config.before(:each) do
|
||||||
# # safeguard against running this test in a non-test environment
|
Scraper::ClientFactory.http_client_mock =
|
||||||
# root_dir =
|
T.unsafe(self).instance_double("::Scraper::HttpClient")
|
||||||
# File.absolute_path(Rails.application.config_for("blob_file_location"))
|
end
|
||||||
# if root_dir.match?(%r{^#{Rails.root}/tmp})
|
|
||||||
# FileUtils.rm_rf(root_dir)
|
|
||||||
# else
|
|
||||||
# raise "blob_file_location is not in the tmp directory"
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# rspec-expectations config goes here. You can use an alternate
|
# rspec-expectations config goes here. You can use an alternate
|
||||||
# assertion/expectation library such as wrong or the stdlib/minitest
|
# assertion/expectation library such as wrong or the stdlib/minitest
|
||||||
|
|||||||
18
test/fixtures/files/fuzzysearch/61429615.json
vendored
Normal file
18
test/fixtures/files/fuzzysearch/61429615.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 61429615,
|
||||||
|
"file_id": 1751417221,
|
||||||
|
"artist": "622000",
|
||||||
|
"hash": null,
|
||||||
|
"hash_str": null,
|
||||||
|
"url": "https://d.furaffinity.net/art/622000/1751417221/1751417221.622000_1000007071.png",
|
||||||
|
"filename": "1751417221.622000_1000007071.png",
|
||||||
|
"rating": "general",
|
||||||
|
"posted_at": "2025-07-02T00:47:00Z",
|
||||||
|
"file_size": null,
|
||||||
|
"sha256": null,
|
||||||
|
"updated_at": null,
|
||||||
|
"deleted": false,
|
||||||
|
"tags": ["oc", "male", "beastmen"]
|
||||||
|
}
|
||||||
|
]
|
||||||
Reference in New Issue
Block a user