Files
redux-scraper/app/lib/scraper/fuzzysearch_api_client.rb
2025-07-25 04:20:40 +00:00

100 lines
2.6 KiB
Ruby

# typed: strict
# frozen_string_literal: true
# Client for the FuzzySearch HTTP API. The only operation exposed is looking
# up a file record by its FurAffinity id (`fa_id`).
class Scraper::FuzzysearchApiClient
  extend T::Sig
  include HasColorLogger

  # Scheme and host that every request is sent to.
  API_BASE_URL = "https://api-next.fuzzysearch.net"
  # Path of the endpoint queried by #search_fa_id_info.
  API_PATH_FA_ID_INFO = "/v1/file/furaffinity"

  # Result of a successful #search_fa_id_info call.
  #
  # Every field except `log_entry`, `json`, `artist_url_name` and `file_url`
  # is copied verbatim from the first element of the response array.
  class FaIdInfo < T::ImmutableStruct
    include T::Struct::ActsAsComparable

    # Log entry of the HTTP response this record was built from.
    const :log_entry, HttpLogEntry
    # The raw response element (first item of the returned JSON array).
    const :json, T::Hash[String, T.untyped]
    # Value of the "id" key.
    const :fa_id, Integer
    # `url_name` of the parsed "url" value (see FaUriHelper.parse_fa_media_url).
    const :artist_url_name, String
    # Value of the "artist" key.
    const :artist_name, String
    # Value of the "deleted" key.
    const :deleted, T::Boolean
    # Value of the "url" key.
    const :file_url, String
    # Value of the "sha256" key; may be absent.
    const :file_sha256, T.nilable(String)
    # Value of the "tags" key.
    const :tags, T::Array[String]
  end

  # @param http_client the client used to issue the GET request; its responses
  #   must respond to `status_code`, `body` and `log_entry`.
  sig { params(http_client: Scraper::JobBase::WrappedHttpClient).void }
  def initialize(http_client)
    @http_client = http_client
  end

  # Queries FuzzySearch for `fa_id` and returns the first matching record.
  #
  # Returns one of:
  # * `:rate_limit_exceeded` - the API answered with HTTP 429.
  # * the response's `HttpLogEntry` - the request failed (non-200 status) or
  #   the body could not be used: not valid JSON, not an array, an empty
  #   array, a first element that is not a hash, a missing "url", or a "url"
  #   that FaUriHelper could not parse. Each case is logged at error level.
  # * a `FaIdInfo` - built from the first element of the response array.
  #
  # NOTE(review): the values read from the payload ("id", "artist", "deleted",
  # "tags") are passed to FaIdInfo without being checked here; presumably the
  # struct's declared types reject a payload that omits them - confirm.
  sig { params(fa_id: Integer).returns(T.any(HttpLogEntry, FaIdInfo, Symbol)) }
  def search_fa_id_info(fa_id)
    url = Addressable::URI.parse("#{API_BASE_URL}#{API_PATH_FA_ID_INFO}")
    url.query_values = { search: fa_id.to_s }
    response = @http_client.get(url)

    # Rate limiting is reported separately from other failures so the caller
    # can tell it apart from a plain failed request.
    if response.status_code == 429
      logger.error(
        format_tags(
          make_tag("status_code", response.status_code),
          "fuzzysearch rate limit exceeded",
        ),
      )
      return :rate_limit_exceeded
    end

    if response.status_code != 200
      logger.error(
        format_tags(
          make_tag("status_code", response.status_code),
          "fuzzysearch query failed",
        ),
      )
      return response.log_entry
    end

    # A 200 response is not guaranteed to carry JSON; treat an unparseable
    # body like every other malformed response instead of raising.
    begin
      results = JSON.parse(response.body)
    rescue JSON::ParserError => e
      logger.error(
        format_tags(
          make_tag("error", e.message),
          "fuzzysearch response is not valid json",
        ),
      )
      return response.log_entry
    end

    unless results.is_a?(Array)
      logger.error("fuzzysearch response is not an array")
      return response.log_entry
    end

    if results.empty?
      logger.error("fuzzysearch response is empty")
      return response.log_entry
    end

    # Only the first result is used; any further elements are ignored.
    json = results.first
    unless json.is_a?(Hash)
      logger.error("fuzzysearch response is not a hash")
      return response.log_entry
    end

    file_url = json["url"]
    if file_url.blank?
      logger.error("fuzzysearch response has no file url")
      return response.log_entry
    end

    # The artist's url name is not read from the payload; it is taken from
    # the parsed media url.
    url_parsed = FaUriHelper.parse_fa_media_url(file_url)
    if url_parsed.blank?
      logger.error(
        format_tags(make_tag("file_url", file_url), "failed to parse file url"),
      )
      return response.log_entry
    end

    FaIdInfo.new(
      log_entry: response.log_entry,
      json:,
      fa_id: json["id"],
      artist_url_name: url_parsed.url_name,
      artist_name: json["artist"],
      deleted: json["deleted"],
      file_url:,
      file_sha256: json["sha256"],
      tags: json["tags"],
    )
  end
end