normalize fa cdn hosts to avoid redownloading files
This commit is contained in:
@@ -117,11 +117,25 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
|
||||
uri = Addressable::URI.parse(submission.full_res_img)
|
||||
uri.scheme = "https" if uri.scheme.blank?
|
||||
|
||||
if (file = post.file) && (file.url_str != uri.to_s)
|
||||
file = post.files.build(url_str: uri.to_s)
|
||||
else
|
||||
file = post.file || post.build_file(url_str: uri.to_s)
|
||||
# resolve the existing file and check if the URL has changed.
|
||||
# sometimes, the domain will change from `d.facdn.net` to
|
||||
# `d.furaffinity.net`, and we want to ignore the change in that case
|
||||
file = post.file
|
||||
if file && (old_url_str = file.url_str) && (old_url_str != uri.to_s)
|
||||
if self.class.uri_same_with_normalized_facdn_host?(old_url_str, uri.to_s)
|
||||
logger.info(
|
||||
format_tags(
|
||||
make_tag("old_url_str", old_url_str),
|
||||
make_tag("new_url_str", uri.to_s),
|
||||
"file url has changed, but is the same domain",
|
||||
),
|
||||
)
|
||||
else
|
||||
file = post.files.build(url_str: uri.to_s)
|
||||
end
|
||||
end
|
||||
file ||= post.build_file(url_str: uri.to_s)
|
||||
|
||||
if file.url_str_changed?
|
||||
file.enqueue_job_after_save(
|
||||
Domain::Fa::Job::ScanFileJob,
|
||||
@@ -142,4 +156,22 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
|
||||
post.posted_at = submission.posted_date&.in_time_zone("UTC")
|
||||
post.scanned_at = Time.now
|
||||
end
|
||||
|
||||
# Hosts treated as interchangeable when comparing file URLs: a file URL may
# move from one of these hosts to the other, and that change is ignored.
FA_CDN_HOSTS = %w[d.facdn.net d.furaffinity.net].freeze

# Decides whether two file URLs should be considered the same file.
#
# When BOTH URLs have a host listed in FA_CDN_HOSTS, the scheme and host are
# dropped from each and the remaining URI components are compared. In every
# other case the two original strings must be exactly equal.
#
# url_str     - the previously stored URL string
# new_url_str - the newly observed URL string
#
# Returns true if the URLs are considered equivalent, false otherwise.
sig { params(url_str: String, new_url_str: String).returns(T::Boolean) }
def self.uri_same_with_normalized_facdn_host?(url_str, new_url_str)
old_uri = Addressable::URI.parse(url_str)
new_uri = Addressable::URI.parse(new_url_str)
parsed = [old_uri, new_uri]

# at least one URL is not on an FA CDN host: no normalization applies, so
# fall back to strict equality of the original strings
on_cdn = parsed.all? { |candidate| FA_CDN_HOSTS.include?(candidate.host) }
return url_str == new_url_str unless on_cdn

# both URIs have an facdn host, so compare them but ignore scheme and host
parsed.each do |candidate|
candidate.scheme = nil
candidate.host = nil
end
old_uri == new_uri
end
|
||||
end
|
||||
|
||||
@@ -219,4 +219,81 @@ describe Domain::Fa::Job::ScanPostJob do
|
||||
expect(post.state).to eq("removed")
|
||||
end
|
||||
end
|
||||
|
||||
# Specs for the class-level URL comparison helper. The "." prefix follows the
# RSpec convention for class methods ("#" is reserved for instance methods).
describe ".uri_same_with_normalized_facdn_host?" do
# NOTE(review): presumably consumed by shared setup defined outside this
# block; nothing here reads it directly - confirm before removing.
let(:client_mock_config) { [] }

# Asserts `result` for every combination of scheme presence on the two URLs.
# The including context must define host1, host2, path1 and path2.
shared_examples "has result" do |result|
{
"both have scheme" => %w[https:// https://],
"both missing scheme" => %w[// //],
"one has scheme" => %w[https:// //],
}.each do |label, (prefix1, prefix2)|
it "is #{result}, #{label}" do
url1 = "#{prefix1}#{host1}#{path1}"
url2 = "#{prefix2}#{host2}#{path2}"
expect(
described_class.uri_same_with_normalized_facdn_host?(url1, url2),
).to eq(result)
end
end
end

shared_context "host: different cdn hosts" do
let(:host1) { "d.facdn.net" }
let(:host2) { "d.furaffinity.net" }
end

shared_context "host: both hosts are d.facdn.net" do
let(:host1) { "d.facdn.net" }
let(:host2) { "d.facdn.net" }
end

shared_context "host: both hosts are d.furaffinity.net" do
let(:host1) { "d.furaffinity.net" }
let(:host2) { "d.furaffinity.net" }
end

shared_context "host: one domain is not a cdn" do
let(:host1) { "d.facdn.net" }
let(:host2) { "example.com" }
end

shared_context "paths: are the same" do
let(:path1) { "/art/user/1234567890/image.jpg" }
let(:path2) { "/art/user/1234567890/image.jpg" }
end

shared_context "paths: are different" do
let(:path1) { "/art/user/1234567890/image.jpg" }
let(:path2) { "/art/user/1234567890/some_other_image.jpg" }
end

# Only identical paths served from CDN hosts on both sides count as the
# same file; every other combination must be reported as different.
{
"paths: are the same" => {
"host: different cdn hosts" => true,
"host: both hosts are d.facdn.net" => true,
"host: both hosts are d.furaffinity.net" => true,
"host: one domain is not a cdn" => false,
},
"paths: are different" => {
"host: different cdn hosts" => false,
"host: both hosts are d.facdn.net" => false,
"host: both hosts are d.furaffinity.net" => false,
"host: one domain is not a cdn" => false,
},
}.each do |path_context, expectations|
expectations.each do |host_context, result|
context "#{host_context} and #{path_context}" do
include_context host_context
include_context path_context
include_examples "has result", result
end
end
end
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user