parse posted_at from fa static file url
This commit is contained in:
57
app/helpers/fa_uri_helper.rb
Normal file
57
app/helpers/fa_uri_helper.rb
Normal file
@@ -0,0 +1,57 @@
|
||||
# typed: strict
|
||||
|
||||
# Helpers for recognising FurAffinity CDN hosts and for splitting FA media
# ("art") URLs into user name, timestamps and filename.
module FaUriHelper
  extend T::Sig

  # Hosts whose URLs are treated as FA CDN media URLs.
  FA_CDN_HOSTS = %w[d.facdn.net d.furaffinity.net].freeze

  # Immutable value object holding the components extracted from an FA media
  # URL path by FaUriHelper.parse_fa_media_url.
  class FaMediaUrlInfo < T::ImmutableStruct
    extend T::Sig
    include T::Struct::ActsAsComparable

    # The `<user.url_name>` path segment.
    const :url_name, String
    # Integer prefix of the filename (`og_file_ts`): when the post was
    # originally made. Passed to Time.at, i.e. treated as epoch seconds.
    const :original_file_posted, Integer
    # Integer directory segment (`latest_file_ts`): timestamp of the most up to
    # date file uploaded for the post. Passed to Time.at as well.
    const :latest_file_posted, Integer
    # Everything after the `<og_file_ts>.` prefix of the last path segment.
    const :filename, String

    # @return [Time] `original_file_posted` converted with Time.at
    sig { returns(Time) }
    def original_file_posted_at
      Time.at(original_file_posted)
    end

    # @return [Time] `latest_file_posted` converted with Time.at
    sig { returns(Time) }
    def latest_file_posted_at
      Time.at(latest_file_posted)
    end
  end

  # Extracts the user name, timestamps and filename from an FA media URL.
  #
  # Errors raised by Addressable::URI.parse are not rescued here.
  #
  # @param url_str [String] URL to inspect
  # @return [FaMediaUrlInfo, nil] nil when the URL has no host, the host is not
  #   one of FA_CDN_HOSTS, or the path does not have the expected shape
  sig { params(url_str: String).returns(T.nilable(FaMediaUrlInfo)) }
  def self.parse_fa_media_url(url_str)
    uri = Addressable::URI.parse(url_str)
    # uri.host is nil for host-less input (e.g. "" or a relative path);
    # is_fa_cdn_host? accepts nil and answers false for it.
    return nil unless is_fa_cdn_host?(uri.host)

    # paths are in the form of
    #   `art/<user.url_name>/<latest_file_ts>/<og_file_ts>.<rest_of_filename>`
    # latest_file_ts is the timestamp of the most up to date file that has been
    #   uploaded for the post
    # og_file_ts is the timestamp of when the post was originally made
    match =
      uri.path.match(
        %r{/art/(?<url_name>[^/]+)/(?<latest_ts>\d+)/(?<original_ts>\d+)\.(?<filename>.*)},
      )
    return nil unless match

    FaMediaUrlInfo.new(
      url_name: match[:url_name],
      original_file_posted: match[:original_ts].to_i,
      latest_file_posted: match[:latest_ts].to_i,
      filename: match[:filename],
    )
  end

  # Whether `host` is one of the known FA CDN hosts (exact string match).
  #
  # @param host [String, nil] host component of a URI; nil (a URI without a
  #   host) is accepted and yields false rather than failing the type check
  # @return [Boolean]
  sig { params(host: T.nilable(String)).returns(T::Boolean) }
  def self.is_fa_cdn_host?(host)
    FA_CDN_HOSTS.include?(host)
  end
end
|
||||
@@ -157,7 +157,6 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
|
||||
post.scanned_at = Time.now
|
||||
end
|
||||
|
||||
FA_CDN_HOSTS = %w[d.facdn.net d.furaffinity.net].freeze
|
||||
sig { params(url_str: String, new_url_str: String).returns(T::Boolean) }
|
||||
def self.uri_same_with_normalized_facdn_host?(url_str, new_url_str)
|
||||
uri = Addressable::URI.parse(url_str)
|
||||
@@ -165,7 +164,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
|
||||
uri.scheme = nil
|
||||
new_uri.scheme = nil
|
||||
|
||||
if [uri, new_uri].all? { |uri| FA_CDN_HOSTS.include?(uri.host) }
|
||||
if [uri, new_uri].all? { |uri| FaUriHelper.is_fa_cdn_host?(uri.host) }
|
||||
# both URIs have an facdn host, so compare them but ignore the host
|
||||
uri.host = nil
|
||||
new_uri.host = nil
|
||||
|
||||
@@ -177,7 +177,12 @@ class Domain::Post::FaPost < Domain::Post
|
||||
pa = super
|
||||
return pa unless pa.nil?
|
||||
|
||||
log_entry = guess_last_submission_log_entry
|
||||
if file_url_str = file&.url_str
|
||||
parsed = FaUriHelper.parse_fa_media_url(file_url_str)
|
||||
return parsed.original_file_posted_at.in_time_zone("UTC") if parsed
|
||||
end
|
||||
|
||||
log_entry = last_submission_log_entry || guess_last_submission_log_entry
|
||||
if log_entry&.response_bytes
|
||||
parser =
|
||||
Domain::Fa::Parser::Page.from_log_entry(
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
</div>
|
||||
</div>
|
||||
<div class="mt-2 flex flex-wrap gap-x-4 text-slate-600 leading-relaxed justify-between">
|
||||
<span title="<%= post.posted_at&.strftime("%Y-%m-%d") %>">
|
||||
<span title="<%= post.posted_at&.strftime("%Y-%m-%d") || 'Unknown' %>">
|
||||
<i class="fa-regular fa-calendar mr-1"></i>
|
||||
<% if posted_at = post.posted_at %>
|
||||
Posted <%= time_ago_in_words(posted_at) %> ago
|
||||
|
||||
38
spec/helpers/fa_uri_helper_spec.rb
Normal file
38
spec/helpers/fa_uri_helper_spec.rb
Normal file
@@ -0,0 +1,38 @@
|
||||
# typed: false
|
||||
require "rails_helper"
|
||||
|
||||
# Specs for FaUriHelper.parse_fa_media_url: successful decomposition of FA
# media URLs, plus the nil-returning branches.
RSpec.describe FaUriHelper do
  # `.` prefix because parse_fa_media_url is called on the module itself.
  describe ".parse_fa_media_url" do
    it "parses a media URL with same timestamps" do
      url =
        "https://d.furaffinity.net/art/zzreg/1740700581/1740700581.zzreg_stippling-crop.jpg"
      parsed = described_class.parse_fa_media_url(url)
      expect(parsed).to eq(
        FaUriHelper::FaMediaUrlInfo.new(
          url_name: "zzreg",
          original_file_posted: 1_740_700_581,
          latest_file_posted: 1_740_700_581,
          filename: "zzreg_stippling-crop.jpg",
        ),
      )
      expect(parsed.original_file_posted_at).to eq(Time.at(1_740_700_581))
      expect(parsed.latest_file_posted_at).to eq(Time.at(1_740_700_581))
    end

    it "parses a media URL with different timestamps" do
      url =
        "https://d.furaffinity.net/art/zzreg/1753374875/1740700581.zzreg_stippling-crop.jpg"
      parsed = described_class.parse_fa_media_url(url)
      expect(parsed).to eq(
        FaUriHelper::FaMediaUrlInfo.new(
          url_name: "zzreg",
          original_file_posted: 1_740_700_581,
          latest_file_posted: 1_753_374_875,
          filename: "zzreg_stippling-crop.jpg",
        ),
      )
      expect(parsed.original_file_posted_at).to eq(Time.at(1_740_700_581))
      expect(parsed.latest_file_posted_at).to eq(Time.at(1_753_374_875))
    end

    it "returns nil when the host is not an FA CDN host" do
      # Same path shape as above; only the host differs.
      url =
        "https://example.com/art/zzreg/1740700581/1740700581.zzreg_stippling-crop.jpg"
      expect(described_class.parse_fa_media_url(url)).to be_nil
    end

    it "returns nil when the path is not a media path" do
      # CDN host, but the path lacks the /art/<name>/<ts>/<ts>.<file> shape.
      url = "https://d.furaffinity.net/themes/beta/img/banners/fa_logo.png"
      expect(described_class.parse_fa_media_url(url)).to be_nil
    end
  end
end
|
||||
@@ -48,6 +48,53 @@ RSpec.describe Domain::Post::FaPost do
|
||||
end
|
||||
end
|
||||
|
||||
# Covers the posted_at fallbacks exercised below: the stored value, the
# timestamp embedded in the file URL, and the submission page log entry.
describe "posted_at" do
  let(:post_url_str) do
    "https://d.furaffinity.net/art/zzreg/1740700581/1740700581.zzreg_stippling-crop.jpg"
  end
  let(:post) { create(:domain_post_fa_post, posted_at: 1.day.ago) }
  let(:post_file) do
    create(:domain_post_file, post:, url_str: post_url_str)
  end

  it "can be extracted from file uri if no explicit posted_at is set" do
    # Touch the lazy `let` so the file record exists before the post is saved.
    post_file
    post.posted_at = nil
    post.save!
    post.reload

    expected_time = Time.at(1_740_700_581)
    expect(post.posted_at).to eq(expected_time)
  end

  it "uses the existing posted_at if it is set" do
    # Touch the lazy `let` so the file record exists.
    post_file
    expect(post.posted_at).to be_within(10.seconds).of(1.day.ago)
  end

  it "guesses it from the submission log entry if all else fails" do
    # post_file is deliberately never referenced, so no file record is created.
    post.posted_at = nil

    fixture_path =
      Rails.root.join(
        "test/fixtures/files/domain/fa/submission/submission_page_59723907.html",
      )
    response_blob = create(:blob_file, contents: File.read(fixture_path))
    post.last_submission_log_entry =
      create(:http_log_entry, response: response_blob)
    post.save!
    post.reload

    expected_time = Time.parse("Feb 1, 2025 07:15 AM PST")
    expect(post.posted_at).to be_within(1.minute).of(expected_time)
  end
end
|
||||
|
||||
describe "attributes" do
|
||||
let(:post) { build(:domain_post_fa_post) }
|
||||
let(:time) { Time.now }
|
||||
|
||||
Reference in New Issue
Block a user