parse posted_at from fa static file url

This commit is contained in:
Dylan Knutson
2025-07-24 17:17:42 +00:00
parent 457a4e4609
commit af4d84ccb1
6 changed files with 150 additions and 4 deletions

View File

@@ -0,0 +1,57 @@
# typed: strict
# Helpers for recognising and parsing FurAffinity (FA) CDN media URLs.
module FaUriHelper
  extend T::Sig

  # Hostnames treated as FA CDN hosts.
  FA_CDN_HOSTS = %w[d.facdn.net d.furaffinity.net].freeze

  # Immutable, comparable value object holding the components extracted from
  # an FA media URL path. The two `*_file_posted` fields are the integer
  # timestamps taken from the path; the `*_at` readers convert them with
  # `Time.at`, i.e. they are interpreted as seconds since the Unix epoch.
  class FaMediaUrlInfo < T::ImmutableStruct
    extend T::Sig
    include T::Struct::ActsAsComparable

    const :url_name, String
    const :original_file_posted, Integer
    const :latest_file_posted, Integer
    const :filename, String

    # Time at which the post was originally made.
    sig { returns(Time) }
    def original_file_posted_at
      Time.at(original_file_posted)
    end

    # Time at which the most up to date file for the post was uploaded.
    sig { returns(Time) }
    def latest_file_posted_at
      Time.at(latest_file_posted)
    end
  end

  # Parses an FA CDN media URL into its components.
  #
  # Paths are in the form of
  # `art/<user.url_name>/<latest_file_ts>/<og_file_ts>.<rest_of_filename>`:
  # - latest_file_ts is the timestamp of the most up to date file that has
  #   been uploaded for the post
  # - og_file_ts is the timestamp of when the post was originally made
  #
  # Returns nil when the URL's host is not one of FA_CDN_HOSTS (this includes
  # URLs that have no host at all) or when the path does not have the
  # expected shape.
  sig { params(url_str: String).returns(T.nilable(FaMediaUrlInfo)) }
  def self.parse_fa_media_url(url_str)
    uri = Addressable::URI.parse(url_str)
    # `uri.host` may be nil (e.g. for a relative URL); the host check accepts
    # nil and simply reports "not a CDN host".
    return nil unless is_fa_cdn_host?(uri.host)

    match =
      uri.path.match(
        %r{/art/(?<url_name>[^/]+)/(?<latest_ts>\d+)/(?<original_ts>\d+)\.(?<filename>.*)},
      )
    return nil unless match

    FaMediaUrlInfo.new(
      url_name: match[:url_name],
      original_file_posted: match[:original_ts].to_i,
      latest_file_posted: match[:latest_ts].to_i,
      filename: match[:filename],
    )
  end

  # Whether `host` is one of the known FA CDN hostnames.
  #
  # `host` is nilable because callers pass `Addressable::URI#host` straight
  # through, and that is nil for URLs without an authority component. A nil
  # host is never a CDN host, so the result is false rather than a runtime
  # signature violation.
  sig { params(host: T.nilable(String)).returns(T::Boolean) }
  def self.is_fa_cdn_host?(host)
    FA_CDN_HOSTS.include?(host)
  end
end

View File

@@ -157,7 +157,6 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
post.scanned_at = Time.now
end
FA_CDN_HOSTS = %w[d.facdn.net d.furaffinity.net].freeze
sig { params(url_str: String, new_url_str: String).returns(T::Boolean) }
def self.uri_same_with_normalized_facdn_host?(url_str, new_url_str)
uri = Addressable::URI.parse(url_str)
@@ -165,7 +164,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
uri.scheme = nil
new_uri.scheme = nil
if [uri, new_uri].all? { |uri| FA_CDN_HOSTS.include?(uri.host) }
if [uri, new_uri].all? { |uri| FaUriHelper.is_fa_cdn_host?(uri.host) }
# both URIs have an facdn host, so compare them but ignore the host
uri.host = nil
new_uri.host = nil

View File

@@ -177,7 +177,12 @@ class Domain::Post::FaPost < Domain::Post
pa = super
return pa unless pa.nil?
log_entry = guess_last_submission_log_entry
if file_url_str = file&.url_str
parsed = FaUriHelper.parse_fa_media_url(file_url_str)
return parsed.original_file_posted_at.in_time_zone("UTC") if parsed
end
log_entry = last_submission_log_entry || guess_last_submission_log_entry
if log_entry&.response_bytes
parser =
Domain::Fa::Parser::Page.from_log_entry(

View File

@@ -28,7 +28,7 @@
</div>
</div>
<div class="mt-2 flex flex-wrap gap-x-4 text-slate-600 leading-relaxed justify-between">
<span title="<%= post.posted_at&.strftime("%Y-%m-%d") %>">
<span title="<%= post.posted_at&.strftime("%Y-%m-%d") || 'Unknown' %>">
<i class="fa-regular fa-calendar mr-1"></i>
<% if posted_at = post.posted_at %>
Posted <%= time_ago_in_words(posted_at) %> ago

View File

@@ -0,0 +1,38 @@
# typed: false
require "rails_helper"
# Specs for FaUriHelper's parsing of FA CDN media URLs.
RSpec.describe FaUriHelper do
  describe "#parse_fa_media_url" do
    # Both timestamp path segments carry the same value.
    it "parses a media URL with same timestamps" do
      posted_ts = 1_740_700_581
      info =
        described_class.parse_fa_media_url(
          "https://d.furaffinity.net/art/zzreg/1740700581/1740700581.zzreg_stippling-crop.jpg",
        )
      expected_info =
        described_class::FaMediaUrlInfo.new(
          url_name: "zzreg",
          original_file_posted: posted_ts,
          latest_file_posted: posted_ts,
          filename: "zzreg_stippling-crop.jpg",
        )

      expect(info).to eq(expected_info)
      expect(info.original_file_posted_at).to eq(Time.at(posted_ts))
      expect(info.latest_file_posted_at).to eq(Time.at(posted_ts))
    end

    # The first timestamp segment (latest file) differs from the second
    # (original file); each must land in the matching field.
    it "parses a media URL with different timestamps" do
      original_ts = 1_740_700_581
      latest_ts = 1_753_374_875
      info =
        described_class.parse_fa_media_url(
          "https://d.furaffinity.net/art/zzreg/1753374875/1740700581.zzreg_stippling-crop.jpg",
        )
      expected_info =
        described_class::FaMediaUrlInfo.new(
          url_name: "zzreg",
          original_file_posted: original_ts,
          latest_file_posted: latest_ts,
          filename: "zzreg_stippling-crop.jpg",
        )

      expect(info).to eq(expected_info)
      expect(info.original_file_posted_at).to eq(Time.at(original_ts))
      expect(info.latest_file_posted_at).to eq(Time.at(latest_ts))
    end
  end
end

View File

@@ -48,6 +48,53 @@ RSpec.describe Domain::Post::FaPost do
end
end
# Exercises the sources posted_at can come from: an explicitly stored value,
# the timestamp embedded in the post file's URL, and the submission page held
# in the post's last submission log entry.
describe "posted_at" do
# FA CDN media URL whose two timestamp path segments are both 1740700581.
let(:post_url_str) do
"https://d.furaffinity.net/art/zzreg/1740700581/1740700581.zzreg_stippling-crop.jpg"
end
# Created with an explicit posted_at; examples clear it when they need the
# fallback behaviour.
let(:post) { create(:domain_post_fa_post, posted_at: 1.day.ago) }
# Lazily created file attached to the post; examples reference it only when
# the post should have a file.
let(:post_file) do
create(:domain_post_file, post: post, url_str: post_url_str)
end
it "can be extracted from file uri if no explicit posted_at is set" do
post_file # ensure the file is created
# Clear the explicit value and round-trip through the database.
post.posted_at = nil
post.save!
post.reload
# Expect the timestamp embedded in the file URL.
expect(post.posted_at).to eq(Time.at(1_740_700_581))
end
it "uses the existing posted_at if it is set" do
post_file # ensure the file is created
# The explicit value from the factory wins even though a file URL exists.
expect(post.posted_at).to be_within(10.seconds).of(1.day.ago)
end
it "guesses it from the submission log entry if all else fails" do
# do not create the post file
post.posted_at = nil
# Attach a log entry whose response body is a saved submission page fixture.
post.last_submission_log_entry =
create(
:http_log_entry,
response:
create(
:blob_file,
contents:
File.read(
Rails.root.join(
"test/fixtures/files/domain/fa/submission/submission_page_59723907.html",
),
),
),
)
post.save!
post.reload
# NOTE(review): presumably the posting time shown on the fixture page —
# verify against the fixture if this expectation ever changes.
expect(post.posted_at).to be_within(1.minute).of(
Time.parse("Feb 1, 2025 07:15 AM PST"),
)
end
end
describe "attributes" do
let(:post) { build(:domain_post_fa_post) }
let(:time) { Time.now }