4 Commits

Author SHA1 Message Date
Dylan Knutson
bbca0db010 make fa:enqueue_missing_posts incremental 2025-07-24 17:47:04 +00:00
Dylan Knutson
af4d84ccb1 parse posted_at from fa static file url 2025-07-24 17:17:42 +00:00
Dylan Knutson
457a4e4609 Create task task-71 2025-07-24 17:14:59 +00:00
Dylan Knutson
3ea2428814 Create task task-70 2025-07-24 17:07:32 +00:00
9 changed files with 229 additions and 8 deletions

View File

@@ -0,0 +1,57 @@
# typed: strict
# frozen_string_literal: true

# Helpers for recognizing and parsing FurAffinity (FA) CDN media URLs.
module FaUriHelper
  extend T::Sig

  # Hostnames used by the FA static-file CDN (legacy and current domains).
  FA_CDN_HOSTS = %w[d.facdn.net d.furaffinity.net].freeze

  # Media paths have the shape:
  #   /art/<user.url_name>/<latest_file_ts>/<og_file_ts>.<rest_of_filename>
  # latest_file_ts is the timestamp of the most up to date file that has been
  # uploaded for the post; og_file_ts is the timestamp of when the post was
  # originally made. Hoisted to a frozen constant so it is compiled once.
  MEDIA_PATH_REGEX =
    T.let(
      %r{/art/(?<url_name>[^/]+)/(?<latest_ts>\d+)/(?<original_ts>\d+)\.(?<filename>.*)},
      Regexp,
    )

  # Parsed components of an FA CDN media URL.
  class FaMediaUrlInfo < T::ImmutableStruct
    extend T::Sig
    include T::Struct::ActsAsComparable

    const :url_name, String               # owner's FA url_name from the path
    const :original_file_posted, Integer  # unix timestamp of the original post
    const :latest_file_posted, Integer    # unix timestamp of the latest file upload
    const :filename, String               # path remainder after the timestamps

    # Time the post was originally made.
    sig { returns(Time) }
    def original_file_posted_at
      Time.at(original_file_posted)
    end

    # Time the most recent file revision was uploaded.
    sig { returns(Time) }
    def latest_file_posted_at
      Time.at(latest_file_posted)
    end
  end

  # Parses `url_str` as an FA CDN media URL.
  #
  # Returns nil when the string is not a valid URI, the host is not an FA CDN
  # host, or the path does not match the expected media-path shape.
  # (Previously a malformed URI string raised Addressable::URI::InvalidURIError
  # out of this method instead of returning nil.)
  sig { params(url_str: String).returns(T.nilable(FaMediaUrlInfo)) }
  def self.parse_fa_media_url(url_str)
    uri =
      begin
        Addressable::URI.parse(url_str)
      rescue Addressable::URI::InvalidURIError
        return nil
      end
    return nil unless is_fa_cdn_host?(uri.host)

    match = uri.path.match(MEDIA_PATH_REGEX)
    return nil unless match

    FaMediaUrlInfo.new(
      url_name: T.must(match[:url_name]),
      original_file_posted: T.must(match[:original_ts]).to_i,
      latest_file_posted: T.must(match[:latest_ts]).to_i,
      filename: T.must(match[:filename]),
    )
  end

  # True when `host` is one of the FA CDN hostnames. Accepts nil — Addressable
  # returns a nil host for relative/host-less URIs, and callers pass `uri.host`
  # directly — and returns false in that case. (The previous String-only sig
  # made sorbet-runtime raise TypeError on such input.)
  sig { params(host: T.nilable(String)).returns(T::Boolean) }
  def self.is_fa_cdn_host?(host)
    FA_CDN_HOSTS.include?(host)
  end
end

View File

@@ -157,7 +157,6 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
post.scanned_at = Time.now
end
FA_CDN_HOSTS = %w[d.facdn.net d.furaffinity.net].freeze
sig { params(url_str: String, new_url_str: String).returns(T::Boolean) }
def self.uri_same_with_normalized_facdn_host?(url_str, new_url_str)
uri = Addressable::URI.parse(url_str)
@@ -165,7 +164,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
uri.scheme = nil
new_uri.scheme = nil
if [uri, new_uri].all? { |uri| FA_CDN_HOSTS.include?(uri.host) }
if [uri, new_uri].all? { |uri| FaUriHelper.is_fa_cdn_host?(uri.host) }
# both URIs have an facdn host, so compare them but ignore the host
uri.host = nil
new_uri.host = nil

View File

@@ -177,7 +177,12 @@ class Domain::Post::FaPost < Domain::Post
pa = super
return pa unless pa.nil?
log_entry = guess_last_submission_log_entry
if file_url_str = file&.url_str
parsed = FaUriHelper.parse_fa_media_url(file_url_str)
return parsed.original_file_posted_at.in_time_zone("UTC") if parsed
end
log_entry = last_submission_log_entry || guess_last_submission_log_entry
if log_entry&.response_bytes
parser =
Domain::Fa::Parser::Page.from_log_entry(

View File

@@ -28,7 +28,7 @@
</div>
</div>
<div class="mt-2 flex flex-wrap gap-x-4 text-slate-600 leading-relaxed justify-between">
<span title="<%= post.posted_at&.strftime("%Y-%m-%d") %>">
<span title="<%= post.posted_at&.strftime("%Y-%m-%d") || 'Unknown' %>">
<i class="fa-regular fa-calendar mr-1"></i>
<% if posted_at = post.posted_at %>
Posted <%= time_ago_in_words(posted_at) %> ago

View File

@@ -0,0 +1,21 @@
---
id: task-70
title: Investigate VIPS segfault for post file ID ~18594368
status: To Do
assignee: []
created_date: '2025-07-24'
labels: []
dependencies: []
---
## Description
VIPS is crashing with a segmentation fault when processing a post file with ID around 18594368. This is causing application instability and preventing proper file processing. Need to identify the root cause and implement a fix or workaround.
## Acceptance Criteria
- [ ] Root cause of VIPS segfault is identified
- [ ] Problematic file or processing step is isolated
- [ ] Fix or workaround is implemented to prevent segfaults
- [ ] Application stability is restored for this file type
- [ ] Regression testing confirms no new segfaults introduced

View File

@@ -0,0 +1,22 @@
---
id: task-71
title: Add SimpleCov for code coverage reporting
status: To Do
assignee: []
created_date: '2025-07-24'
labels: []
dependencies: []
---
## Description
Implement code coverage reporting to track test coverage across the codebase and identify untested code areas, helping improve overall test quality and maintainability.
## Acceptance Criteria
- [ ] SimpleCov gem is added to Gemfile and installed
- [ ] Code coverage reports are generated when running specs
- [ ] Coverage reports exclude irrelevant files (vendored gems, spec files, etc.)
- [ ] Minimum coverage threshold is configured and enforced
- [ ] Coverage reports are accessible in HTML format for review
- [ ] Coverage data integrates with existing test workflow

View File

@@ -22,10 +22,40 @@ namespace :fa do
end
task enqueue_missing_posts: %i[set_logger_stdout environment] do |t, args|
start_at =
ENV["start_at"]&.to_i ||
raise("need start_at (highest fa_id already present)")
stop_at = ENV["stop_at"]&.to_i
helper = Class.new.extend(ActionView::Helpers::NumberHelper)
global_state_key = "task-fa-enqueue-missing-posts-incremental"
start_at = ENV["start_at"]
if start_at.is_a?(String) && start_at == "last"
start_at = Domain::Post::FaPost.where(state: :ok).maximum(:fa_id) - 1000
start_at = 0 if start_at < 0
stop_at =
ENV["stop_at"]&.to_i || GlobalState.get(global_state_key)&.to_i ||
raise("need explicitly set stop_at")
else
stop_at = ENV["stop_at"]&.to_i
start_at =
start_at.to_i || raise("need start_at (highest fa_id already present)")
end
puts "start fa id: #{start_at.to_s.bold}"
puts "stop fa id: #{stop_at.to_s.bold}"
if start_at <= stop_at
puts "start_at <= stop_at, nothing to do"
next nil
end
puts "counting missing posts..."
num_posts = start_at - stop_at + 1
num_existing_posts =
Domain::Post::FaPost.where(fa_id: stop_at..start_at).count
num_missing_posts = num_posts - num_existing_posts
# done in reverse order, so start_at > stop_at
puts "total posts: #{helper.number_with_delimiter(num_posts).bold}"
puts "existing posts: #{helper.number_with_delimiter(num_existing_posts).bold}"
puts "missing posts: #{helper.number_with_delimiter(num_missing_posts).bold}"
low_water_mark = 50
high_water_mark = 300
poll_duration = 10
@@ -39,6 +69,8 @@ namespace :fa do
)
loop { sleep poll_duration if enqueuer.run_once == :sleep }
puts "setting global state key: #{global_state_key} to #{start_at.to_s.bold}"
GlobalState.set(global_state_key, start_at.to_s)
end
task enqueue_unscanned_ok_posts: %i[set_logger_stdout environment] do

View File

@@ -0,0 +1,38 @@
# typed: false
require "rails_helper"

RSpec.describe FaUriHelper do
  # FA media URLs embed two unix timestamps:
  #   /art/<url_name>/<latest_file_ts>/<original_file_ts>.<filename>
  # Class method, so use the `.method` describe convention (not `#method`).
  describe ".parse_fa_media_url" do
    it "parses a media URL with same timestamps" do
      # Single-revision post: both timestamps are identical.
      url =
        "https://d.furaffinity.net/art/zzreg/1740700581/1740700581.zzreg_stippling-crop.jpg"
      parsed = described_class.parse_fa_media_url(url)
      expect(parsed).to eq(
        FaUriHelper::FaMediaUrlInfo.new(
          url_name: "zzreg",
          original_file_posted: 1_740_700_581,
          latest_file_posted: 1_740_700_581,
          filename: "zzreg_stippling-crop.jpg",
        ),
      )
      expect(parsed.original_file_posted_at).to eq(Time.at(1_740_700_581))
      expect(parsed.latest_file_posted_at).to eq(Time.at(1_740_700_581))
    end

    it "parses a media URL with different timestamps" do
      # Re-uploaded file: latest_ts (first path segment) is newer than
      # original_ts (filename prefix).
      url =
        "https://d.furaffinity.net/art/zzreg/1753374875/1740700581.zzreg_stippling-crop.jpg"
      parsed = described_class.parse_fa_media_url(url)
      expect(parsed).to eq(
        FaUriHelper::FaMediaUrlInfo.new(
          url_name: "zzreg",
          original_file_posted: 1_740_700_581,
          latest_file_posted: 1_753_374_875,
          filename: "zzreg_stippling-crop.jpg",
        ),
      )
      expect(parsed.original_file_posted_at).to eq(Time.at(1_740_700_581))
      expect(parsed.latest_file_posted_at).to eq(Time.at(1_753_374_875))
    end

    it "returns nil for non-CDN hosts and non-media paths" do
      expect(
        described_class.parse_fa_media_url(
          "https://example.com/art/zzreg/1740700581/1740700581.a.jpg",
        ),
      ).to be_nil
      expect(
        described_class.parse_fa_media_url(
          "https://d.furaffinity.net/not-art/zzreg.jpg",
        ),
      ).to be_nil
    end
  end
end

View File

@@ -48,6 +48,53 @@ RSpec.describe Domain::Post::FaPost do
end
end
describe "posted_at" do
let(:post_url_str) do
"https://d.furaffinity.net/art/zzreg/1740700581/1740700581.zzreg_stippling-crop.jpg"
end
let(:post) { create(:domain_post_fa_post, posted_at: 1.day.ago) }
let(:post_file) do
create(:domain_post_file, post: post, url_str: post_url_str)
end
it "can be extracted from file uri if no explicit posted_at is set" do
post_file # ensure the file is created
post.posted_at = nil
post.save!
post.reload
expect(post.posted_at).to eq(Time.at(1_740_700_581))
end
it "uses the existing posted_at if it is set" do
post_file # ensure the file is created
expect(post.posted_at).to be_within(10.seconds).of(1.day.ago)
end
it "guesses it from the submission log entry if all else fails" do
# do not create the post file
post.posted_at = nil
post.last_submission_log_entry =
create(
:http_log_entry,
response:
create(
:blob_file,
contents:
File.read(
Rails.root.join(
"test/fixtures/files/domain/fa/submission/submission_page_59723907.html",
),
),
),
)
post.save!
post.reload
expect(post.posted_at).to be_within(1.minute).of(
Time.parse("Feb 1, 2025 07:15 AM PST"),
)
end
end
describe "attributes" do
let(:post) { build(:domain_post_fa_post) }
let(:time) { Time.now }