Update .gitignore, Gemfile, and various job classes for improved functionality

- Added '*.notes.md' and '*.export' to .gitignore to prevent unnecessary files from being tracked.
- Refactored job classes in the Domain::Fa module to enhance logging and job enqueuing processes, including:
  - Improved logging messages in Domain::Fa::Job::Base for better clarity.
  - Added support for a new 'enqueue_favs_scan' option in user job processing.
  - Enhanced the FavsJob to utilize active fav post joins and added a 'removed' flag so that un-faved posts are soft-deleted (flagged as removed) instead of hard-deleted, preserving favorite history.
- Updated user and post models to include associations/scopes for active (non-removed) favorites; user creation in post-hash building now uses find_or_create_by! with a rescue (note: still contains a debugging breakpoint to be removed).
- Enhanced the page parser to support the 2024 format for favorites pages (detecting the next-page ID from pagination forms as well as the legacy next button) and added tests covering both the old and 2024 formats.
This commit is contained in:
Dylan Knutson
2024-12-29 01:26:39 +00:00
parent e47b2997c8
commit ca914dbe25
22 changed files with 2843 additions and 220 deletions

4
.gitignore vendored
View File

@@ -12,6 +12,8 @@ lib/xdiff
ext/xdiff/Makefile
ext/xdiff/xdiff
*.notes.md
# Ignore bundler config.
/.bundle
@@ -54,4 +56,4 @@ ext/xdiff/xdiff
yarn-debug.log*
.yarn-integrity
.DS_Store
*.export
*.export

15
.vscode/launch.json vendored
View File

@@ -1,23 +1,12 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Debug staging server",
"type": "rdbg",
"request": "launch",
"command": "bundle",
"script": "exec",
"args": ["rails", "server"],
"env": {
"RUBY_DEBUG_OPEN": "true",
"RAILS_ENV": "staging"
}
},
{
"type": "rdbg",
"name": "Attach rdbg",
"request": "attach",
"debugPort": "/tmp/rdbg-1000/rdbg-492838"
"rdbgPath": "/usr/local/rvm/gems/default/bin/rdbg",
"debugPort": "/tmp/rdbg-1000/rdbg-45938"
}
]
}

View File

@@ -77,7 +77,7 @@ group :development, :staging do
gem "rails_live_reload",
git: "https://github.com/railsjazz/rails_live_reload",
ref: "dcd3b73904594e2c5134c2f6e05954f3937a8d29"
# gem "rails_live_reload", "0.3.6"
# gem "rails_live_reload", "0.4.0"
end
group :test do

View File

@@ -62,12 +62,12 @@ class Domain::Fa::Job::Base < Scraper::JobBase
unless @user.scan_due?(scan_type)
if @force_scan
logger.warn(
"scanned #{@user.scanned_ago_in_words(scan_type).bold} - force scanning"
"scanned #{@user.scanned_ago_in_words(scan_type).bold} - force scanning",
)
return true
else
logger.warn(
"scanned #{@user.scanned_ago_in_words(scan_type).bold} - skipping"
"scanned #{@user.scanned_ago_in_words(scan_type).bold} - skipping",
)
return false
end
@@ -117,7 +117,7 @@ class Domain::Fa::Job::Base < Scraper::JobBase
Domain::Fa::Post.where(
"fa_id >= ? AND fa_id <= ?",
min_fa_id,
max_fa_id
max_fa_id,
).pluck(:fa_id)
fa_ids_to_manually_enqueue.subtract(existing)
end
@@ -135,14 +135,14 @@ class Domain::Fa::Job::Base < Scraper::JobBase
job_type,
post,
submission,
caused_by_entry
caused_by_entry,
)
if post.creator
enqueue_user_scan(
post.creator,
caused_by_entry,
enqueue_page_scan: enqueue_page_scan,
enqueue_gallery_scan: enqueue_gallery_scan
enqueue_gallery_scan: enqueue_gallery_scan,
)
end
@@ -205,10 +205,12 @@ class Domain::Fa::Job::Base < Scraper::JobBase
user,
caused_by_entry,
enqueue_page_scan: true,
enqueue_gallery_scan: true
enqueue_gallery_scan: true,
enqueue_favs_scan: true
)
@users_enqueued_for_page_scan ||= Set.new
@users_enqueued_for_gallery_scan ||= Set.new
@users_enqueued_for_favs_scan ||= Set.new
args =
if user.persisted?
@@ -226,7 +228,7 @@ class Domain::Fa::Job::Base < Scraper::JobBase
if user.due_for_page_scan?
logger.info(
"enqueue user page job for #{user.url_name.bold}, " +
"last scanned #{time_ago_in_words(user.scanned_page_at)}"
"last scanned #{time_ago_in_words(user.scanned_page_at)}",
)
defer_job(Domain::Fa::Job::UserPageJob, args)
end
@@ -237,11 +239,21 @@ class Domain::Fa::Job::Base < Scraper::JobBase
if user.due_for_gallery_scan?
logger.info(
"enqueue user gallery job for #{user.url_name.bold}, " +
"last scanned #{time_ago_in_words(user.scanned_gallery_at)}"
"last scanned #{time_ago_in_words(user.scanned_gallery_at)}",
)
defer_job(Domain::Fa::Job::UserGalleryJob, args)
end
end
if enqueue_favs_scan && @users_enqueued_for_favs_scan.add?(user.url_name)
if user.due_for_favs_scan?
logger.info(
"enqueue user favs job for #{user.url_name.bold}, " +
"last scanned #{time_ago_in_words(user.scanned_favs_at)}",
)
defer_job(Domain::Fa::Job::FavsJob, args)
end
end
end
def normalize_enqueue_pri(enqueue_pri)
@@ -264,7 +276,7 @@ class Domain::Fa::Job::Base < Scraper::JobBase
defer_job(
Domain::Fa::Job::ScanPostJob,
{ fa_id: fa_id, caused_by_entry: caused_by_entry },
{ priority: enqueue_pri }
{ priority: enqueue_pri },
)
end
end
@@ -280,14 +292,14 @@ class Domain::Fa::Job::Base < Scraper::JobBase
defer_job(
Domain::Fa::Job::ScanPostJob,
{ post: post, caused_by_entry: caused_by_entry },
{ priority: enqueue_pri }
{ priority: enqueue_pri },
)
elsif !post.have_file?
logger.info "enqueue file scan for fa_id #{fa_id_str}"
defer_job(
Domain::Fa::Job::ScanFileJob,
{ post: post, caused_by_entry: caused_by_entry },
{ priority: enqueue_pri }
{ priority: enqueue_pri },
)
end
end

View File

@@ -9,11 +9,11 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
def perform(args)
@first_job_entry = nil
init_from_args!(args, build_user: false)
@full_scan = !!args[:full_scan]
@full_scan = !!args[:full_scan] || @force_scan
@user ||
begin
Domain::Fa::Job::UserPageJob.perform_later(
{ url_name: args[:url_name], caused_by_entry: best_caused_by_entry }
{ url_name: args[:url_name], caused_by_entry: best_caused_by_entry },
)
fatal_error("user does not exist: #{args}")
end
@@ -26,7 +26,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
@page_id = nil
@total_items_seen = 0
@seen_post_ids = Set.new
existing_faved_ids = Set.new(@user.fav_post_joins.pluck(:post_id))
existing_faved_ids = Set.new(@user.fav_post_joins.active.pluck(:post_id))
while true
ret = scan_page
@@ -54,7 +54,11 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
ReduxApplicationRecord.transaction do
if to_remove.any?
@user.fav_post_joins.where(post_id: to_remove).delete_all
@user
.fav_post_joins
.active
.where(post_id: to_remove)
.update_all(removed: true)
end
slice_size =
@@ -66,7 +70,11 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
if to_add.any?
to_add.each_slice(slice_size) do |slice|
@user.fav_post_joins.insert_all!(slice.map { |id| { post_id: id } })
@user.fav_post_joins.upsert_all(
slice.map { |id| { post_id: id, removed: false } },
unique_by: :index_domain_fa_favs_on_user_id_and_post_id,
update_only: [:removed],
)
end
end
@@ -92,13 +100,13 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
if response.status_code != 200
fatal_error(
"http #{response.status_code.to_s.red.bold}, " +
"log entry #{response.log_entry.id.to_s.bold}"
"log entry #{response.log_entry.id.to_s.bold}",
)
end
if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
@user,
response
response,
)
logger.error("account disabled / not found, abort")
return :stop
@@ -124,7 +132,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
.map do |submission|
Domain::Fa::Post.hash_from_submission_parser_helper(
submission,
first_seen_log_entry: response.log_entry
first_seen_log_entry: response.log_entry,
)
end
@@ -135,7 +143,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
.map { |row| row["id"] } unless posts_to_create_hashes.empty?
enqueue_new_post_scan_jobs(
posts_to_create_hashes.map { |hash| hash[:fa_id] }
posts_to_create_hashes.map { |hash| hash[:fa_id] },
)
@last_page_post_ids = Set.new
@@ -151,7 +159,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
logger.info [
"[page #{@page_number.to_s.bold}]",
"[posts: #{submissions.length.to_s.bold}]",
"[created: #{posts_to_create_hashes.size.to_s.bold}]"
"[created: #{posts_to_create_hashes.size.to_s.bold}]",
].join(" ")
ret
@@ -161,7 +169,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
bulk_enqueue_jobs do
fa_ids.each do |fa_id|
Domain::Fa::Job::ScanPostJob.perform_later(
{ fa_id: fa_id, caused_by_entry: best_caused_by_entry }
{ fa_id: fa_id, caused_by_entry: best_caused_by_entry },
)
# sleep 100000
end

View File

@@ -67,18 +67,32 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
submission_elems.map do |elem|
Domain::Fa::Parser::ListedSubmissionParserHelper.new(
elem,
@page_version
@page_version,
)
end
end
def favorites_next_button_id
next_regex = %r{/favorites/.+/(\d+)/next/?}
button = @page.css(".gallery-section .pagination a.button.right").first
if button
href = button["href"]
match = %r{/favorites/.+/(\d+)/next/?}.match(href)
raise("invalid favs button uri #{href}") unless match
match[1].to_i
match = next_regex.match(href) || raise("invalid favs button uri #{href}")
return match[1]
end
form =
@page
.css(".gallery-navigation form")
.to_a
.filter { |f| next_regex.match(f["action"]) }
.first
if form
action = form["action"]
match =
next_regex.match(action) || raise("invalid favs next action #{action}")
match[1]
end
end
@@ -110,7 +124,7 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
"#gallery-frontpage-submissions > figure",
"#gallery-frontpage-writing > figure",
"#gallery-frontpage-music > figure",
"#gallery-frontpage-crafts > figure"
"#gallery-frontpage-crafts > figure",
].lazy.map { |css| @page.css(css) }.reject(&:empty?).to_a.flatten
else
unimplemented_version!
@@ -159,7 +173,7 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
#gallery-browse
#gallery-favorites
#gallery-frontpage-submissions
]
],
)
true
else
@@ -200,7 +214,7 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
Domain::Fa::Parser::SubmissionParserHelper.new(
@page,
@phtml,
@page_version
@page_version,
)
end

View File

@@ -40,23 +40,23 @@ class Domain::Fa::UserEnqueuer
types << "incremental"
end
# if user.due_for_favs_scan?
# Domain::Fa::Job::FavsJob.perform_later({ user: user })
# types << "favs"
# end
# if user.due_for_page_scan?
# Domain::Fa::Job::UserPageJob.perform_later({ user: user })
# types << "page"
# end
if user.due_for_favs_scan?
Domain::Fa::Job::FavsJob.perform_later({ user: user })
types << "favs"
end
if user.due_for_page_scan?
Domain::Fa::Job::UserPageJob.perform_later({ user: user })
types << "page"
end
if user.due_for_gallery_scan?
Domain::Fa::Job::UserGalleryJob.perform_later({ user: user })
types << "gallery"
end
# if user.due_for_follows_scan?
# Domain::Fa::Job::UserFollowsJob.perform_later({ user: user })
# types << "follows"
# end
if user.due_for_follows_scan?
Domain::Fa::Job::UserFollowsJob.perform_later({ user: user })
types << "follows"
end
end
avatar = user.ensure_avatar!

View File

@@ -2,6 +2,8 @@ class Domain::Fa::Fav < ReduxApplicationRecord
self.table_name = "domain_fa_favs"
self.primary_key = %i[user_id post_id]
scope :active, -> { where(removed: false) }
belongs_to :user, class_name: "::Domain::Fa::User"
belongs_to :post, class_name: "::Domain::Fa::Post"
end

View File

@@ -157,9 +157,13 @@ class Domain::Fa::Post < ReduxApplicationRecord
first_seen_log_entry: nil
)
creator =
Domain::Fa::User.find_or_create_by(
{ url_name: submission.artist_url_name },
) { |user| user.name = submission.artist }
begin
Domain::Fa::User.find_or_create_by!(
{ url_name: submission.artist_url_name },
) { |user| user.name = submission.artist }
rescue ActiveRecord::RecordNotFound => e
binding.pry
end
{
fa_id: submission.id,

View File

@@ -1,8 +1,9 @@
class Domain::Fa::PostFactor < ReduxApplicationRecord
self.table_name = "domain_fa_post_factors"
self.primary_key = "post_id"
belongs_to :post, class_name: "::Domain::Fa::Post"
FACTORS_WIDTHS = 16
FACTORS_WIDTHS = 8
has_neighbors :for_favorite
end

View File

@@ -47,12 +47,21 @@ class Domain::Fa::User < ReduxApplicationRecord
has_many :followed_by, through: :followed_joins, source: :follower
has_many :fav_post_joins, class_name: "::Domain::Fa::Fav", inverse_of: :user
has_many :active_fav_post_joins,
-> { where(removed: false) },
class_name: "::Domain::Fa::Fav",
inverse_of: :user
has_many :fav_posts,
class_name: "::Domain::Fa::Post",
through: :fav_post_joins,
source: :post
has_many :active_fav_posts,
class_name: "::Domain::Fa::Post",
through: :active_fav_post_joins,
source: :post
# FA `name` can be up to 30 chars long,
# `url_name` can be longer.
validates_presence_of(:name, :url_name)

View File

@@ -95,7 +95,7 @@
<% similar =
@post
.disco
&.nearest_neighbors(:for_favorite, distance: "euclidean")
&.nearest_neighbors(:for_favorite, distance: "cosine")
&.limit(10)
&.includes(:post) %>
<% if similar %>

View File

@@ -19,7 +19,12 @@ Rails.application.routes.draw do
namespace :domain do
namespace :fa do
resources :users, param: :url_name, only: [:show] do
resources :users,
param: :url_name,
only: [:show],
constraints: {
url_name: %r{[^/]+},
} do
resources :posts, controller: "/domain/fa/posts"
end
resources :posts, param: :fa_id, only: [:show] do

View File

@@ -0,0 +1,7 @@
class AddRemovedToDomainFaFavs < ActiveRecord::Migration[7.2]
def change
change_table :domain_fa_favs do |t|
t.boolean :removed, null: false, default: false
end
end
end

623
db/schema.rb generated

File diff suppressed because it is too large Load Diff

View File

@@ -21,7 +21,7 @@ describe Domain::Fa::Job::FavsJob do
it "enqueues a page scan job" do
perform_now({ url_name: "zzreg" }, should_raise: true)
expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::UserPageJob)).to match(
[including(args: [including(url_name: "zzreg")])]
[including(args: [including(url_name: "zzreg")])],
)
end
end
@@ -36,8 +36,8 @@ describe Domain::Fa::Job::FavsJob do
status_code: 200,
content_type: "text/html",
contents:
SpecUtil.read_fixture_file("domain/fa/job/favs_zzreg_no_favs.html")
}
SpecUtil.read_fixture_file("domain/fa/job/favs_zzreg_no_favs.html"),
},
]
end
@@ -71,8 +71,8 @@ describe Domain::Fa::Job::FavsJob do
content_type: "text/html",
contents:
SpecUtil.read_fixture_file(
"domain/fa/job/favs_zzreg_page_0_first.html"
)
"domain/fa/job/favs_zzreg_page_0_first.html",
),
},
{
uri: "https://www.furaffinity.net/favorites/zzreg/1074627373/next",
@@ -80,9 +80,9 @@ describe Domain::Fa::Job::FavsJob do
content_type: "text/html",
contents:
SpecUtil.read_fixture_file(
"domain/fa/job/favs_zzreg_page_1_1074627373.html"
"domain/fa/job/favs_zzreg_page_1_1074627373.html",
),
caused_by_entry_idx: 0
caused_by_entry_idx: 0,
},
{
uri: "https://www.furaffinity.net/favorites/zzreg/475297391/next",
@@ -90,28 +90,28 @@ describe Domain::Fa::Job::FavsJob do
content_type: "text/html",
contents:
SpecUtil.read_fixture_file(
"domain/fa/job/favs_zzreg_page_2_475297391.html"
"domain/fa/job/favs_zzreg_page_2_475297391.html",
),
caused_by_entry_idx: 0
}
caused_by_entry_idx: 0,
},
]
end
it "records favs for the user" do
expect do perform_now({ url_name: "zzreg" }) end.to change(
Domain::Fa::Post,
:count
:count,
).by(5)
posts = Domain::Fa::Post.where(fa_id: fa_ids)
expect(user.fav_posts).to match_array(posts)
expect(user.active_fav_posts).to match_array(posts)
end
it "creates missing users" do
expect(Domain::Fa::User.find_by(url_name: "sepulte")).to be_nil
expect do perform_now({ url_name: "zzreg" }) end.to change(
Domain::Fa::User,
:count
:count,
).by(5)
post = Domain::Fa::Post.find_by(fa_id: 52_106_426)
expect(post).not_to be_nil
@@ -131,25 +131,25 @@ describe Domain::Fa::Job::FavsJob do
it "removes favs no longer present" do
perform_now({ url_name: "zzreg" })
user.reload
expect(user.fav_posts).not_to include(old_post)
expect(user.active_fav_posts).not_to include(old_post)
end
it "adds favs newly present" do
perform_now({ url_name: "zzreg" })
posts = Domain::Fa::Post.where(fa_id: fa_ids)
expect(user.fav_posts).to match_array(posts)
expect(user.active_fav_posts).to match_array(posts)
end
it "creates new FA post models and enqueues scans" do
p1 = Domain::Fa::Post.create!(fa_id: fa_ids[0], creator: user)
expect do perform_now({ url_name: "zzreg" }) end.to change(
Domain::Fa::Post,
:count
:count,
).by(4)
user.reload
expect(user.fav_posts).to match_array(
Domain::Fa::Post.where(fa_id: fa_ids)
expect(user.active_fav_posts).to match_array(
Domain::Fa::Post.where(fa_id: fa_ids),
)
p1.reload
@@ -168,7 +168,7 @@ describe Domain::Fa::Job::FavsJob do
it "stops scanning after the first page" do
SpecUtil.init_http_client_mock(
http_client_mock,
client_mock_config[0...1]
client_mock_config[0...1],
)
perform_now({ url_name: "zzreg" })
user.reload
@@ -185,4 +185,40 @@ describe Domain::Fa::Job::FavsJob do
end
end
end
context "2024 format for favs pages" do
include_context "user exists"
let(:client_mock_config) do
[
{
uri: "https://www.furaffinity.net/favorites/zzreg/",
status_code: 200,
content_type: "text/html",
contents:
SpecUtil.read_fixture_file(
"domain/fa/parser/redux/favs_zzreg_page_1_2024_format.html",
),
},
{
uri: "https://www.furaffinity.net/favorites/zzreg/1436907409/next",
status_code: 200,
content_type: "text/html",
contents:
SpecUtil.read_fixture_file(
"domain/fa/parser/redux/favs_zzreg_page_last_2024_format.html",
),
caused_by_entry_idx: 0,
},
]
end
it "records favs for the user" do
expect do perform_now({ url_name: "zzreg" }) end.to change(
Domain::Fa::Post,
:count,
).by(85)
expect(user.fav_posts.count).to eq(85)
end
end
end

View File

@@ -134,24 +134,24 @@ describe Domain::Fa::Parser::Page do
assert_equal(
{
href: "/gallery/feretta/folder/60236/Tale-of-Tails",
title: "Tale of Tails"
title: "Tale of Tails",
},
folders[1]
folders[1],
)
assert_equal(
{
href: "/gallery/feretta/folder/60234/Illustrations",
title: "Illustrations"
title: "Illustrations",
},
folders[2]
folders[2],
)
assert_equal(
{ href: "/gallery/feretta/folder/229520/Marketing", title: "Marketing" },
folders[9]
folders[9],
)
assert_equal(
{ href: "/gallery/feretta/folder/820310/3D", title: "3D" },
folders[13]
folders[13],
)
end
@@ -388,7 +388,7 @@ describe Domain::Fa::Parser::Page do
28_430_551,
50_976_208,
51_785_387,
52_449_028
52_449_028,
]
assert_equal up.recent_watchers.map(&:to_a),
@@ -404,7 +404,7 @@ describe Domain::Fa::Parser::Page do
%w[Lethal_Dose1 lethaldose1],
%w[Kingtiger2101 kingtiger2101],
%w[Nilla_Arts nillaarts],
%w[Riku_Anita rikuanita]
%w[Riku_Anita rikuanita],
]
assert_equal up.recent_watching.map(&:to_a),
@@ -420,7 +420,7 @@ describe Domain::Fa::Parser::Page do
%w[Tabuley tabuley],
%w[Braeburned braeburned],
%w[knifeDragon knifedragon],
%w[LotusGarden lotusgarden]
%w[LotusGarden lotusgarden],
]
end
@@ -443,6 +443,27 @@ describe Domain::Fa::Parser::Page do
assert_equal up.recent_watching, []
end
it "parses older format for favs pages" do
parser = get_parser("favs_zzreg_page_0_first.html")
assert_page_type parser, :probably_listings_page?
assert_equal parser.submissions_parsed.length, 2
assert_equal parser.favorites_next_button_id, "1074627373"
end
it "parses 2024 format for favs pages with a next button" do
parser = get_parser("favs_zzreg_page_1_2024_format.html")
assert_page_type parser, :probably_listings_page?
assert_equal parser.submissions_parsed.length, 48
assert_equal parser.favorites_next_button_id, "1436907409"
end
it "parses 2024 format for favs pages with no next button" do
parser = get_parser("favs_zzreg_page_last_2024_format.html")
assert_page_type parser, :probably_listings_page?
assert_equal parser.submissions_parsed.length, 37
assert_equal parser.favorites_next_button_id, nil
end
def get_parser(file, require_logged_in: true)
path = File.join("domain/fa/parser/redux", file)
contents =
@@ -450,7 +471,7 @@ describe Domain::Fa::Parser::Page do
parser =
Domain::Fa::Parser::Page.new(
contents,
require_logged_in: require_logged_in
require_logged_in: require_logged_in,
)
assert_equal Domain::Fa::Parser::Page::VERSION_2,
parser.page_version,

View File

@@ -17,11 +17,6 @@ RSpec.describe Domain::Fa::UserPolicy, type: :policy do
let(:user) { build(:user, :admin) }
let(:policy) { described_class.new(user, fa_user) }
before do
puts "Debug: User role is #{user.role.inspect}"
puts "Debug: User admin? #{user.admin?.inspect}"
end
it { expect(policy).to permit_action(:show) }
it { expect(policy).to permit_action(:view_scraped_at_timestamps) }
end

View File

@@ -1,5 +1,5 @@
# This file is copied to spec/ when you run 'rails generate rspec:install'
# ENV["RAILS_ENV"] ||= "test"
ENV["RAILS_ENV"] ||= "test"
require_relative "../config/environment"
require "spec_helper"
require "rspec/rails"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long