e621 user fav jobs fixes

This commit is contained in:
Dylan Knutson
2025-01-29 07:14:21 +00:00
parent 6c33c35a12
commit 8f81468fc0
15 changed files with 313 additions and 62 deletions

View File

@@ -6,4 +6,14 @@ class Domain::E621::Job::Base < Scraper::JobBase
def self.http_factory_method
:get_e621_http_client
end
# Strict variant of `user_from_args`: returns the user supplied in the job
# arguments, failing through `T.must` when none was given.
sig { returns(Domain::E621::User) }
def user_from_args!
  maybe_user = user_from_args
  T.must(maybe_user)
end
# Reads the optional `:user` entry from the job's first argument.
#
# Returns nil (instead of raising NoMethodError on nil) when the job has no
# arguments at all, which keeps this method safe to call from error-handling
# code paths where a secondary exception would mask the original one.
sig { returns(T.nilable(Domain::E621::User)) }
def user_from_args
  first_argument = arguments.first
  return nil if first_argument.nil?

  T.cast(first_argument[:user], T.nilable(Domain::E621::User))
end
end

View File

@@ -15,6 +15,7 @@ class Domain::E621::Job::ScanPostFavsJob < Domain::E621::Job::Base
post = T.cast(args[:post], Domain::E621::Post)
page = 1
breaker = 0
total_created_users = 0
logger.info("scanning post #{post.e621_id} favs")
loop do
break if breaker > MAX_PAGES
@@ -48,16 +49,26 @@ class Domain::E621::Job::ScanPostFavsJob < Domain::E621::Job::Base
T::Hash[Integer, Domain::E621::User],
)
e621_id_to_user_row.values.each do |user_row|
user = e621_id_to_user[user_row.e621_id] || next
user =
e621_id_to_user[user_row.e621_id] ||
Domain::E621::User.new(
e621_user_id: user_row.e621_id,
name: user_row.name,
)
user.num_other_favs_cached = user_row.num_other_favs
total_created_users += 1 if user.new_record?
user.save!
end
logger.info("updated #{e621_id_to_user_row.size} user fav counts")
logger.info(
"[update user fav counts: #{e621_id_to_user_row.size}] [created users: #{total_created_users}]",
)
end
if rows.count < MAX_USERS_PER_PAGE
logger.info("no more users faving post, bailing")
logger.info(
"[no more users faving post, bailing] [total users: #{total_created_users}]",
)
break
end

View File

@@ -6,7 +6,11 @@ class Domain::E621::Job::ScanUserFavsJob < Domain::E621::Job::Base
sig { override.params(args: T.untyped).void }
def perform(args)
user = T.cast(args[:user], Domain::E621::User)
user = user_from_args!
if user.scanned_favs_status == "error" && !args[:force]
logger.info("[user #{user.e621_user_id} has error status, skipping]")
return
end
last_e621_post_id = T.let(nil, T.nilable(Integer))
breaker = T.let(0, Integer)
@@ -91,24 +95,22 @@ class Domain::E621::Job::ScanUserFavsJob < Domain::E621::Job::Base
if missing_e621_ids.any?
measure("#{prefix} [creating posts: #{missing_e621_ids.size}]") do
ReduxApplicationRecord.transaction do
missing_e621_ids.each do |e621_post_id|
post_json = T.must(e621_post_id_to_post_json[e621_post_id])
post =
Domain::E621::TagUtil.initialize_or_update_post(
post_json: post_json,
caused_by_entry: causing_log_entry,
)
was_new = post.new_record?
post.save!
e621_id_to_post_id[e621_post_id] = T.must(post.id)
if was_new
logger.info(
"#{prefix} [created post: e621 id #{post.e621_id} / id #{post.id}]",
)
total_new_posts += 1
defer_job(Domain::E621::Job::StaticFileJob, post: post)
end
missing_e621_ids.each do |e621_post_id|
post_json = T.must(e621_post_id_to_post_json[e621_post_id])
post =
Domain::E621::TagUtil.initialize_or_update_post(
post_json: post_json,
caused_by_entry: causing_log_entry,
)
was_new = post.new_record?
post.save!
e621_id_to_post_id[e621_post_id] = T.must(post.id)
if was_new
logger.info(
"#{prefix} [created post: e621 id #{post.e621_id} / id #{post.id}]",
)
total_new_posts += 1
defer_job(Domain::E621::Job::StaticFileJob, post: post)
end
end
end
@@ -151,7 +153,16 @@ class Domain::E621::Job::ScanUserFavsJob < Domain::E621::Job::Base
].join(" "),
)
user.scanned_favs_status = "ok"
user.scanned_favs_at = Time.current
user.save!
rescue StandardError
logger.error("error scanning user favs: #{user&.e621_user_id}")
user = user_from_args
if user
user.scanned_favs_status = "error"
user.save!
end
raise
end
end

View File

@@ -0,0 +1,42 @@
# typed: strict
require "uri"

# Pages through e621 post search results for a tag query and runs
# `Domain::E621::Job::ScanPostFavsJob` for every returned post that has no
# `scanned_post_favs_at` value yet.
class Domain::E621::Task::CollectPostFavsTask
  extend T::Sig
  include HasColorLogger

  POSTS_URL = T.let("https://e621.net/posts.json", String)
  POSTS_PER_PAGE = T.let(320, Integer)
  REQUEST_HEADERS =
    T.let(
      { "User-Agent" => "FurryArchiver", "Accept" => "application/json" }.freeze,
      T::Hash[String, String],
    )

  # Requests result pages 1..max_page for `query`, stopping early once a page
  # comes back empty.
  #
  # @param max_page [Integer] last result page to request (pages start at 1)
  # @param query [String] tag query sent as the `tags` parameter
  # @raise [RuntimeError] on a non-success HTTP response, or when a post is
  #   still unsaved after the save step
  sig { params(max_page: Integer, query: String).void }
  def run(max_page:, query:)
    (1..max_page).each do |page|
      url = posts_url(query: query, page: page)
      logger.info "GET page=#{page}, max=#{max_page}, url=#{url}"

      post_jsons = fetch_post_jsons(url)
      # An empty page means the search is exhausted; requesting further pages
      # would only produce more empty responses.
      break if post_jsons.empty?

      post_jsons.each { |post_json| scan_post(post_json) }
    end
  end

  private

  # Builds the search URL with a properly form-encoded query string, so tag
  # queries containing reserved characters (not only spaces) stay intact.
  sig { params(query: String, page: Integer).returns(String) }
  def posts_url(query:, page:)
    params = URI.encode_www_form(tags: query, limit: POSTS_PER_PAGE, page: page)
    "#{POSTS_URL}?#{params}"
  end

  # Fetches one result page and returns the array under its "posts" key.
  sig { params(url: String).returns(T::Array[T::Hash[String, T.untyped]]) }
  def fetch_post_jsons(url)
    response = HTTP.headers(REQUEST_HEADERS).get(url)
    unless response.status.success?
      raise "non 200 response (#{response.status}) for #{url}"
    end

    T.cast(response.parse["posts"], T::Array[T::Hash[String, T.untyped]])
  end

  # Creates or updates the local post for `post_json`, then runs the favs scan
  # unless the post already has `scanned_post_favs_at` set.
  sig { params(post_json: T::Hash[String, T.untyped]).void }
  def scan_post(post_json)
    post = Domain::E621::TagUtil.initialize_or_update_post(post_json: post_json)
    if post.nil?
      logger.warn "post not found: #{post_json["id"]}"
      return
    end

    post.save! if post.changed?
    # The scan job needs a persisted post; fail loudly with context otherwise.
    raise "post #{post_json["id"]} was not persisted" if post.new_record?
    return if post.scanned_post_favs_at

    Domain::E621::Job::ScanPostFavsJob.perform_now(post: post)
  end
end

View File

@@ -2,6 +2,13 @@
class Domain::E621::User < ReduxApplicationRecord
self.table_name = "domain_e621_users"
include AttrJson::Record
include AttrJson::Record::QueryScopes
json_attributes_scope :scanned_favs_at
validates_inclusion_of :scanned_favs_status,
in: %w[ok error],
if: :scanned_favs_status?
has_many :favs, class_name: "Domain::E621::Fav", inverse_of: :user
attr_json :favs_are_hidden, :boolean
@@ -10,6 +17,8 @@ class Domain::E621::User < ReduxApplicationRecord
# on /posts/<post_id>/favorites?page=<n>
# Used to find users with a significant number of favorites
attr_json :num_other_favs_cached, :integer
attr_json :scanned_favs_status, :string
attr_json :scanned_favs_at, :datetime
sig { returns(T.nilable(::String)) }
def url_name

View File

@@ -11,6 +11,20 @@ class ReduxApplicationRecord < ActiveRecord::Base
after_update { observe(:update) }
after_destroy { observe(:destroy) }
# Declares a `where_<attr_name>` scope that filters on the text value of the
# `attr_name` key inside the `json_attributes` column:
#   nil or :null -> rows where the value IS NULL
#   :not_null    -> rows where the value IS NOT NULL
#   other values -> rows where the value equals the bound parameter
# NOTE(review): the column reference is not table-qualified — confirm this is
# fine for queries that join other tables with a `json_attributes` column.
sig { params(attr_name: Symbol).void }
def self.json_attributes_scope(attr_name)
  json_text = "json_attributes->>'#{attr_name}'"
  matcher =
    lambda do |value|
      case value
      when nil, :null
        where("#{json_text} IS NULL")
      when :not_null
        where("#{json_text} IS NOT NULL")
      else
        where("#{json_text} = ?", value)
      end
    end
  scope :"where_#{attr_name}", matcher
end
private
ACTIVE_RECORD_COUNTER =

View File

@@ -26,6 +26,8 @@
<%= render "good_job/arguments/domain_inkbunny_file", file: value %>
<% when Domain::E621::Post %>
<%= render "good_job/arguments/domain_e621_post", post: value %>
<% when Domain::E621::User %>
<%= render "good_job/arguments/domain_e621_user", user: value %>
<% else %>
<div class="text-truncate">
<code class="small" title="<%= value.inspect %>"

View File

@@ -0,0 +1,49 @@
<%# Renders a Domain::E621::User job argument: a link badge to the user's e621 page, %>
<%# followed by badges for fav-scan status, name, cached fav count, last scan time %>
<%# and the hidden-favs flag. Expects a `user` local. %>
<% scan_ok = user.scanned_favs_status == "ok" %>
<div class="d-flex align-items-center gap-2">
  <%# rel: the link opens an external site in a new tab, so do not hand it an opener or referrer %>
  <%= link_to "https://e621.net/users/#{user.e621_user_id}",
              class: "badge bg-primary",
              target: "_blank",
              rel: "noopener noreferrer" do %>
    <i class="fa-solid fa-paw me-1"></i>Domain::E621::User #<%= user.id %>
  <% end %>
  <div class="d-flex align-items-center ms-auto gap-2">
    <% if user.scanned_favs_status.present? %>
      <span class="badge <%= scan_ok ? "bg-success" : "bg-warning text-dark" %>">
        <i
          class="<%= scan_ok ? "fa-solid fa-check" : "fa-solid fa-exclamation-triangle" %> me-1"
        ></i>
        <%= user.scanned_favs_status %>
      </span>
    <% end %>
    <span class="badge bg-secondary">
      <i class="fa-solid fa-user me-1"></i><%= user.name %>
    </span>
    <% if user.num_other_favs_cached.present? %>
      <span class="badge bg-info text-dark">
        <i class="fa-solid fa-heart me-1"></i><%= user.num_other_favs_cached %>
        favs
      </span>
    <% end %>
    <% if user.scanned_favs_at.present? %>
      <span
        class="badge bg-light text-dark"
        title="<%= time_ago_in_words(user.scanned_favs_at) %> ago"
      >
        <i class="fa-regular fa-clock me-1"></i
        ><%= user.scanned_favs_at.strftime("%Y-%m-%d %H:%M:%S") %>
      </span>
    <% end %>
    <% if user.favs_are_hidden %>
      <span class="badge bg-danger">
        <i class="fa-solid fa-eye-slash me-1"></i>Hidden
      </span>
    <% end %>
  </div>
</div>

View File

@@ -0,0 +1,6 @@
# Replaces the unique index on domain_e621_posts.md5 with a non-unique one.
class RelaxE621PostsMd5UniqueConstraint < ActiveRecord::Migration[7.2]
  def change
    # The options given to remove_index are what a rollback uses to re-create
    # the index. structure.sql listed the dropped index as
    # UNIQUE ... WHERE (deleted_at IS NULL), so include that predicate;
    # otherwise a rollback would build a stricter, non-partial unique index.
    remove_index :domain_e621_posts,
                 :md5,
                 unique: true,
                 where: "deleted_at IS NULL"
    add_index :domain_e621_posts, :md5, unique: false
  end
end

View File

@@ -0,0 +1,26 @@
# typed: strict

# Moves domain_e621_users.scanned_favs_at out of its dedicated column into the
# `scanned_favs_at` key of json_attributes; the down direction restores the
# column and copies the values back.
class MigrateE621ScannedFavsAtToJsonColumn < ActiveRecord::Migration[7.2]
  extend T::Sig

  sig { void }
  def change
    reversible do |dir|
      dir.up do
        # COALESCE guards rows whose json_attributes is NULL: jsonb_set returns
        # NULL for a NULL target, which would silently lose the timestamp
        # right before the source column is dropped.
        execute <<~SQL
          UPDATE domain_e621_users
          SET json_attributes = jsonb_set(COALESCE(json_attributes, '{}'::jsonb), '{scanned_favs_at}', to_jsonb(scanned_favs_at))
          WHERE scanned_favs_at IS NOT NULL
        SQL
        remove_column :domain_e621_users, :scanned_favs_at
      end

      dir.down do
        add_column :domain_e621_users, :scanned_favs_at, :datetime
        # `->>` already yields text, so a single cast to timestamp is enough.
        execute <<~SQL
          UPDATE domain_e621_users
          SET scanned_favs_at = (json_attributes->>'scanned_favs_at')::timestamp
          WHERE json_attributes->>'scanned_favs_at' IS NOT NULL
        SQL
      end
    end
  end
end

View File

@@ -101,7 +101,8 @@ COMMENT ON EXTENSION vector IS 'vector data type and ivfflat access method';
CREATE TYPE public.postable_type AS ENUM (
'Domain::Fa::Post',
'Domain::E621::Post',
'Domain::Inkbunny::Post'
'Domain::Inkbunny::Post',
'Domain::Sofurry::Post'
);
@@ -2081,7 +2082,6 @@ ALTER SEQUENCE public.domain_e621_tags_id_seq OWNED BY public.domain_e621_tags.i
CREATE TABLE public.domain_e621_users (
id bigint NOT NULL,
e621_user_id integer NOT NULL,
scanned_favs_at timestamp(6) without time zone,
name character varying NOT NULL,
created_at timestamp(6) without time zone NOT NULL,
updated_at timestamp(6) without time zone NOT NULL,
@@ -6063,15 +6063,15 @@ SET default_tablespace = '';
CREATE INDEX index_domain_e621_posts_on_json_attributes ON public.domain_e621_posts USING gin (json_attributes);
--
-- Name: index_domain_e621_posts_on_md5; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_domain_e621_posts_on_md5 ON public.domain_e621_posts USING btree (md5);
SET default_tablespace = mirai;
--
-- Name: index_domain_e621_posts_on_md5; Type: INDEX; Schema: public; Owner: -; Tablespace: mirai
--
CREATE UNIQUE INDEX index_domain_e621_posts_on_md5 ON public.domain_e621_posts USING btree (md5) WHERE (deleted_at IS NULL);
--
-- Name: index_domain_e621_posts_on_parent_e621_id; Type: INDEX; Schema: public; Owner: -; Tablespace: mirai
--
@@ -7845,6 +7845,8 @@ ALTER TABLE ONLY public.domain_twitter_tweets
SET search_path TO "$user", public;
INSERT INTO "schema_migrations" (version) VALUES
('20250129061805'),
('20250129061217'),
('20250128142856'),
('20250128041904'),
('20250127184150'),

View File

@@ -27,7 +27,7 @@ namespace :e621 do
user_query =
lambda do
Domain::E621::User
.where(scanned_favs_at: nil)
.where_scanned_favs_at(:null)
.where("json_attributes->>'num_other_favs_cached' is not null")
.order(Arel.sql "json_attributes->>'num_other_favs_cached' DESC")
.first
@@ -48,32 +48,6 @@ namespace :e621 do
query = $stdin.gets&.chomp || default_query
end
page = 1
while page <= max_page
url =
"https://e621.net/posts.json?tags=#{query.gsub(" ", "+")}&limit=320&page=#{page}"
puts "GET page=#{page}, max=#{max_page}, url=#{url}"
page += 1
post_jsons =
HTTP
.headers(
"User-Agent" => "FurryArchiver",
"Accept" => "application/json",
)
.get(url)
.parse[
"posts"
]
post_jsons.each do |post_json|
post = Domain::E621::Post.find_by(e621_id: post_json["id"])
if post.nil?
puts "post not found: #{post_json["id"]}"
next
end
next if post.scanned_post_favs_at
Domain::E621::Job::ScanPostFavsJob.perform_now(post: post)
end
end
Domain::E621::Task::CollectPostFavsTask.new.run(max_page:, query:)
end
end

View File

@@ -0,0 +1,16 @@
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for dynamic methods in `Domain::E621::Task::CollectPostFavsTask`.
# Please instead update this file by running `bin/tapioca dsl Domain::E621::Task::CollectPostFavsTask`.
class Domain::E621::Task::CollectPostFavsTask
sig { returns(ColorLogger) }
def logger; end
class << self
sig { returns(ColorLogger) }
def logger; end
end
end

View File

@@ -485,6 +485,9 @@ class Domain::E621::User
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateAssociationRelation) }
def joins(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateAssociationRelation) }
def jsonb_contains(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateAssociationRelation) }
def left_joins(*args, &blk); end
@@ -503,6 +506,9 @@ class Domain::E621::User
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateAssociationRelation) }
def none(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateAssociationRelation) }
def not_jsonb_contains(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateAssociationRelation) }
def null_relation?(*args, &blk); end
@@ -564,6 +570,9 @@ class Domain::E621::User
sig { params(args: T.untyped).returns(PrivateAssociationRelation) }
def where(*args); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateAssociationRelation) }
def where_scanned_favs_at(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateAssociationRelation) }
def with(*args, &blk); end
@@ -972,6 +981,9 @@ class Domain::E621::User
sig { void }
def restore_scanned_favs_at!; end
sig { void }
def restore_scanned_favs_status!; end
sig { void }
def restore_updated_at!; end
@@ -1029,6 +1041,12 @@ class Domain::E621::User
sig { returns(T::Boolean) }
def saved_change_to_scanned_favs_at?; end
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
def saved_change_to_scanned_favs_status; end
sig { returns(T::Boolean) }
def saved_change_to_scanned_favs_status?; end
sig { returns(T.nilable([T.nilable(::ActiveSupport::TimeWithZone), T.nilable(::ActiveSupport::TimeWithZone)])) }
def saved_change_to_updated_at; end
@@ -1090,6 +1108,51 @@ class Domain::E621::User
sig { void }
def scanned_favs_at_will_change!; end
sig { returns(T.nilable(::String)) }
def scanned_favs_status; end
sig { params(value: T.nilable(::String)).returns(T.nilable(::String)) }
def scanned_favs_status=(value); end
sig { returns(T::Boolean) }
def scanned_favs_status?; end
sig { returns(T.nilable(::String)) }
def scanned_favs_status_before_last_save; end
sig { returns(T.untyped) }
def scanned_favs_status_before_type_cast; end
sig { returns(T::Boolean) }
def scanned_favs_status_came_from_user?; end
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
def scanned_favs_status_change; end
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
def scanned_favs_status_change_to_be_saved; end
sig { params(from: T.nilable(::String), to: T.nilable(::String)).returns(T::Boolean) }
def scanned_favs_status_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
sig { returns(T.nilable(::String)) }
def scanned_favs_status_in_database; end
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
def scanned_favs_status_previous_change; end
sig { params(from: T.nilable(::String), to: T.nilable(::String)).returns(T::Boolean) }
def scanned_favs_status_previously_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
sig { returns(T.nilable(::String)) }
def scanned_favs_status_previously_was; end
sig { returns(T.nilable(::String)) }
def scanned_favs_status_was; end
sig { void }
def scanned_favs_status_will_change!; end
sig { returns(T.nilable(::ActiveSupport::TimeWithZone)) }
def updated_at; end
@@ -1172,6 +1235,9 @@ class Domain::E621::User
sig { returns(T::Boolean) }
def will_save_change_to_scanned_favs_at?; end
sig { returns(T::Boolean) }
def will_save_change_to_scanned_favs_status?; end
sig { returns(T::Boolean) }
def will_save_change_to_updated_at?; end
end
@@ -1231,6 +1297,9 @@ class Domain::E621::User
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateRelation) }
def joins(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateRelation) }
def jsonb_contains(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateRelation) }
def left_joins(*args, &blk); end
@@ -1249,6 +1318,9 @@ class Domain::E621::User
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateRelation) }
def none(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateRelation) }
def not_jsonb_contains(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateRelation) }
def null_relation?(*args, &blk); end
@@ -1310,6 +1382,9 @@ class Domain::E621::User
sig { params(args: T.untyped).returns(PrivateRelation) }
def where(*args); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateRelation) }
def where_scanned_favs_at(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(PrivateRelation) }
def with(*args, &blk); end

View File

@@ -31,7 +31,9 @@ RSpec.describe Domain::E621::Job::ScanUserFavsJob do
describe "#perform" do
it "scans user favorites and creates fav records" do
perform_now({ user: user })
expect do perform_now({ user: user }) end.to change {
user.scanned_favs_status
}.from(nil).to("ok")
# Verify the posts were created
expect(Domain::E621::Post.count).to eq(5)
@@ -83,7 +85,7 @@ RSpec.describe Domain::E621::Job::ScanUserFavsJob do
it "handles HTTP errors appropriately" do
expect { perform_now({ user: user }) }.to raise_error(
/non 200 response/,
)
).and change { user.scanned_favs_status }.from(nil).to("error")
end
end
@@ -112,7 +114,9 @@ RSpec.describe Domain::E621::Job::ScanUserFavsJob do
user.reload.favs_are_hidden
}.from(nil).to(true).and change { user.reload.scanned_favs_at }.from(
nil,
).to(be_within(1.second).of(Time.current))
).to(be_within(1.second).of(Time.current)).and change {
user.reload.scanned_favs_status
}.from(nil).to("ok")
# Should not create any favs
expect(Domain::E621::Fav.count).to eq(0)