From a68e5b011232c302fd7bab9c39048815c3dfe67c Mon Sep 17 00:00:00 2001 From: Dylan Knutson Date: Mon, 18 Aug 2025 16:28:53 +0000 Subject: [PATCH] bsky fixes, ib missing post enqueuer --- app/lib/tasks/inkbunny.rb | 5 + .../inkbunny/enqueue_missing_posts_task.rb | 94 +++++++++++++++++++ app/models/domain/post_group.rb | 1 + app/models/domain/post_group/e621_pool.rb | 5 + app/models/domain/post_group/inkbunny_pool.rb | 5 + .../domain/post_group/sofurry_folder.rb | 5 + .../good_job/_custom_job_details.html.erb | 2 + .../arguments/_domain_post_group.html.erb | 50 ++++++++++ rake/bluesky.rake | 55 ++++++----- rake/ib.rake | 5 + 10 files changed, 203 insertions(+), 24 deletions(-) create mode 100644 app/lib/tasks/inkbunny.rb create mode 100644 app/lib/tasks/inkbunny/enqueue_missing_posts_task.rb create mode 100644 app/views/good_job/arguments/_domain_post_group.html.erb diff --git a/app/lib/tasks/inkbunny.rb b/app/lib/tasks/inkbunny.rb new file mode 100644 index 00000000..f3bda8f4 --- /dev/null +++ b/app/lib/tasks/inkbunny.rb @@ -0,0 +1,5 @@ +# typed: strict +# frozen_string_literal: true + +module Tasks::Inkbunny +end diff --git a/app/lib/tasks/inkbunny/enqueue_missing_posts_task.rb b/app/lib/tasks/inkbunny/enqueue_missing_posts_task.rb new file mode 100644 index 00000000..0f83eef9 --- /dev/null +++ b/app/lib/tasks/inkbunny/enqueue_missing_posts_task.rb @@ -0,0 +1,94 @@ +# typed: strict +# frozen_string_literal: true + +class Tasks::Inkbunny::EnqueueMissingPostsTask < EnqueueJobBase + extend T::Sig + include HasColorLogger + include Domain::Fa::HasCountFailedInQueue + + sig { override.returns(String) } + def progress_key + "task-inkbunny-enqueue-missing-posts" + end + + sig do + override + .params( + perform_max: T.nilable(Integer), + start_at: T.nilable(T.any(Integer, String)), + log_sink: T.any(IO, StringIO), + ) + .void + end + def initialize(perform_max: nil, start_at: nil, log_sink: $stderr) + super(perform_max:, log_sink:) + @start_at = + T.let( + get_progress(start_at&.to_s)&.to_i || + T.cast(Domain::Post::InkbunnyPost.maximum(:ib_id), Integer), + Integer, + ) + end + + sig { override.void } + def start_enqueuing + log("starting from ib_id: #{@start_at}") if @start_at + + total_processed = 0 + max_ib_post_id = @start_at + + loop do + min_ib_post_id = [max_ib_post_id - 10_000, 0].max + + missing_ib_post_ids_sql = <<~SQL + SELECT series.id + FROM generate_series(#{min_ib_post_id}, #{max_ib_post_id}) AS series(id) + LEFT JOIN domain_posts_ib_aux AS posts + ON series.id = posts.ib_id + WHERE posts.ib_id IS NULL + ORDER BY series.id DESC + LIMIT 100 + SQL + + missing_ib_post_ids = + ActiveRecord::Base + .connection + .execute(missing_ib_post_ids_sql) + .values + .flatten + .map(&:to_i) + missing_ib_post_ids = T.cast(missing_ib_post_ids, T::Array[Integer]) + + if found_min_id = missing_ib_post_ids.min + enqueue do + ColorLogger.quiet do + Domain::Inkbunny::Job::UpdatePostsJob.perform_now( + ib_post_ids: missing_ib_post_ids, + ) + end + end + # Move to continue from the lowest ID we just processed + max_ib_post_id = found_min_id + + total_processed += missing_ib_post_ids.size + logger.info( + format_tags( + make_tags(total_processed:, this_batch: missing_ib_post_ids.size), + ), + ) + else + # No missing IDs found in this large range, move the window down + max_ib_post_id = min_ib_post_id + end + + # Stop if we've reached the beginning + save_progress([max_ib_post_id, 0].max.to_s) + break if max_ib_post_id <= 0 + end + end + + sig { override.returns(Integer) } + def queue_size + count_failed_in_queue("inkbunny") + end +end diff --git a/app/models/domain/post_group.rb b/app/models/domain/post_group.rb index 14f0d0f4..1f0e9338 100644 --- a/app/models/domain/post_group.rb +++ b/app/models/domain/post_group.rb @@ -3,6 +3,7 @@ class Domain::PostGroup < ReduxApplicationRecord extend T::Helpers include AttrJsonRecordAliases include HasCompositeToParam + include HasDomainType self.table_name = "domain_post_groups" abstract! diff --git a/app/models/domain/post_group/e621_pool.rb b/app/models/domain/post_group/e621_pool.rb index 20641bcb..c19ded84 100644 --- a/app/models/domain/post_group/e621_pool.rb +++ b/app/models/domain/post_group/e621_pool.rb @@ -7,4 +7,9 @@ class Domain::PostGroup::E621Pool < Domain::PostGroup def self.param_prefix_and_attribute ["e621", :e621_id] end + + sig { override.returns(Domain::DomainType) } + def self.domain_type + Domain::DomainType::E621 + end end diff --git a/app/models/domain/post_group/inkbunny_pool.rb b/app/models/domain/post_group/inkbunny_pool.rb index e3644cd1..c94c099d 100644 --- a/app/models/domain/post_group/inkbunny_pool.rb +++ b/app/models/domain/post_group/inkbunny_pool.rb @@ -25,4 +25,9 @@ class Domain::PostGroup::InkbunnyPool < Domain::PostGroup "https://inkbunny.net/submissionsviewall.php?pool_id=#{self.ib_id}" end end + + sig { override.returns(Domain::DomainType) } + def self.domain_type + Domain::DomainType::Inkbunny + end end diff --git a/app/models/domain/post_group/sofurry_folder.rb b/app/models/domain/post_group/sofurry_folder.rb index 82765cb7..947c35c3 100644 --- a/app/models/domain/post_group/sofurry_folder.rb +++ b/app/models/domain/post_group/sofurry_folder.rb @@ -26,4 +26,9 @@ class Domain::PostGroup::SofurryFolder < Domain::PostGroup "https://www.sofurry.com/browse/folder/#{type}?by=#{owner_id}&folder=#{sofurry_id}" end end + + sig { override.returns(Domain::DomainType) } + def self.domain_type + Domain::DomainType::Sofurry + end end diff --git a/app/views/good_job/_custom_job_details.html.erb b/app/views/good_job/_custom_job_details.html.erb index 00d70e10..e7c3277b 100644 --- a/app/views/good_job/_custom_job_details.html.erb +++ b/app/views/good_job/_custom_job_details.html.erb @@ -25,6 +25,8 @@ <%= render "good_job/arguments/domain_user", user: job_arg.value %> <% when Domain::UserAvatar %> <%= render "good_job/arguments/domain_user_avatar", user_avatar: job_arg.value %> + <% when Domain::PostGroup %> + <%= render "good_job/arguments/domain_post_group", post_group: job_arg.value %> <% when GoodJob::Job %> <%= render "good_job/arguments/good_job_job", job: job_arg.value %> <% else %> diff --git a/app/views/good_job/arguments/_domain_post_group.html.erb b/app/views/good_job/arguments/_domain_post_group.html.erb new file mode 100644 index 00000000..9f376d0c --- /dev/null +++ b/app/views/good_job/arguments/_domain_post_group.html.erb @@ -0,0 +1,50 @@ +<%# Display post group information with associated details %> +
+
+ <%= link_to Rails.application.routes.url_helpers.domain_post_group_posts_path(post_group), + class: "badge bg-primary", + target: "_blank" do %> + <%= post_group.class.name %> #<%= post_group.id %> + <% end %> + <% prefix_attr = post_group.class.param_prefix_and_attribute %> + <% if prefix_attr && prefix_attr[1] %> + <% attr = prefix_attr[1] %> + <% attr_value = post_group.send(attr) %> + + <%= attr %>:<%= attr_value %> + + <% end %> + <% if post_group.respond_to?(:name) && post_group.name.present? %> + + <%= post_group.name %> + + <% end %> +
+
+ + <%= pluralize(post_group.posts.count, "post") %> + + <% if post_group.external_url_for_view.present? %> + <%= link_to post_group.external_url_for_view.to_s, + class: "badge bg-secondary text-truncate-link", + target: "_blank", + rel: "noopener noreferrer nofollow" do %> + <%= domain_abbreviation_for_model(post_group) %> + <% end %> + <% end %> + <% if post_group.respond_to?(:owner) && (owner = post_group.owner) %> + + <%= link_to domain_user_path(owner), + class: "text-white", + target: "_blank" do %> + <%= owner.name %> + <% end %> + + <% end %> + <% if post_group.created_at.present? %> + + <%= post_group.created_at.strftime("%Y-%m-%d %H:%M:%S") %> + + <% end %> +
+
diff --git a/rake/bluesky.rake b/rake/bluesky.rake index 92c9cdd7..ff3da7f3 100644 --- a/rake/bluesky.rake +++ b/rake/bluesky.rake @@ -16,24 +16,29 @@ module BlueskyRakeHelpers end sig do - params(handle: T.nilable(String), did: T.nilable(String)).returns( + params(handle_or_did: T.nilable(String)).returns( T.nilable(Domain::User::BlueskyUser), ) end - def self.user_from_env(handle: nil, did: nil) - if handle + def self.find_bsky_user(handle_or_did) + return nil if handle_or_did.blank? + if handle_or_did.starts_with?("did:") + did = handle_or_did + Domain::User::BlueskyUser.find_by(did:) || + begin + handle = resolve_handle(did) + return nil if handle.blank? + Domain::User::BlueskyUser.create!(did:, handle:) + end + else Domain::User::BlueskyUser.find_by(handle:) || begin - did = self.resolve_did(handle) + did = self.resolve_did(handle_or_did) return nil if did.blank? Domain::User::BlueskyUser.find_or_create_by!(did:) do |user| - user.handle = handle + user.handle = handle_or_did end end - elsif did - Domain::User::BlueskyUser.find_or_create_by!(did:) do |user| - user.handle = resolve_handle(did) - end end end end @@ -53,8 +58,7 @@ namespace :bluesky do Domain::Bluesky::MonitoredObject.build_for_hashtag(hashtag).save! puts "Added hashtag: ##{hashtag}" elsif ENV["handle"] || ENV["did"] - user = - BlueskyRakeHelpers.user_from_env(handle: ENV["handle"], did: ENV["did"]) + user = BlueskyRakeHelpers.find_bsky_user(ENV["handle"] || ENV["did"]) if user.nil? puts "user not found" next @@ -63,7 +67,7 @@ namespace :bluesky do Domain::Bluesky::Job::ScanPostsJob.perform_later(user:) Domain::Bluesky::MonitoredObject.build_for_user(user).save! else - raise "hashtag, handle, or did is required" + raise "hashtag or handle/did is required" end end @@ -71,17 +75,18 @@ namespace :bluesky do task add_bulk: :environment do added = 0 file = (ENV["file"] ? File.open(T.must(ENV["file"]), "r") : STDIN) + skip = ENV["skip"]&.to_i while line = file.gets + if skip.present? + skip -= 1 + next if skip > 0 + end + begin line = line.strip.chomp next if line.blank? - user = - if line.starts_with?("did:") - BlueskyRakeHelpers.user_from_env(did: line) - else - BlueskyRakeHelpers.user_from_env(handle: line) - end + user = BlueskyRakeHelpers.find_bsky_user(line) if user unless user.scanned_profile_at.present? Domain::Bluesky::Job::ScanUserJob.perform_later(user:) @@ -102,8 +107,12 @@ namespace :bluesky do loop do queue_size = GoodJob::Job - .where("queue_name IN ('bluesky', 'static_file')") - .where(finished_at: nil, performed_at: nil, error: nil) + .where( + finished_at: nil, + performed_at: nil, + error: nil, + queue_name: "bluesky", + ) .where( [ "(serialized_params->'exception_executions' = '{}')", @@ -166,10 +175,8 @@ namespace :bluesky do desc "Watch users that user follows" task watch_follows: :environment do user = - BlueskyRakeHelpers.user_from_env( - handle: ENV["handle"], - did: ENV["did"], - ) || raise("user is required, use `handle` or `did`") + BlueskyRakeHelpers.find_bsky_user(ENV["handle"] || ENV["did"]) || + raise("user is required, need a handle/did") Domain::Bluesky::Job::ScanUserFollowsJob.perform_now(user:) user.reload user.user_user_follows_from.each do |follow| diff --git a/rake/ib.rake b/rake/ib.rake index 4897125c..2f8f02ec 100644 --- a/rake/ib.rake +++ b/rake/ib.rake @@ -8,6 +8,11 @@ namespace :ib do Domain::Inkbunny::Job::LatestPostsJob.set(priority: -10).perform_later({}) end + desc "Enqueue missing posts" + task enqueue_missing_posts: :environment do + Tasks::Inkbunny::EnqueueMissingPostsTask.new.run + end + desc "set auth credentials" task set_auth: :environment do username = nil