Files
redux-scraper/app/lib/tasks/inkbunny/enqueue_missing_posts_task.rb
2025-08-18 23:58:06 +00:00

96 lines
2.6 KiB
Ruby

# typed: strict
# frozen_string_literal: true
# Walks Inkbunny post IDs downward from a starting ID, looks for IDs that have
# no matching `ib_id` row in `domain_posts_ib_aux`, and runs
# `Domain::Inkbunny::Job::UpdatePostsJob` for them in batches. The cursor is
# persisted through `save_progress` after every iteration.
class Tasks::Inkbunny::EnqueueMissingPostsTask < EnqueueJobBase
  extend T::Sig
  include HasColorLogger
  include Domain::Fa::HasCountFailedInQueue

  # How far below the cursor a single scan query reaches.
  SCAN_WINDOW_SIZE = T.let(10_000, Integer)
  # Upper bound on the number of missing IDs passed to one UpdatePostsJob run.
  MISSING_IDS_PER_BATCH = T.let(100, Integer)

  # @return [String] fixed key identifying this task's progress entry
  #   (presumably consumed by the base class's progress helpers -- verify
  #   against EnqueueJobBase).
  sig { override.returns(String) }
  def progress_key
    "task-inkbunny-enqueue-missing-posts"
  end

  # @param perform_max [Integer, nil] forwarded unchanged to the base class
  # @param start_at [Integer, String, nil] optional starting ID, stringified
  #   and passed to `get_progress`
  # @param log_sink [IO, StringIO] forwarded unchanged to the base class
  #
  # The starting ID is whatever `get_progress` yields (converted with `to_i`);
  # when that is nil, the largest `ib_id` on `Domain::Post::InkbunnyPost` is
  # used instead.
  # NOTE(review): `T.cast(..., Integer)` raises if `maximum(:ib_id)` returns
  # nil (e.g. no posts stored yet) -- confirm whether that case needs handling.
  sig do
    override
      .params(
        perform_max: T.nilable(Integer),
        start_at: T.nilable(T.any(Integer, String)),
        log_sink: T.any(IO, StringIO),
      )
      .void
  end
  def initialize(perform_max: nil, start_at: nil, log_sink: $stderr)
    super(perform_max:, log_sink:)
    @start_at =
      T.let(
        get_progress(start_at&.to_s)&.to_i ||
          T.cast(Domain::Post::InkbunnyPost.maximum(:ib_id), Integer),
        Integer,
      )
  end

  # Scans downward from `@start_at` until ID 0 has been covered or
  # `interrupted?` becomes true. `max_ib_post_id` is always the highest ID that
  # has not been scanned yet.
  sig { override.void }
  def start_enqueuing
    log("starting from ib_id: #{@start_at}") if @start_at
    total_processed = 0
    max_ib_post_id = @start_at

    loop do
      min_ib_post_id = [max_ib_post_id - SCAN_WINDOW_SIZE, 0].max
      missing_ib_post_ids =
        find_missing_ib_post_ids(min_ib_post_id, max_ib_post_id)

      if (found_min_id = missing_ib_post_ids.min)
        enqueue(always_recheck: true) do
          Domain::Inkbunny::Job::UpdatePostsJob.perform_now(
            ib_post_ids: missing_ib_post_ids,
          )
        end

        # Resume strictly below the lowest ID just handled. Restarting at
        # found_min_id itself would re-select that ID whenever the job did not
        # create a row for it, and if it were the only missing ID in the next
        # window the cursor would never move again.
        next_max_ib_post_id = found_min_id - 1
        total_processed += missing_ib_post_ids.size
        logger.info(
          format_tags(
            make_tags(total_processed:, this_batch: missing_ib_post_ids.size),
          ),
        )
      else
        # Nothing missing anywhere in [min, max]; continue below the window.
        next_max_ib_post_id = min_ib_post_id - 1
      end

      # A negative cursor means ID 0 has been scanned: the walk is complete.
      reached_beginning = next_max_ib_post_id.negative?
      max_ib_post_id = [next_max_ib_post_id, 0].max
      save_progress(max_ib_post_id.to_s)
      logger.info("saved progress: #{max_ib_post_id}")
      break if reached_beginning || interrupted?
    end
  end

  # @return [Integer] result of `count_failed_in_queue` for the `inkbunny` and
  #   `static_file` queue names
  sig { override.returns(Integer) }
  def queue_size
    count_failed_in_queue(%w[inkbunny static_file])
  end

  private

  # Returns up to MISSING_IDS_PER_BATCH IDs in the inclusive range
  # [min_id, max_id] that have no `ib_id` match in `domain_posts_ib_aux`,
  # highest first. Both bounds are Integers, so interpolating them into the
  # SQL text cannot inject anything.
  sig { params(min_id: Integer, max_id: Integer).returns(T::Array[Integer]) }
  def find_missing_ib_post_ids(min_id, max_id)
    sql = <<~SQL
      SELECT series.id
      FROM generate_series(#{min_id}, #{max_id}) AS series(id)
      LEFT JOIN domain_posts_ib_aux AS posts
        ON series.id = posts.ib_id
      WHERE posts.ib_id IS NULL
      ORDER BY series.id DESC
      LIMIT #{MISSING_IDS_PER_BATCH}
    SQL
    ids = ActiveRecord::Base.connection.execute(sql).values.flatten.map(&:to_i)
    T.cast(ids, T::Array[Integer])
  end
end