153 lines
4.3 KiB
Ruby
153 lines
4.3 KiB
Ruby
# typed: strict
|
|
# frozen_string_literal: true
|
|
|
|
# Backfills Domain::Fa::Fav rows from stored HTTP log entries.
#
# Walks HttpLogEntry records for www.furaffinity.net "/favorites/..." pages in
# descending primary-key batches, parses each stored response body, collects
# the submission ids listed on each page per user, and upserts the resulting
# (user_id, post_id) pairs into Domain::Fa::Fav.
class FaBackfillFavs
  include HasColorLogger # presumably supplies the `logger` used below
  extend T::Sig

  # Number of fa_ids resolved to post ids per lookup query.
  FA_ID_LOOKUP_SLICE_SIZE = T.let(50_000, Integer)

  # Number of (user_id, post_id) rows written per upsert_all call.
  FAV_UPSERT_SLICE_SIZE = T.let(30_000, Integer)

  # start_at:   forwarded as `start:` to find_in_batches (primary key to begin
  #             from); nil lets the batch iteration pick its own start.
  # limit:      stop once this many log entries have been processed; nil means
  #             no limit.
  # batch_size: number of log entries loaded per batch; defaults to 32.
  sig do
    params(
      start_at: T.nilable(Integer),
      limit: T.nilable(Integer),
      batch_size: T.nilable(Integer),
    ).void
  end
  def initialize(start_at: nil, limit: nil, batch_size: nil)
    @start_at = start_at
    @limit = limit
    @batch_size = T.let(batch_size || 32, Integer)
    @unique_users_seen = T.let(Set.new, T::Set[String])
    @total_favs_seen = T.let(0, Integer)
    @total_log_entries_processed = T.let(0, Integer)
  end

  # Runs the backfill over all matching log entries (subject to `limit`).
  sig { void }
  def run
    collect_favs_from_logs
  end

  private

  # Iterates the matching log entries batch by batch, gathering
  # url_name => set of fa_ids for each batch and handing it to
  # process_users_favs. Stops early once the configured limit is reached.
  sig { void }
  def collect_favs_from_logs
    scope =
      HttpLogEntry
        .includes(:response)
        .where(uri_host: "www.furaffinity.net")
        .where("uri_path LIKE ?", "/favorites/%")

    scope.find_in_batches(
      batch_size: @batch_size,
      start: @start_at,
      order: :desc,
    ) do |entries|
      # A fresh map per batch; the default block creates each user's set on
      # first access.
      user_favs =
        T.let(
          Hash.new { |h, k| h[k] = Set.new },
          T::Hash[String, T::Set[Integer]],
        )

      entries.each do |entry|
        response = T.let(entry.response, T.nilable(BlobFile))
        next unless response

        contents = T.let(response.content_bytes, T.nilable(String))
        next unless contents

        page =
          begin
            Domain::Fa::Parser::Page.new(contents, require_logged_in: false)
          rescue Domain::Fa::Parser::NotLoggedInError
            next
          end

        uri_path = T.let(entry.uri_path, T.nilable(String))
        next unless uri_path

        # The path looks like /favorites/iiszed/1183854116/next; the first
        # segment after /favorites/ is the user's url_name.
        url_name = uri_path[%r{/favorites/([^/]+)}, 1]
        next unless url_name

        # This is the only place the counter is advanced, so every processed
        # entry is counted exactly once.
        @total_log_entries_processed += 1
        logger.info(
          "[entry #{entry.id}] [processed #{@total_log_entries_processed}] favs for user #{url_name} - #{entry.uri_path}",
        )

        favs = T.must(user_favs[url_name])
        page.submissions_parsed.each do |submission|
          fa_id = submission.id
          favs.add(fa_id) if fa_id
        end

        # Leave the rest of the batch untouched once the limit is hit; what
        # has been collected so far is still persisted below.
        break if limit_reached?
      end

      process_users_favs(user_favs)

      logger.info("Processed #{@total_log_entries_processed} HttpLogEntries")
      logger.info(
        "Unique users seen: #{@unique_users_seen.length}, Total favs seen: #{@total_favs_seen}",
      )

      if limit_reached?
        logger.info("Reached limit of #{@limit} HttpLogEntries, stopping")
        break
      end
    end
  end

  # True when a limit was configured and at least that many log entries have
  # been processed.
  sig { returns(T::Boolean) }
  def limit_reached?
    limit = @limit
    !limit.nil? && @total_log_entries_processed >= limit
  end

  # Persists one batch of collected favs.
  #
  # user_favs maps a user's url_name to the set of submission fa_ids seen on
  # that user's favorites pages. url_names are resolved to Domain::Fa::User
  # ids and fa_ids to Domain::Fa::Post ids, then the pairs are upserted.
  #
  # A url_name or fa_id with no matching row fails the T.must_because
  # assertion, aborting the run.
  sig { params(user_favs: T::Hash[String, T::Set[Integer]]).void }
  def process_users_favs(user_favs)
    # Nothing collected means nothing to count or write; skip the lookups.
    return if user_favs.empty?

    url_name_to_user_id =
      T.let(
        Domain::Fa::User
          .where(url_name: user_favs.keys)
          .pluck(:url_name, :id)
          .to_h,
        T::Hash[String, Integer],
      )

    @unique_users_seen.merge(user_favs.keys)
    @total_favs_seen += user_favs.values.sum(&:length)

    # Distinct fa_ids across all users in this batch, in first-seen order.
    fa_ids = user_favs.values.flat_map(&:to_a).uniq

    # Resolve fa_ids in slices so a single query's IN list stays bounded.
    fa_id_to_post_id = T.let({}, T::Hash[Integer, Integer])
    fa_ids.each_slice(FA_ID_LOOKUP_SLICE_SIZE) do |slice|
      slice_fa_id_to_post_id =
        T.let(
          Domain::Fa::Post.where(fa_id: slice).pluck(:fa_id, :id).to_h,
          T::Hash[Integer, Integer],
        )
      fa_id_to_post_id.merge!(slice_fa_id_to_post_id)
    end

    user_id_post_id_pairs = T.let([], T::Array[[Integer, Integer]])

    user_favs.each do |url_name, post_fa_ids|
      user_id =
        T.must_because(url_name_to_user_id[url_name]) do
          "url_name '#{url_name.inspect}' not found in url_name_to_user_id"
        end

      post_fa_ids.each do |post_fa_id|
        post_id =
          T.must_because(fa_id_to_post_id[post_fa_id]) do
            "post fa_id '#{post_fa_id.inspect}' not found in fa_id_to_post_id"
          end
        user_id_post_id_pairs << [user_id, post_id]
      end
    end

    # Conflicts are resolved against the named unique index, so pairs that
    # already exist do not produce duplicate rows.
    user_id_post_id_pairs.each_slice(FAV_UPSERT_SLICE_SIZE) do |pairs|
      Domain::Fa::Fav.upsert_all(
        pairs.map { |user_id, post_id| { user_id:, post_id: } },
        unique_by: :index_domain_fa_favs_on_user_id_and_post_id,
      )
    end
  end
end
|