backfill job handles url_names correctly
This commit is contained in:
@@ -15,9 +15,10 @@ class FaBackfillFavs
|
||||
def initialize(start_at: nil, limit: nil, batch_size: nil)
|
||||
@start_at = start_at
|
||||
@limit = limit
|
||||
@batch_size = T.let(batch_size || 100, Integer)
|
||||
@batch_size = T.let(batch_size || 32, Integer)
|
||||
@unique_users_seen = T.let(Set.new, T::Set[String])
|
||||
@total_favs_seen = T.let(0, Integer)
|
||||
@total_log_entries_processed = T.let(0, Integer)
|
||||
end
|
||||
|
||||
sig { void }
|
||||
@@ -30,16 +31,15 @@ class FaBackfillFavs
|
||||
sig { void }
|
||||
def collect_favs_from_logs
|
||||
scope =
|
||||
HttpLogEntry.where(uri_host: "www.furaffinity.net").where(
|
||||
"uri_path LIKE ?",
|
||||
"/favorites/%",
|
||||
)
|
||||
|
||||
total_processed = 0
|
||||
HttpLogEntry
|
||||
.includes(:response)
|
||||
.where(uri_host: "www.furaffinity.net")
|
||||
.where("uri_path LIKE ?", "/favorites/%")
|
||||
|
||||
scope.find_in_batches(
|
||||
batch_size: @batch_size,
|
||||
start: @start_at,
|
||||
order: :desc,
|
||||
) do |entries|
|
||||
user_favs =
|
||||
T.let(
|
||||
@@ -54,13 +54,25 @@ class FaBackfillFavs
|
||||
contents = T.let(response.contents, T.nilable(String))
|
||||
next unless contents
|
||||
|
||||
page = Domain::Fa::Parser::Page.new(contents)
|
||||
page =
|
||||
begin
|
||||
Domain::Fa::Parser::Page.new(contents, require_logged_in: false)
|
||||
rescue Domain::Fa::Parser::NotLoggedInError
|
||||
next
|
||||
end
|
||||
uri_path = T.let(entry.uri_path, T.nilable(String))
|
||||
next unless uri_path
|
||||
|
||||
url_name = uri_path.split("/").last
|
||||
# url is like /favorites/iiszed/1183854116/next
|
||||
# extract url_name from the path with regex
|
||||
url_name = uri_path.match(%r{/favorites/([^\/]+)})&.[](1)
|
||||
next unless url_name
|
||||
|
||||
@total_log_entries_processed += 1
|
||||
logger.info(
|
||||
"[entry #{entry.id}] [processed #{@total_log_entries_processed}] favs for user #{url_name} - #{entry.uri_path}",
|
||||
)
|
||||
|
||||
favs = T.must(user_favs[url_name])
|
||||
|
||||
page.submissions_parsed.each do |submission|
|
||||
@@ -68,16 +80,18 @@ class FaBackfillFavs
|
||||
|
||||
favs.add(submission.id)
|
||||
end
|
||||
|
||||
break if @limit && @total_log_entries_processed >= @limit
|
||||
end
|
||||
|
||||
process_users_favs(user_favs)
|
||||
|
||||
total_processed += entries.length
|
||||
logger.info("Processed #{total_processed} HttpLogEntries")
|
||||
@total_log_entries_processed += entries.length
|
||||
logger.info("Processed #{@total_log_entries_processed} HttpLogEntries")
|
||||
logger.info(
|
||||
"Unique users seen: #{@unique_users_seen.length}, Total favs seen: #{@total_favs_seen}",
|
||||
)
|
||||
if @limit && total_processed >= @limit
|
||||
if @limit && @total_log_entries_processed >= @limit
|
||||
logger.info("Reached limit of #{@limit} HttpLogEntries, stopping")
|
||||
break
|
||||
end
|
||||
@@ -117,12 +131,12 @@ class FaBackfillFavs
|
||||
user_favs.each do |url_name, post_fa_ids|
|
||||
user_id =
|
||||
T.must_because(url_name_to_user_id[url_name]) do
|
||||
"url_name #{url_name} not found in url_name_to_user_id"
|
||||
"url_name '#{url_name.inspect}' not found in url_name_to_user_id"
|
||||
end
|
||||
post_fa_ids.each do |post_fa_id|
|
||||
post_id =
|
||||
T.must_because(fa_id_to_post_id[post_fa_id]) do
|
||||
"post fa_id #{post_fa_id} not found in fa_id_to_post_id"
|
||||
"post fa_id '#{post_fa_id.inspect}' not found in fa_id_to_post_id"
|
||||
end
|
||||
user_id_post_id_pairs << [user_id, post_id]
|
||||
end
|
||||
|
||||
@@ -42,7 +42,7 @@ describe FaBackfillFavs do
|
||||
# Create HttpLogEntry records for each user's favorites page
|
||||
HttpLogEntry.create!(
|
||||
uri_host: "www.furaffinity.net",
|
||||
uri_path: "/favorites/iiszed",
|
||||
uri_path: "/favorites/iiszed/1183854116/next",
|
||||
uri_scheme: "https",
|
||||
verb: :get,
|
||||
performed_by: "direct",
|
||||
|
||||
Reference in New Issue
Block a user