more e621 user fav migration tweaks

This commit is contained in:
Dylan Knutson
2025-02-20 02:04:42 +00:00
parent 6b1956c029
commit 8181628bb0
5 changed files with 134 additions and 40 deletions

View File

@@ -105,8 +105,8 @@ task migrate_to_domain: :environment do
migrator = Domain::MigrateToDomain.new
if only_domains.include?("e621")
migrator.migrate_e621_users(only_user: only_user)
migrator.migrate_e621_posts(only_user: only_user)
# migrator.migrate_e621_users(only_user: only_user)
# migrator.migrate_e621_posts(only_user: only_user)
migrator.migrate_e621_users_favs(only_user: only_user)
end
@@ -471,3 +471,25 @@ task sample_migrated_favs: :environment do
puts "old fav count: #{old_user.fav_posts.count}"
puts "new fav count: #{new_user.faved_posts.count}"
end
# Rake task: removes the 'migrated_user_favs_at' key from json_attributes on
# every domain_users row of type Domain::User::E621User that currently has it,
# using a single raw SQL UPDATE.
task clear_e621_user_favs_migrated_at: :environment do
  puts "clearing migrated_user_favs_at"

  # jsonb `- 'key'` deletes the key; the WHERE clause limits the update to
  # rows where the key is present with a non-null value.
  clear_marker_sql = <<~SQL
    UPDATE domain_users
    SET json_attributes = json_attributes - 'migrated_user_favs_at'
    WHERE type = 'Domain::User::E621User'
    AND json_attributes->>'migrated_user_favs_at' IS NOT NULL
  SQL
  ReduxApplicationRecord.connection.execute(clear_marker_sql)

  puts "done"

  # Disabled alternative: the same reset done per-record through the model,
  # in batches, with a progress bar.
  # query = Domain::User::E621User.where.not(migrated_user_favs_at: nil)
  # pb = ProgressBar.create(total: query.count, format: "%t: %c/%C %B %p%% %a %e")
  # query.find_in_batches(batch_size: 1000) do |b|
  #   ReduxApplicationRecord.transaction do
  #     b.each do |u|
  #       u.migrated_user_favs_at = nil
  #       u.save!
  #     end
  #   end
  #   pb.progress += b.size
  # end
end

View File

@@ -214,6 +214,95 @@ class Domain::MigrateToDomain
sig { params(only_user: T.nilable(String)).void }
def migrate_e621_users_favs(only_user: nil)
logger.info "migrating e621 users favs"
# ReduxApplicationRecord.connection.execute(<<~SQL)
# -- Map old user IDs to new user IDs:
# DROP TABLE IF EXISTS e621_user_map;
# CREATE TABLE e621_user_map TABLESPACE mirai AS
# SELECT old_users.id AS old_user_id, new_users.id AS new_user_id
# FROM domain_e621_users old_users
# JOIN domain_users new_users
# ON new_users.json_attributes->>'e621_id' = old_users.e621_user_id
# WHERE new_users.type = 'Domain::User::E621User';
# CREATE INDEX idx_user_map_old_user_id ON e621_user_map(old_user_id, new_user_id) TABLESPACE mirai;
# CREATE INDEX idx_user_map_new_user_id ON e621_user_map(new_user_id, old_user_id) TABLESPACE mirai;
# ANALYZE e621_user_map;
# -- Map old post IDs to new post IDs:
# DROP TABLE IF EXISTS e621_post_map;
# CREATE TABLE e621_post_map TABLESPACE mirai AS
# SELECT old_posts.id AS old_post_id, new_posts.id AS new_post_id
# FROM domain_e621_posts old_posts
# JOIN domain_posts new_posts
# ON (new_posts.json_attributes->>'e621_id')::integer = old_posts.e621_id
# WHERE new_posts.type = 'Domain::Post::E621Post';
# CREATE INDEX idx_post_map_old_post_id ON e621_post_map(old_post_id, new_post_id) TABLESPACE mirai;
# CREATE INDEX idx_post_map_new_post_id ON e621_post_map(new_post_id, old_post_id) TABLESPACE mirai;
# ANALYZE e621_post_map;
# DO $$
# DECLARE
# v_user_ids bigint[];
# v_batch_size integer := 10; -- Adjust batch size as needed
# v_total_count integer;
# v_processed_count integer := 0;
# v_progress numeric;
# v_batch bigint[];
# BEGIN
# RAISE NOTICE 'Counting users...';
# -- Fetch all distinct user_ids into an array
# SELECT array_agg(domain_e621_users.id ORDER BY domain_e621_users.id)
# INTO v_user_ids
# FROM domain_e621_users
# INNER JOIN e621_user_map um ON domain_e621_users.id = um.old_user_id
# INNER JOIN domain_users du ON um.new_user_id = du.id
# WHERE du.type = 'Domain::User::E621User'
# AND du.json_attributes->>'migrated_user_favs_at' IS NULL;
# IF v_user_ids IS NULL THEN
# RAISE NOTICE 'No users found to process';
# RETURN;
# END IF;
# -- Get total user count for progress tracking
# v_total_count := array_length(v_user_ids, 1);
# RAISE NOTICE 'Total users to process: %', v_total_count;
# -- Loop over user IDs in batches
# FOR i IN 1..v_total_count BY v_batch_size LOOP
# -- Extract the current batch of users
# v_batch := v_user_ids[i:LEAST(i + v_batch_size - 1, v_total_count)];
# -- Insert batch for the current set of users
# INSERT INTO domain_user_post_favs (user_id, post_id)
# SELECT um.new_user_id, pm.new_post_id
# FROM domain_e621_favs old_favs
# JOIN e621_post_map pm ON old_favs.post_id = pm.old_post_id
# JOIN e621_user_map um ON old_favs.user_id = um.old_user_id
# WHERE old_favs.user_id = ANY(v_batch)
# ON CONFLICT (user_id, post_id) DO NOTHING;
# UPDATE domain_users
# SET json_attributes = jsonb_set(json_attributes, '{migrated_user_favs_at}', to_jsonb(now()))
# FROM e621_user_map um
# WHERE domain_users.id = um.new_user_id
# AND um.old_user_id = ANY(v_batch)
# AND domain_users.type = 'Domain::User::E621User';
# -- Update progress tracking
# v_processed_count := LEAST(i + v_batch_size - 1, v_total_count);
# v_progress := (v_processed_count::numeric / v_total_count::numeric) * 100;
# -- Log progress
# RAISE NOTICE 'Processed users % of % - user ids: % (Progress: % %%)',
# v_processed_count, v_total_count, v_batch, ROUND(v_progress, 2);
# -- COMMIT;
# END LOOP;
# END $$;
# SQL
if only_user
query = Domain::User::E621User.where(name: only_user)
else

View File

@@ -53,11 +53,16 @@ class Domain::User < ReduxApplicationRecord
missing_values = values - models
extra_values = models - values
::Domain::UserSearchName.upsert_all(
missing_values.map { |name| { user_id: id, name: name } },
unique_by: %i[user_id name],
)
::Domain::UserSearchName.where(user_id: id, name: extra_values).delete_all
if missing_values.any?
::Domain::UserSearchName.upsert_all(
missing_values.map { |name| { user_id: id, name: name } },
unique_by: %i[user_id name],
)
end
if extra_values.any?
::Domain::UserSearchName.where(user_id: id, name: extra_values).delete_all
end
end
sig(:final) { returns(T::Array[String]) }

View File

@@ -1,7 +1,14 @@
# typed: strict
# A row of domain_user_post_favs, associating one Domain::User with one
# Domain::Post. The table is keyed by the composite (user_id, post_id).
class Domain::UserPostFav < ReduxApplicationRecord
  self.table_name = "domain_user_post_favs"
  self.primary_key = [:user_id, :post_id]

  belongs_to :user, class_name: "Domain::User", inverse_of: :user_post_favs
  belongs_to :post, class_name: "Domain::Post", inverse_of: :user_post_favs

  # Narrows the relation to rows whose joined post has `type` equal to the
  # name of the given class. The argument is cast to a Domain::Post class.
  scope :for_post_type,
        ->(post_klass) {
          klass = T.cast(post_klass, T.class_of(Domain::Post))
          joins(:post).where(post: { type: klass.name })
        }
end

View File

@@ -64,48 +64,19 @@ namespace :e621 do
desc "Show statistics about e621 favs"
task fav_stats: :environment do
  # Prints how many e621 favs are stored locally versus the sum of the
  # per-user 'num_other_favs_cached' counters kept on domain_e621_users.

  puts "counting total cached..."
  # Sum the cached per-user counter out of json_attributes in one query.
  total_cached =
    ReduxApplicationRecord.connection.execute(<<~SQL).first["total"]
      select SUM((json_attributes->>'num_other_favs_cached')::int) as total
      from domain_e621_users
    SQL

  puts "counting total have..."
  # Count fav rows whose post is an E621Post. An earlier
  # `Domain::E621::Fav.count` assignment was overwritten before being read
  # (a dead store that still ran a COUNT query), so it has been removed.
  total_have = Domain::UserPostFav.for_post_type(Domain::Post::E621Post).count

  # Anonymous class extended with NumberHelper, used only for delimiters.
  helper = Class.new.extend(ActionView::Helpers::NumberHelper)
  puts "total cached: #{helper.number_with_delimiter(total_cached)}"
  puts "total have: #{helper.number_with_delimiter(total_have)}"
  puts "percent cached: #{((total_have.to_f / total_cached.to_f) * 100).round(1)}%"
end
desc "debug sql"
task debug_sql: :environment do
  # Scratch task: builds one relation, asks ActiveRecord to EXPLAIN it, and
  # prints the inspected result. The commented-out snippets are earlier
  # experiments kept for reference.
  # NOTE(review): whether `joins(domain_posts: :e621_id)` resolves to valid
  # associations is not visible from here - confirm against the model.

  # Domain::Post::E621Post.where(e621_id: 5350363)
  # puts Domain::Post::E621Post.where(e621_id: 5_350_363).explain.inspect
  # puts Domain::Post::FaPost.where(fa_id: 52_801_830).explain.inspect
  # puts Domain::Fa::Post.where(fa_id: 52_801_830).explain.inspect
  # puts Domain::Post::FaPost
  #   .select(:fa_id)
  #   .where(fa_id: 52_801_830)
  #   .explain
  #   .inspect

  joined_posts = Domain::E621::Post.joins(domain_posts: :e621_id)
  # Raw-SQL variant of the join that was tried previously:
  #   "LEFT JOIN domain_posts ON domain_e621_posts.e621_id =
  #     (domain_posts.json_attributes->>'e621_id')::integer
  #     AND domain_posts.type = 'Domain::Post::E621Post'",
  # ).where("domain_posts.id IS NULL")
  plan = joined_posts.explain
  puts plan.inspect

  # puts Domain::Fa::Post
  #   .joins(
  #     "
  #   LEFT JOIN domain_posts ON domain_fa_posts.fa_id =
  #     (domain_posts.json_attributes->>'fa_id')::integer
  # ",
  #   )
  #   .where(domain_posts: { id: nil, type: "Domain::Post::FaPost" })
  #   .explain
  #   .inspect
end
end