backfill fa posted_at field

Dylan Knutson
2025-02-12 05:22:51 +00:00
parent 369fa71007
commit 9c38bfce13
11 changed files with 291 additions and 101 deletions


@@ -94,14 +94,60 @@ task :reverse_csv do
end
task migrate_domain: :environment do
# Domain::MigrateToDomain.new.migrate_e621_users
# Domain::MigrateToDomain.new.migrate_e621_posts
# Domain::MigrateToDomain.new.migrate_fa_users
# Domain::MigrateToDomain.new.migrate_fa_posts
# Domain::MigrateToDomain.new.migrate_e621_users_favs
# Domain::MigrateToDomain.new.migrate_fa_users_favs
# Domain::MigrateToDomain.new.migrate_fa_users_followed_users
# Domain::MigrateToDomain.new.migrate_inkbunny_users
Domain::MigrateToDomain.new.migrate_inkbunny_posts
Domain::MigrateToDomain.new.migrate_inkbunny_pools
only_user = ENV["only_user"]
migrator = Domain::MigrateToDomain.new
migrator.migrate_e621_users(only_user: only_user)
migrator.migrate_e621_posts(only_user: only_user)
migrator.migrate_fa_users(only_user: only_user)
migrator.migrate_fa_posts(only_user: only_user)
migrator.migrate_e621_users_favs(only_user: only_user)
migrator.migrate_fa_users_favs(only_user: only_user)
migrator.migrate_fa_users_followed_users(only_user: only_user)
migrator.migrate_inkbunny_users(only_user: only_user)
migrator.migrate_inkbunny_posts(only_user: only_user)
migrator.migrate_inkbunny_pools(only_user: nil) if only_user.nil?
end
task infer_last_submission_log_entries: :environment do
only_fa_id = ENV["only_fa_id"]
if only_fa_id
relation = Domain::Fa::Post.where(fa_id: only_fa_id)
else
relation =
Domain::Fa::Post
.where(state: :ok)
.where(last_submission_page_id: nil)
.or(Domain::Fa::Post.where(state: :ok).where(posted_at: nil))
end
relation.find_each do |post|
parts = ["[id: #{post.id}]", "[fa_id: #{post.fa_id}]"]
log_entry = post.guess_last_submission_page
if log_entry
contents = log_entry.response&.contents
if contents
parser = Domain::Fa::Parser::Page.new(contents)
if parser.submission_not_found?
parts << "[removed]"
post.state = :removed
else
posted_at = parser.submission.posted_date
parts << "[posted_at_parser: #{posted_at}]" if posted_at
end
end
if post.last_submission_page_id.present? &&
log_entry.id != post.last_submission_page_id
parts << "[overwrite]"
end
post.last_submission_page_id = log_entry.id
posted_at = post.posted_at ||= post.guess_posted_at
parts << "[posted_at_attr: #{posted_at}]" if posted_at
parts << "[submission log entry: #{log_entry.id}]"
parts << "[uri: #{log_entry.uri.to_s}]"
puts parts.join(" ")
post.save!
end
end
end
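
Both tasks take their filters from environment variables rather than task arguments: migrate_domain reads only_user and infer_last_submission_log_entries reads only_fa_id. A hedged example of driving them from a Rails console follows; the placeholder values and the assumption that the tasks sit at the top level (no rake namespace) are mine, not part of this commit.

require "rake"
Rails.application.load_tasks

ENV["only_user"] = "some_fa_url_name" # placeholder user name
Rake::Task["migrate_domain"].invoke

ENV["only_fa_id"] = "12345" # placeholder FA submission id
Rake::Task["infer_last_submission_log_entries"].invoke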


@@ -8,13 +8,17 @@ class Domain::MigrateToDomain
@pb_sink = pb_sink
end
sig { void }
def migrate_e621_users
sig { params(only_user: T.nilable(String)).void }
def migrate_e621_users(only_user: nil)
logger.info "migrating e621 users"
query =
Domain::E621::User.where.not(
e621_user_id: Domain::User::E621User.select(:e621_id),
)
if only_user
query = Domain::E621::User.where(name: only_user)
else
query =
Domain::E621::User.where.not(
e621_user_id: Domain::User::E621User.select(:e621_id),
)
end
pb =
ProgressBar.create(
throttle_rate: 0.2,
@@ -23,22 +27,33 @@ class Domain::MigrateToDomain
output: @pb_sink,
)
query.find_in_batches(batch_size: 10_000) do |batch|
migrate_batch(Domain::User::E621User, batch) do |old_model|
initialize_e621_user_from(old_model)
end
migrate_batch(
Domain::User::E621User,
batch,
unique_by: [:idx_domain_e621_users_on_e621_id],
) { |old_model| initialize_e621_user_from(old_model) }
pb.progress = [pb.progress + batch.size, pb.total].min
end
end
sig { void }
def migrate_e621_posts
sig { params(only_user: T.nilable(String)).void }
def migrate_e621_posts(only_user: nil)
logger.info "migrating e621 posts"
query =
Domain::E621::Post.joins(
"LEFT JOIN domain_posts ON domain_e621_posts.e621_id =
if only_user
user = Domain::E621::User.find_by(name: only_user)
if user.nil?
logger.info "e621 user not found: #{only_user}"
return
end
query = user.faved_posts
else
query =
Domain::E621::Post.joins(
"LEFT JOIN domain_posts ON domain_e621_posts.e621_id =
(domain_posts.json_attributes->>'e621_id')::integer
AND domain_posts.type = 'Domain::Post::E621Post'",
).where("domain_posts.id IS NULL")
).where("domain_posts.id IS NULL")
end
pb =
ProgressBar.create(
throttle_rate: 0.2,
@@ -65,15 +80,27 @@ class Domain::MigrateToDomain
end
end
sig { void }
def migrate_fa_users
sig { params(only_user: T.nilable(String)).void }
def migrate_fa_users(only_user: nil)
logger.info "migrating fa users"
query =
Domain::Fa::User.joins(
"LEFT JOIN domain_users ON domain_fa_users.url_name =
if only_user
user = Domain::Fa::User.find_by(url_name: only_user)
if user.nil?
logger.info "fa user not found: #{only_user}"
return
end
query =
Domain::Fa::User.where(url_name: only_user).or(
Domain::Fa::User.where(id: user.follows.select(:id)),
)
else
query =
Domain::Fa::User.joins(
"LEFT JOIN domain_users ON domain_fa_users.url_name =
domain_users.json_attributes->>'url_name'
AND domain_users.type = 'Domain::User::FaUser'",
).where("domain_users.id IS NULL")
).where("domain_users.id IS NULL")
end
pb =
ProgressBar.create(
@@ -85,9 +112,11 @@ class Domain::MigrateToDomain
query.find_in_batches(batch_size: 10_000) do |batch|
ReduxApplicationRecord.transaction do
models =
migrate_batch(Domain::User::FaUser, batch) do |old_user|
initialize_fa_user_from(old_user)
end
migrate_batch(
Domain::User::FaUser,
batch,
unique_by: [:idx_domain_fa_users_on_url_name],
) { |old_user| initialize_fa_user_from(old_user) }
migrate_batch(Domain::UserAvatar, models.filter(&:avatar)) do |user|
avatar = T.must(user.avatar)
@@ -100,16 +129,30 @@ class Domain::MigrateToDomain
end
end
sig { void }
def migrate_fa_posts
sig { params(only_user: T.nilable(String)).void }
def migrate_fa_posts(only_user: nil)
logger.info "migrating fa posts"
old_fa_ids = Domain::Fa::Post.pluck(:fa_id)
logger.info "old_fa_ids: #{old_fa_ids.size}"
new_fa_ids = Domain::Post::FaPost.pluck(:fa_id)
logger.info "new_fa_ids: #{new_fa_ids.size}"
missing_fa_ids = old_fa_ids - new_fa_ids
missing_fa_ids.sort!
logger.info "missing_fa_ids: #{missing_fa_ids.size}"
if only_user
user = Domain::Fa::User.find_by(url_name: only_user)
if user.nil?
logger.info "fa user not found: #{only_user}"
return
end
old_fa_ids =
(user.posts.pluck(:fa_id) + user.fav_posts.pluck(:fa_id)).uniq
missing_fa_ids = old_fa_ids
logger.info "missing_fa_ids: #{missing_fa_ids.size}"
missing_fa_ids.sort!
else
old_fa_ids = Domain::Fa::Post.pluck(:fa_id)
logger.info "old_fa_ids: #{old_fa_ids.size}"
new_fa_ids = Domain::Post::FaPost.pluck(:fa_id)
logger.info "new_fa_ids: #{new_fa_ids.size}"
missing_fa_ids = old_fa_ids - new_fa_ids
missing_fa_ids.sort!
logger.info "missing_fa_ids: #{missing_fa_ids.size}"
end
pb =
ProgressBar.create(
@@ -123,14 +166,25 @@ class Domain::MigrateToDomain
batch =
Domain::Fa::Post.where(fa_id: fa_ids).includes(:creator, :file).to_a
ReduxApplicationRecord.transaction do
if only_user
migrate_batch(
Domain::User::FaUser,
batch.map(&:creator).compact.uniq,
unique_by: [:idx_domain_fa_users_on_url_name],
) { |user| initialize_fa_user_from(user) }
end
initialized_models =
migrate_batch(Domain::Post::FaPost, batch) do |old_post|
initialize_fa_post_from(old_post)
end
migrate_batch(
Domain::Post::FaPost,
batch,
unique_by: [:idx_domain_fa_posts_on_fa_id],
) { |old_post| initialize_fa_post_from(old_post) }
migrate_batch(
Domain::PostFile,
initialized_models.filter(&:file),
unique_by: [:index_domain_post_files_on_log_entry_id],
) do |post|
file = T.must(post.file)
file.post_id = post.id
@@ -140,6 +194,7 @@ class Domain::MigrateToDomain
migrate_batch(
Domain::UserPostCreation,
initialized_models.filter(&:primary_user_post_creation),
unique_by: [:index_domain_user_post_creations_on_user_id_and_post_id],
) do |post|
user_post_creation = T.must(post.primary_user_post_creation)
user_post_creation.post_id = post.id
@@ -150,10 +205,14 @@ class Domain::MigrateToDomain
end
end
sig { void }
def migrate_e621_users_favs
sig { params(only_user: T.nilable(String)).void }
def migrate_e621_users_favs(only_user: nil)
logger.info "migrating e621 users favs"
query = Domain::User::E621User.where(migrated_user_favs_at: nil)
if only_user
query = Domain::User::E621User.where(name: only_user)
else
query = Domain::User::E621User.where(migrated_user_favs_at: nil)
end
pb =
ProgressBar.create(
throttle_rate: 0.2,
@@ -167,10 +226,14 @@ class Domain::MigrateToDomain
end
end
sig { void }
def migrate_fa_users_favs
sig { params(only_user: T.nilable(String)).void }
def migrate_fa_users_favs(only_user: nil)
logger.info "migrating fa users favs"
query = Domain::User::FaUser.where(migrated_user_favs_at: nil)
if only_user
query = Domain::User::FaUser.where(url_name: only_user)
else
query = Domain::User::FaUser.where(migrated_user_favs_at: nil)
end
pb =
ProgressBar.create(
throttle_rate: 0.2,
@@ -184,10 +247,14 @@ class Domain::MigrateToDomain
end
end
sig { void }
def migrate_fa_users_followed_users
sig { params(only_user: T.nilable(String)).void }
def migrate_fa_users_followed_users(only_user: nil)
logger.info "migrating fa followed users"
query = Domain::User::FaUser.where(migrated_followed_users_at: nil)
if only_user
query = Domain::User::FaUser.where(url_name: only_user)
else
query = Domain::User::FaUser.where(migrated_followed_users_at: nil)
end
pb =
ProgressBar.create(
throttle_rate: 0.2,
@@ -203,13 +270,17 @@ class Domain::MigrateToDomain
end
end
sig { void }
def migrate_inkbunny_users
sig { params(only_user: T.nilable(String)).void }
def migrate_inkbunny_users(only_user: nil)
logger.info "migrating inkbunny users"
query =
Domain::Inkbunny::User.where.not(
ib_user_id: Domain::User::InkbunnyUser.select(:ib_id),
)
if only_user
query = Domain::Inkbunny::User.where(name: only_user)
else
query =
Domain::Inkbunny::User.where.not(
ib_user_id: Domain::User::InkbunnyUser.select(:ib_id),
)
end
pb =
ProgressBar.create(
throttle_rate: 0.2,
@@ -220,9 +291,11 @@ class Domain::MigrateToDomain
query.find_in_batches(batch_size: 10_000) do |batch|
ReduxApplicationRecord.transaction do
models =
migrate_batch(Domain::User::InkbunnyUser, batch) do |old_user|
initialize_inkbunny_user_from(old_user)
end
migrate_batch(
Domain::User::InkbunnyUser,
batch,
unique_by: [:idx_domain_inkbunny_users_on_ib_id],
) { |old_user| initialize_inkbunny_user_from(old_user) }
migrate_batch(Domain::UserAvatar, models.filter(&:avatar)) do |user|
avatar = T.must(user.avatar)
@@ -235,14 +308,22 @@ class Domain::MigrateToDomain
end
end
sig { void }
def migrate_inkbunny_posts
sig { params(only_user: T.nilable(String)).void }
def migrate_inkbunny_posts(only_user: nil)
logger.info "migrating inkbunny posts"
query =
Domain::Inkbunny::Post
.where.not(ib_post_id: Domain::Post::InkbunnyPost.select(:ib_id))
.includes(:creator, { files: :log_entry }, :pools)
if only_user
user = Domain::Inkbunny::User.find_by(name: only_user)
if user.nil?
logger.info "inkbunny user not found: #{only_user}"
return
end
query = user.posts
else
query =
Domain::Inkbunny::Post
.where.not(ib_post_id: Domain::Post::InkbunnyPost.select(:ib_id))
.includes(:creator, { files: :log_entry }, :pools)
end
pb =
ProgressBar.create(
throttle_rate: 0.2,
@@ -254,13 +335,16 @@ class Domain::MigrateToDomain
query.find_in_batches(batch_size: 10_000) do |batch|
ReduxApplicationRecord.transaction do
models =
migrate_batch(Domain::Post::InkbunnyPost, batch) do |old_post|
initialize_inkbunny_post(old_post)
end
migrate_batch(
Domain::Post::InkbunnyPost,
batch,
unique_by: [:idx_domain_inkbunny_posts_on_ib_id],
) { |old_post| initialize_inkbunny_post(old_post) }
migrate_batch(
Domain::UserPostCreation,
models.filter(&:primary_user_post_creation),
unique_by: [:index_domain_user_post_creations_on_user_id_and_post_id],
) do |post|
user_post_creation = T.must(post.primary_user_post_creation)
user_post_creation.post_id = post.id
@@ -274,19 +358,35 @@ class Domain::MigrateToDomain
migrate_batch(
Domain::PostFile::InkbunnyPostFile,
models.flat_map(&:files),
unique_by: [:idx_domain_inkbunny_post_files_on_ib_id],
) { |file| file }
end
pb.progress = [pb.progress + batch.size, pb.total].min
end
end
sig { void }
def migrate_inkbunny_pools
sig { params(only_user: T.nilable(String)).void }
def migrate_inkbunny_pools(only_user: nil)
logger.info "migrating inkbunny pools"
query =
Domain::Inkbunny::Pool
.where.not(ib_pool_id: Domain::PostGroup::InkbunnyPool.select(:ib_id))
.includes(pool_joins: %i[post left_post right_post])
if only_user
user = Domain::Inkbunny::User.find_by(name: only_user)
if user.nil?
logger.info "inkbunny user not found: #{only_user}"
return
end
query =
Domain::Inkbunny::Pool.where(
ib_pool_id:
Domain::Inkbunny::PoolJoin.where(post: user.posts).select(
:ib_pool_id,
),
).includes(pool_joins: %i[post left_post right_post])
else
query =
Domain::Inkbunny::Pool
.where.not(ib_pool_id: Domain::PostGroup::InkbunnyPool.select(:ib_id))
.includes(pool_joins: %i[post left_post right_post])
end
pb =
ProgressBar.create(
throttle_rate: 0.2,
@@ -295,7 +395,7 @@ class Domain::MigrateToDomain
output: @pb_sink,
)
query.find_in_batches(batch_size: 10_000) do |batch|
query.find_in_batches(batch_size: 100) do |batch|
ReduxApplicationRecord.transaction do
models =
migrate_batch(Domain::PostGroup::InkbunnyPool, batch) do |old_pool|
@@ -587,7 +687,7 @@ class Domain::MigrateToDomain
if post.creator.present?
new_post.creator =
Domain::User::FaUser.find_by!(url_name: post.creator&.url_name)
Domain::User::FaUser.find_by(url_name: post.creator&.url_name)
end
if post.file.present? || post.file_uri.present?
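
migrate_batch itself is not part of this hunk, but every call site now passes a unique_by: index name, which implies conflicting rows are skipped so the migration can be re-run. A minimal sketch of the shape those call sites assume, forwarding unique_by to ActiveRecord's insert_all; this body is an assumption, not the project's implementation.

sig do
  params(
    klass: T.class_of(ReduxApplicationRecord),
    old_models: T::Array[T.untyped],
    unique_by: T.nilable(T::Array[Symbol]),
    block: T.proc.params(old_model: T.untyped).returns(T.untyped),
  ).returns(T::Array[T.untyped])
end
def migrate_batch(klass, old_models, unique_by: nil, &block)
  new_models = old_models.map { |old_model| block.call(old_model) }.compact
  return new_models if new_models.empty?

  rows = new_models.map { |model| model.attributes.except("id") }
  # With a nil unique_by, insert_all falls back to its default conflict
  # handling; otherwise the named unique index becomes the ON CONFLICT target.
  klass.insert_all(rows, unique_by: unique_by&.first)
  new_models
end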


@@ -120,18 +120,18 @@ class Domain::Fa::Post < ReduxApplicationRecord
# self.state_detail["scanned_at"] = time&.to_i
# end
# sig { override.returns(T.nilable(ActiveSupport::TimeWithZone)) }
# def posted_at
# pa = super
# return pa if pa
# begin
# contents = guess_last_submission_page&.response&.contents
# if contents
# parser = Domain::Fa::Parser::Page.new(contents)
# parser.submission.posted_date if parser.probably_submission?
# end
# end
# end
sig { returns(T.nilable(ActiveSupport::TimeWithZone)) }
def guess_posted_at
pa = posted_at
return pa if pa
begin
contents = guess_last_submission_page&.response&.contents
if contents
parser = Domain::Fa::Parser::Page.new(contents)
parser.submission.posted_date if parser.probably_submission?
end
end
end
# sig { params(log_entry: T.nilable(HttpLogEntry)).void }
# def last_submission_page=(log_entry)
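
A hedged console example of the fallback that guess_posted_at adds; the fa_id value is a placeholder.

post = Domain::Fa::Post.find_by!(fa_id: 12_345) # placeholder fa_id
post.posted_at        # the stored timestamp, if one exists
post.guess_posted_at  # falls back to parsing the archived submission page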


@@ -36,6 +36,19 @@ class Domain::Post < ReduxApplicationRecord
super
end
sig { override.returns(T.nilable(ActiveSupport::TimeWithZone)) }
def posted_at
pa = super
return pa if pa
begin
contents = guess_last_submission_log_entry&.response&.contents
if contents
parser = Domain::Fa::Parser::Page.new(contents)
parser.submission.posted_date if parser.probably_submission?
end
end
end
attr_json :posted_at, :datetime
attr_json :last_submission_log_entry_id, :integer


@@ -94,6 +94,20 @@ class Domain::Post::FaPost < Domain::Post
num_favorites
end
sig { returns(T.nilable(String)) }
def status_for_view
case self.state
when "ok"
"OK"
when "removed"
"Removed"
when "scan_error"
"Scan error"
when "file_error"
"File error"
end
end
sig do
params(
submission: Domain::Fa::Parser::ListedSubmissionParserHelper,


@@ -22,8 +22,12 @@
<div class="mt-2 flex flex-wrap gap-x-4 text-sm text-slate-600">
<span>
<i class="fa-regular fa-calendar mr-1"></i>
Posted: <%= @post.posted_at&.strftime("%Y-%m-%d") %>
(<%= time_ago_in_words(@post.posted_at) if @post.posted_at %> ago)
<% if (posted_at = @post.posted_at || @post.guess_posted_at) %>
Posted: <%= posted_at&.strftime("%Y-%m-%d") %>
(<%= time_ago_in_words(posted_at) if posted_at %> ago)
<% else %>
Posted: unknown
<% end %>
</span>
<span>
<i class="fa-solid fa-eye mr-1"></i>
@@ -77,7 +81,6 @@
<% end %>
<% end %>
</section>
<% if policy(@post).view_file? %>
<section>
<% if @post.file %>
@@ -95,7 +98,6 @@
<% end %>
<% end %>
</section>
<%= render partial: "log_entries/file_details_sky_section",
locals: {
log_entry: @post.file,


@@ -18,7 +18,11 @@
<%= link_to post.title, domain_fa_post_path(post), class: "sky-link block truncate" %>
</span>
<span class="whitespace-nowrap text-slate-500">
<%= time_ago_in_words(post.created_at) %> ago
<% if posted_at = post.posted_at %>
<%= time_ago_in_words(posted_at) %> ago
<% else %>
Unknown posted at
<% end %>
</span>
</div>
<% end %>


@@ -6,3 +6,7 @@
<i class="fa-solid fa-comment mr-1"></i>
Comments: <%= post.num_comments %>
</span>
<span>
<i class="fa-solid fa-calendar-days mr-1"></i>
Status: <%= post.status_for_view %>
</span>


@@ -17,7 +17,11 @@
<%= link_to post.title, domain_post_path(post), class: "sky-link block truncate" %>
</span>
<span class="whitespace-nowrap text-slate-500">
<%= time_ago_in_words(post.created_at) %> ago
<% if posted_at = post.posted_at %>
<%= time_ago_in_words(posted_at) %> ago
<% else %>
unknown
<% end %>
</span>
</div>
<% end %>


@@ -79,6 +79,9 @@ class CreateUnifiedDomainTables < ActiveRecord::Migration[7.2]
t.references :post, null: false, foreign_key: { to_table: :domain_posts }
t.references :log_entry,
null: true,
index: {
unique: true,
},
foreign_key: {
to_table: :http_log_entries,
}
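
Because this commit makes the log_entry index unique by editing CreateUnifiedDomainTables in place (and updating the dumped SQL structure in the next hunk), only fresh databases pick the change up from the migration itself. A hedged sketch of the same change expressed as a standalone migration for an existing database; the class name is hypothetical and not part of the commit.

class MakeDomainPostFilesLogEntryIndexUnique < ActiveRecord::Migration[7.2]
  def change
    # Replace the non-unique index with a unique one under the same name.
    remove_index :domain_post_files,
                 name: :index_domain_post_files_on_log_entry_id
    add_index :domain_post_files,
              :log_entry_id,
              unique: true,
              name: :index_domain_post_files_on_log_entry_id
  end
end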


@@ -6805,7 +6805,7 @@ SET default_tablespace = mirai;
-- Name: index_domain_post_files_on_log_entry_id; Type: INDEX; Schema: public; Owner: -; Tablespace: mirai
--
CREATE INDEX index_domain_post_files_on_log_entry_id ON public.domain_post_files USING btree (log_entry_id);
CREATE UNIQUE INDEX index_domain_post_files_on_log_entry_id ON public.domain_post_files USING btree (log_entry_id);
--