fix sqlite exporter
This commit is contained in:
@@ -34,6 +34,7 @@ RUN \
|
||||
ca-certificates \
|
||||
curl \
|
||||
gnupg \
|
||||
iputils-ping \
|
||||
libblas-dev \
|
||||
libdb-dev \
|
||||
libffi-dev \
|
||||
@@ -48,7 +49,7 @@ RUN \
|
||||
libvips42 \
|
||||
libyaml-dev \
|
||||
patch \
|
||||
iputils-ping \
|
||||
postgresql-client \
|
||||
rustc \
|
||||
uuid-dev \
|
||||
zlib1g-dev
|
||||
|
||||
@@ -39,8 +39,8 @@ services:
|
||||
PGADMIN_DEFAULT_EMAIL: admin@example.com
|
||||
PGADMIN_DEFAULT_PASSWORD: password
|
||||
PGADMIN_LISTEN_PORT: 8080
|
||||
PGADMIN_CONFIG_SERVER_MODE: False
|
||||
PGADMIN_CONFIG_MASTER_PASSWORD_REQUIRED: False
|
||||
PGADMIN_CONFIG_SERVER_MODE: "False"
|
||||
PGADMIN_CONFIG_MASTER_PASSWORD_REQUIRED: "False"
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
|
||||
@@ -11,3 +11,4 @@ install_extension dbaeumer.vscode-eslint
|
||||
install_extension aliariff.vscode-erb-beautify
|
||||
install_extension bradlc.vscode-tailwindcss
|
||||
install_extension KoichiSasada.vscode-rdbg
|
||||
install_extension qwtel.sqlite-viewer
|
||||
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
@@ -23,5 +23,6 @@
|
||||
"other": "on",
|
||||
"comments": "off",
|
||||
"strings": "on"
|
||||
}
|
||||
},
|
||||
"sqliteViewer.maxFileSize": 1024
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
|
||||
include HasMeasureDuration
|
||||
include HasBulkEnqueueJobs
|
||||
|
||||
USERS_PER_FULL_PAGE = Rails.env.test? ? 9 : 190
|
||||
@@ -39,7 +38,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
|
||||
if new_favs.empty?
|
||||
@user.scanned_favs_at = Time.now
|
||||
@user.save!
|
||||
logger.info "no new favs, stopping favs scan early"
|
||||
logger.info "[no new favs] [stopping scan]"
|
||||
return
|
||||
end
|
||||
end
|
||||
@@ -49,45 +48,32 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
|
||||
@page_number += 1
|
||||
end
|
||||
|
||||
to_add = nil
|
||||
to_remove = nil
|
||||
measure(
|
||||
proc do |jobs|
|
||||
"add #{to_add.size.to_s.bold} favs, " +
|
||||
"remove #{to_remove.size.to_s.bold} favs"
|
||||
end
|
||||
) do
|
||||
to_remove = existing_faved_ids - @seen_post_ids
|
||||
to_add = @seen_post_ids - existing_faved_ids
|
||||
end
|
||||
to_remove = existing_faved_ids - @seen_post_ids
|
||||
to_add = @seen_post_ids - existing_faved_ids
|
||||
logger.info "[calc change favs] [add #{to_add.size.to_s.bold}] [remove #{to_remove.size.to_s.bold}]"
|
||||
|
||||
measure(
|
||||
proc do
|
||||
"updated favs list to #{@user.fav_post_joins.count.to_s.bold} posts"
|
||||
ReduxApplicationRecord.transaction do
|
||||
if to_remove.any?
|
||||
@user.fav_post_joins.where(post_id: to_remove).delete_all
|
||||
end
|
||||
) do
|
||||
ReduxApplicationRecord.transaction do
|
||||
if to_remove.any?
|
||||
@user.fav_post_joins.where(post_id: to_remove).delete_all
|
||||
|
||||
slice_size =
|
||||
if to_add.size <= 2500
|
||||
100
|
||||
else
|
||||
1000
|
||||
end
|
||||
|
||||
slice_size =
|
||||
if to_add.size <= 2500
|
||||
100
|
||||
else
|
||||
1000
|
||||
end
|
||||
|
||||
if to_add.any?
|
||||
to_add.each_slice(slice_size) do |slice|
|
||||
@user.fav_post_joins.insert_all!(slice.map { |id| { post_id: id } })
|
||||
end
|
||||
if to_add.any?
|
||||
to_add.each_slice(slice_size) do |slice|
|
||||
@user.fav_post_joins.insert_all!(slice.map { |id| { post_id: id } })
|
||||
end
|
||||
|
||||
@user.scanned_favs_at = Time.now
|
||||
@user.save!
|
||||
end
|
||||
|
||||
@user.scanned_favs_at = Time.now
|
||||
@user.save!
|
||||
end
|
||||
logger.info "[updated favs list] [posts: #{@user.fav_post_joins.count.to_s.bold}]"
|
||||
end
|
||||
|
||||
private
|
||||
@@ -126,49 +112,47 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
|
||||
@total_items_seen += submissions.length
|
||||
|
||||
posts_to_create_hashes = []
|
||||
measure(
|
||||
proc do
|
||||
"page #{@page_number.to_s.bold} - " +
|
||||
"#{submissions.length.to_s.bold} posts on page, " +
|
||||
"created #{posts_to_create_hashes.size.to_s.bold}"
|
||||
end
|
||||
) do
|
||||
existing_fa_id_to_post_id =
|
||||
Domain::Fa::Post
|
||||
.where(fa_id: submissions.map(&:id))
|
||||
.pluck(:fa_id, :id)
|
||||
.to_h
|
||||
existing_fa_id_to_post_id =
|
||||
Domain::Fa::Post
|
||||
.where(fa_id: submissions.map(&:id))
|
||||
.pluck(:fa_id, :id)
|
||||
.to_h
|
||||
|
||||
posts_to_create_hashes =
|
||||
submissions
|
||||
.reject { |submission| existing_fa_id_to_post_id[submission.id] }
|
||||
.map do |submission|
|
||||
Domain::Fa::Post.hash_from_submission_parser_helper(
|
||||
submission,
|
||||
first_seen_log_entry: response.log_entry
|
||||
)
|
||||
end
|
||||
posts_to_create_hashes =
|
||||
submissions
|
||||
.reject { |submission| existing_fa_id_to_post_id[submission.id] }
|
||||
.map do |submission|
|
||||
Domain::Fa::Post.hash_from_submission_parser_helper(
|
||||
submission,
|
||||
first_seen_log_entry: response.log_entry
|
||||
)
|
||||
end
|
||||
|
||||
created_post_ids = []
|
||||
created_post_ids =
|
||||
Domain::Fa::Post
|
||||
.insert_all!(posts_to_create_hashes, returning: %i[id fa_id])
|
||||
.map { |row| row["id"] } unless posts_to_create_hashes.empty?
|
||||
created_post_ids = []
|
||||
created_post_ids =
|
||||
Domain::Fa::Post
|
||||
.insert_all!(posts_to_create_hashes, returning: %i[id fa_id])
|
||||
.map { |row| row["id"] } unless posts_to_create_hashes.empty?
|
||||
|
||||
enqueue_new_post_scan_jobs(
|
||||
posts_to_create_hashes.map { |hash| hash[:fa_id] }
|
||||
)
|
||||
enqueue_new_post_scan_jobs(
|
||||
posts_to_create_hashes.map { |hash| hash[:fa_id] }
|
||||
)
|
||||
|
||||
@last_page_post_ids = Set.new
|
||||
created_post_ids.each do |id|
|
||||
@seen_post_ids.add(id)
|
||||
@last_page_post_ids.add(id)
|
||||
end
|
||||
existing_fa_id_to_post_id.values.each do |id|
|
||||
@seen_post_ids.add(id)
|
||||
@last_page_post_ids.add(id)
|
||||
end
|
||||
@last_page_post_ids = Set.new
|
||||
created_post_ids.each do |id|
|
||||
@seen_post_ids.add(id)
|
||||
@last_page_post_ids.add(id)
|
||||
end
|
||||
existing_fa_id_to_post_id.values.each do |id|
|
||||
@seen_post_ids.add(id)
|
||||
@last_page_post_ids.add(id)
|
||||
end
|
||||
|
||||
logger.info [
|
||||
"[page #{@page_number.to_s.bold}]",
|
||||
"[posts: #{submissions.length.to_s.bold}]",
|
||||
"[created: #{posts_to_create_hashes.size.to_s.bold}]"
|
||||
].join(" ")
|
||||
|
||||
ret
|
||||
end
|
||||
|
||||
@@ -35,35 +35,25 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
@page_number += 1
|
||||
end
|
||||
|
||||
to_add = nil
|
||||
to_remove = nil
|
||||
measure(
|
||||
proc do |jobs|
|
||||
"add #{to_add.size.to_s.bold} follows, " +
|
||||
"remove #{to_remove.size.to_s.bold} follows"
|
||||
existing_followed_ids = Set.new(@user.follower_joins.pluck(:followed_id))
|
||||
to_remove = existing_followed_ids - @scanned_followed_ids
|
||||
to_add = @scanned_followed_ids - existing_followed_ids
|
||||
logger.info "[calc changed follows] [add #{to_add.size.to_s.bold}] [remove #{to_remove.size.to_s.bold}]"
|
||||
|
||||
ReduxApplicationRecord.transaction do
|
||||
if to_remove.any?
|
||||
@user.follower_joins.where(followed_id: to_remove).delete_all
|
||||
end
|
||||
) do
|
||||
existing_followed_ids = Set.new(@user.follower_joins.pluck(:followed_id))
|
||||
to_remove = existing_followed_ids - @scanned_followed_ids
|
||||
to_add = @scanned_followed_ids - existing_followed_ids
|
||||
if to_add.any?
|
||||
@user.follower_joins.insert_all!(
|
||||
to_add.map { |id| { followed_id: id } }
|
||||
)
|
||||
end
|
||||
@user.scanned_follows_at = Time.now
|
||||
@user.save!
|
||||
end
|
||||
|
||||
measure(
|
||||
proc { "updated follows list to #{@user.follows.count.to_s.bold} users" }
|
||||
) do
|
||||
ReduxApplicationRecord.transaction do
|
||||
if to_remove.any?
|
||||
@user.follower_joins.where(followed_id: to_remove).delete_all
|
||||
end
|
||||
if to_add.any?
|
||||
@user.follower_joins.insert_all!(
|
||||
to_add.map { |id| { followed_id: id } }
|
||||
)
|
||||
end
|
||||
@user.scanned_follows_at = Time.now
|
||||
@user.save!
|
||||
end
|
||||
end
|
||||
logger.info "[updated follows list] [users: #{@user.follows.count.to_s.bold}]"
|
||||
|
||||
if @created_user
|
||||
logger.info("user was new record, enqueue page scan job")
|
||||
@@ -114,49 +104,45 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
@last_in_user_list = user_list.last.url_name
|
||||
@total_follows_seen += user_list.length
|
||||
|
||||
users_to_create_hashes = []
|
||||
followed_user_ids =
|
||||
measure(
|
||||
proc do
|
||||
"page #{@page_number.to_s.bold} - " +
|
||||
"#{user_list.length.to_s.bold} users on page, " +
|
||||
"created #{users_to_create_hashes.size.to_s.bold}"
|
||||
existing_url_name_to_id =
|
||||
Domain::Fa::User
|
||||
.where(url_name: user_list.map(&:url_name))
|
||||
.pluck(:id, :url_name)
|
||||
.map { |id, url_name| [url_name, id] }
|
||||
.to_h
|
||||
|
||||
users_to_create_hashes =
|
||||
user_list
|
||||
.reject { |user| existing_url_name_to_id[user.url_name] }
|
||||
.map do |user|
|
||||
{
|
||||
url_name: user.url_name,
|
||||
name: user.name,
|
||||
state_detail: {
|
||||
"first_seen_entry" => response.log_entry.id
|
||||
}
|
||||
}
|
||||
end
|
||||
) do
|
||||
existing_url_name_to_id =
|
||||
Domain::Fa::User
|
||||
.where(url_name: user_list.map(&:url_name))
|
||||
.pluck(:id, :url_name)
|
||||
.map { |id, url_name| [url_name, id] }
|
||||
.to_h
|
||||
|
||||
users_to_create_hashes =
|
||||
user_list
|
||||
.reject { |user| existing_url_name_to_id[user.url_name] }
|
||||
.map do |user|
|
||||
{
|
||||
url_name: user.url_name,
|
||||
name: user.name,
|
||||
state_detail: {
|
||||
"first_seen_entry" => response.log_entry.id
|
||||
}
|
||||
}
|
||||
end
|
||||
created_user_ids =
|
||||
Domain::Fa::User
|
||||
.upsert_all(
|
||||
users_to_create_hashes,
|
||||
unique_by: :url_name,
|
||||
update_only: :url_name,
|
||||
returning: %i[id url_name]
|
||||
)
|
||||
.map { |row| row["id"] } unless users_to_create_hashes.empty?
|
||||
|
||||
created_user_ids =
|
||||
Domain::Fa::User
|
||||
.upsert_all(
|
||||
users_to_create_hashes,
|
||||
unique_by: :url_name,
|
||||
update_only: :url_name,
|
||||
returning: %i[id url_name]
|
||||
)
|
||||
.map { |row| row["id"] } unless users_to_create_hashes.empty?
|
||||
|
||||
enqueue_new_user_pagescan_jobs(users_to_create_hashes)
|
||||
(created_user_ids || []) + existing_url_name_to_id.values
|
||||
end
|
||||
logger.info [
|
||||
"[page #{@page_number.to_s.bold}]",
|
||||
"[users: #{user_list.length.to_s.bold}]",
|
||||
"[created: #{users_to_create_hashes.size.to_s.bold}]"
|
||||
].join(" ")
|
||||
|
||||
enqueue_new_user_pagescan_jobs(users_to_create_hashes)
|
||||
followed_user_ids =
|
||||
(created_user_ids || []) + existing_url_name_to_id.values
|
||||
followed_user_ids.each { |user_id| @scanned_followed_ids.add(user_id) }
|
||||
|
||||
ret
|
||||
|
||||
@@ -40,7 +40,11 @@ class Domain::Fa::PostEnqueuer
|
||||
"enqueuing #{to_enqueue.to_s.bold} more posts - #{already_enqueued.to_s.bold} already enqueued"
|
||||
)
|
||||
rows =
|
||||
measure(proc { |p| "gather #{p.length.to_s.bold} posts to enqueue" }) do
|
||||
measure(
|
||||
proc do |p|
|
||||
p && "gathered #{p.length.to_s.bold} posts" || "gathering posts..."
|
||||
end
|
||||
) do
|
||||
to_enqueue
|
||||
.times
|
||||
.map do
|
||||
|
||||
@@ -8,14 +8,18 @@ class Domain::Fa::PostFactorCalculator
|
||||
end
|
||||
|
||||
def fit
|
||||
logger.info "loading fav rows..."
|
||||
limit = 100_000_000
|
||||
dataset =
|
||||
measure(proc { |r| "loaded #{r.length.to_s.bold} favs" }) do
|
||||
Domain::Fa::Fav
|
||||
.all
|
||||
.pluck(:user_id, :post_id)
|
||||
.map { |user_id, post_id| { user_id: user_id, item_id: post_id } }
|
||||
end
|
||||
measure(
|
||||
->(r) do
|
||||
r && "loaded #{r.length.to_s.bold} favs" ||
|
||||
"loading up to #{limit} favs"
|
||||
end
|
||||
) { Domain::Fa::Fav.all.limit(limit).pluck(:user_id, :post_id).to_a }
|
||||
|
||||
measure("convert to hash") do
|
||||
dataset.map! { |user_id, post_id| { user_id: user_id, item_id: post_id } }
|
||||
end
|
||||
|
||||
measure("fit #{dataset.length.to_s.bold} favs") do
|
||||
@recommender.fit(dataset)
|
||||
@@ -23,7 +27,7 @@ class Domain::Fa::PostFactorCalculator
|
||||
end
|
||||
|
||||
def write_factors
|
||||
measure("#{"for_favorite".bold} - done") do
|
||||
measure("write factors") do
|
||||
write_factors_col(:item_ids, :item_factors, :for_favorite)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,42 +1,103 @@
|
||||
class Domain::Fa::SqliteExporter
|
||||
include HasMeasureDuration
|
||||
|
||||
TABLES = {
|
||||
fa_users: {
|
||||
model: Domain::Fa::User,
|
||||
columns: [
|
||||
%w[id int primary key],
|
||||
%w[url_name text],
|
||||
%w[name text],
|
||||
%w[artist_type text],
|
||||
%w[mood text],
|
||||
%w[num_pageviews int],
|
||||
%w[num_submissions int],
|
||||
%w[num_comments_recieved int],
|
||||
%w[num_comments_given int],
|
||||
%w[num_journals int],
|
||||
%w[num_favorites int],
|
||||
%w[registered_at text]
|
||||
],
|
||||
indexes: [{ on: "id", unique: true }, { on: "url_name", unique: true }],
|
||||
batch_size: 512,
|
||||
# format registered_at column
|
||||
each_row: ->(row) { row[11] = row[11]&.iso8601 }
|
||||
},
|
||||
fa_follows: {
|
||||
model: Domain::Fa::Follow,
|
||||
columns: [%w[follower_id int], %w[followed_id int]],
|
||||
indexes: [{ on: %w[follower_id followed_id], unique: true }],
|
||||
fk: {
|
||||
follower_id: %w[fa_users id],
|
||||
followed_id: %w[fa_users id]
|
||||
},
|
||||
batch_size: 4096
|
||||
},
|
||||
fa_favs: {
|
||||
model: Domain::Fa::Fav,
|
||||
columns: [%w[user_id int], %w[post_id int]],
|
||||
indexes: [{ on: %w[user_id post_id], unique: true }],
|
||||
fk: {
|
||||
user_id: %w[fa_users id],
|
||||
post_id: %w[fa_posts id]
|
||||
},
|
||||
batch_size: 4096
|
||||
},
|
||||
fa_posts: {
|
||||
model: Domain::Fa::Post.where("file_url_str is not null"),
|
||||
columns: [
|
||||
%w[id int],
|
||||
%w[fa_id int],
|
||||
%w[creator_id int],
|
||||
%w[title text],
|
||||
%w[category text],
|
||||
%w[theme text],
|
||||
%w[species text],
|
||||
%w[gender text],
|
||||
%w[file_url_str text],
|
||||
%w[num_views int],
|
||||
%w[num_comments int],
|
||||
%w[num_favorites int],
|
||||
%w[posted_at text]
|
||||
],
|
||||
batch_size: 4096,
|
||||
indexes: [{ on: "id", unique: true }, { on: "fa_id", unique: true }],
|
||||
fk: {
|
||||
creator_id: %w[fa_users id]
|
||||
},
|
||||
# format posted_at column
|
||||
each_row: ->(row) { row[12] = row[12]&.iso8601 }
|
||||
}
|
||||
}
|
||||
|
||||
def initialize(db, sample, tables)
|
||||
@db = db
|
||||
@sample = sample
|
||||
@tables = tables
|
||||
@tables = tables.include?(:all) ? TABLES.keys : tables
|
||||
@tables.each do |table|
|
||||
raise("unknown table: #{table}") unless TABLES.key?(table)
|
||||
end
|
||||
end
|
||||
|
||||
def run
|
||||
measure("created tables") { migrate }
|
||||
measure("create tables") { migrate }
|
||||
|
||||
measure("drop indexes") { drop_indexes }
|
||||
|
||||
if dump_table?(:users)
|
||||
measure(proc { |num| "dumped #{num&.to_s&.bold} fa users" }) do
|
||||
dump_fa_users
|
||||
@tables.each do |table|
|
||||
config = TABLES[table]
|
||||
measure(
|
||||
proc do |num|
|
||||
num && "dumped #{table}, #{num} rows" || "dumping #{table}..."
|
||||
end
|
||||
) do
|
||||
dump_table_common(
|
||||
table: table,
|
||||
model: config[:model],
|
||||
columns: config[:columns],
|
||||
batch_size: config[:batch_size],
|
||||
each_row: config[:each_row]
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
if dump_table?(:follows)
|
||||
measure(proc { |num| "dumped #{num&.to_s&.bold} fa follows" }) do
|
||||
dump_fa_follows
|
||||
end
|
||||
end
|
||||
|
||||
if dump_table?(:favs)
|
||||
measure(proc { |num| "dumped #{num&.to_s&.bold} fa favs" }) do
|
||||
dump_fa_favs
|
||||
end
|
||||
end
|
||||
|
||||
if dump_table?(:posts)
|
||||
measure(proc { |num| "dumped #{num&.to_s&.bold} fa posts" }) do
|
||||
dump_fa_posts
|
||||
end
|
||||
end
|
||||
|
||||
measure("created indexes") { create_indexes }
|
||||
ensure
|
||||
@db.close
|
||||
end
|
||||
@@ -63,123 +124,45 @@ class Domain::Fa::SqliteExporter
|
||||
private
|
||||
|
||||
def migrate
|
||||
@db.execute_batch2 <<-SQL
|
||||
create table if not exists fa_users (
|
||||
id int primary key,
|
||||
url_name text,
|
||||
name text,
|
||||
num_favorites int,
|
||||
registered_at text
|
||||
);
|
||||
TABLES.each do |table, config|
|
||||
columns = config[:columns]
|
||||
fk = config[:fk] || {}
|
||||
|
||||
create table if not exists fa_follows (
|
||||
id int primary key,
|
||||
follower_id int,
|
||||
followed_id int
|
||||
);
|
||||
columns_and_fks = [
|
||||
columns.map { |name, *rest| "#{name} #{rest.join(" ")}" }.join(",\n"),
|
||||
fk.map do |name, foreign|
|
||||
foreign_table, foreign_column = foreign
|
||||
"foreign key (#{name}) references #{foreign_table}(#{foreign_column})"
|
||||
end
|
||||
].flatten.join(",\n")
|
||||
|
||||
create table if not exists fa_favs (
|
||||
id int primary key,
|
||||
user_id int,
|
||||
post_id int
|
||||
);
|
||||
|
||||
create table if not exists fa_posts (
|
||||
id int primary key,
|
||||
fa_id int,
|
||||
creator_id int,
|
||||
title text,
|
||||
num_views int,
|
||||
num_comments int,
|
||||
num_favorites int,
|
||||
posted_at text
|
||||
);
|
||||
SQL
|
||||
end
|
||||
|
||||
INDEXES = [
|
||||
["fa_users", "url_name", true],
|
||||
["fa_follows", "follower_id", false],
|
||||
["fa_follows", "followed_id", false],
|
||||
["fa_favs", "user_id", false],
|
||||
["fa_favs", "post_id", false],
|
||||
["fa_posts", "creator_id", false],
|
||||
["fa_posts", "fa_id", true]
|
||||
]
|
||||
|
||||
def create_indexes
|
||||
@db.execute_batch2(INDEXES.map { |table, col, unique| <<-SQL }.join("\n"))
|
||||
create #{unique ? "unique" : ""} index if not exists #{col}_on_#{table}
|
||||
on #{table} (#{col});
|
||||
sql = <<-SQL
|
||||
create table if not exists #{table} (
|
||||
#{columns_and_fks}
|
||||
);
|
||||
SQL
|
||||
end
|
||||
# logger.info(sql)
|
||||
@db.execute_batch2(sql)
|
||||
|
||||
def drop_indexes
|
||||
@db.execute_batch2(INDEXES.map { |table, col, unique| <<-SQL }.join("\n"))
|
||||
drop index if exists #{col}_on_#{table};
|
||||
SQL
|
||||
end
|
||||
|
||||
def dump_fa_users
|
||||
dump_table_common(
|
||||
model: Domain::Fa::User,
|
||||
table: "fa_users",
|
||||
columns: %w[id url_name name num_favorites registered_at],
|
||||
batch_size: 512
|
||||
) do |batch|
|
||||
# format registered_at
|
||||
batch.each { |row| row[4] = row[4]&.iso8601 }
|
||||
end
|
||||
end
|
||||
|
||||
def dump_fa_follows
|
||||
dump_table_common(
|
||||
model: Domain::Fa::Follow,
|
||||
table: "fa_follows",
|
||||
columns: %w[id follower_id followed_id],
|
||||
batch_size: 4096
|
||||
)
|
||||
end
|
||||
|
||||
def dump_fa_favs
|
||||
dump_table_common(
|
||||
model: Domain::Fa::Fav,
|
||||
table: "fa_favs",
|
||||
columns: %w[id user_id post_id],
|
||||
batch_size: 4096
|
||||
)
|
||||
end
|
||||
|
||||
def dump_fa_posts
|
||||
dump_table_common(
|
||||
model: Domain::Fa::Post.where("file_url_str is not null"),
|
||||
table: "fa_posts",
|
||||
columns: %w[
|
||||
id
|
||||
fa_id
|
||||
title
|
||||
creator_id
|
||||
num_views
|
||||
num_comments
|
||||
num_favorites
|
||||
posted_at
|
||||
],
|
||||
batch_size: 4096
|
||||
) do |batch|
|
||||
# format posted_at
|
||||
batch.each { |row| row[7] = row[7]&.iso8601 }
|
||||
config[:indexes].each do |index|
|
||||
unique = index[:unique] ? "unique" : ""
|
||||
cols = [index[:on]].flatten
|
||||
col_names = cols.join("_")
|
||||
sql = <<-SQL
|
||||
create #{unique} index if not exists #{col_names}_on_#{table}
|
||||
on #{table} (#{cols.join(", ")});
|
||||
SQL
|
||||
logger.info(sql)
|
||||
@db.execute_batch2(sql)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# ====== common infra ====== #
|
||||
|
||||
def dump_table_common(model:, table:, columns:, batch_size:)
|
||||
def dump_table_common(model:, table:, columns:, batch_size:, each_row: nil)
|
||||
num_models = 0
|
||||
start_id = max_id(table, "id")
|
||||
start_id += 1 if start_id
|
||||
logger.info(
|
||||
"dumping #{table.bold}, start at #{(start_id || "first").to_s.bold}..."
|
||||
)
|
||||
logger.info("[#{table.to_s.bold}] [batch size: #{batch_size.to_s.bold}]")
|
||||
inserter = create_inserter(batch_size, table, columns)
|
||||
|
||||
load_duration = 0.0
|
||||
@@ -189,40 +172,34 @@ class Domain::Fa::SqliteExporter
|
||||
dump_start = Time.now
|
||||
load_start = Time.now
|
||||
|
||||
@db.transaction
|
||||
pluck_rows(model, columns, batch_size: batch_size) do |batch|
|
||||
@db.transaction do
|
||||
load_duration += Time.now - load_start
|
||||
|
||||
pluck_rows(
|
||||
model,
|
||||
columns,
|
||||
start_id: start_id,
|
||||
batch_size: batch_size
|
||||
) do |rows|
|
||||
load_duration += Time.now - load_start
|
||||
map_start = Time.now
|
||||
batch.each { |row| each_row.call(row) } if each_row
|
||||
map_duration += Time.now - map_start
|
||||
|
||||
map_start = Time.now
|
||||
yield rows if block_given?
|
||||
map_duration += Time.now - map_start
|
||||
insert_start = Time.now
|
||||
inserter.insert(batch)
|
||||
insert_duration += Time.now - insert_start
|
||||
|
||||
insert_start = Time.now
|
||||
inserter.insert(rows)
|
||||
insert_duration += Time.now - insert_start
|
||||
|
||||
num_models += rows.size
|
||||
load_start = Time.now
|
||||
num_models += batch.size
|
||||
load_start = Time.now
|
||||
end
|
||||
end
|
||||
|
||||
dump_duration = Time.now - dump_start
|
||||
logger.info(
|
||||
"time spent on #{table.bold} " +
|
||||
"(#{(num_models / dump_duration).round(0).to_s.bold}/sec): " +
|
||||
"#{load_duration.round(2).to_s.bold} sec loading, " +
|
||||
"#{map_duration.round(2).to_s.bold} sec mapping, " +
|
||||
"#{insert_duration.round(2).to_s.bold} sec inserting"
|
||||
"[#{table.to_s.bold}] " +
|
||||
"[#{(num_models / dump_duration).round(0).to_s.bold}/sec] " +
|
||||
"[load: #{load_duration.round(2).to_s.bold} sec] " +
|
||||
"[map: #{map_duration.round(2).to_s.bold} sec] " +
|
||||
"[insert: #{insert_duration.round(2).to_s.bold} sec]"
|
||||
)
|
||||
num_models
|
||||
ensure
|
||||
inserter.close if inserter
|
||||
@db.commit
|
||||
end
|
||||
|
||||
def create_inserter(bulk_size, table, columns)
|
||||
@@ -235,20 +212,20 @@ class Domain::Fa::SqliteExporter
|
||||
|
||||
def initialize(db, bulk_size, table, columns)
|
||||
@db = db
|
||||
@bulk_size = bulk_size
|
||||
@bulk_size = [bulk_size, 999 / columns.size].min
|
||||
@table = table
|
||||
@columns = columns
|
||||
|
||||
binds = "(" + (["?"] * columns.size).join(", ") + ")"
|
||||
binds = "(" + (["?"] * @columns.size).join(", ") + ")"
|
||||
|
||||
@single = @db.prepare <<-SQL
|
||||
insert into #{table} (#{columns.join(", ")})
|
||||
values #{binds}
|
||||
insert into #{@table} (#{@columns.map(&:first).join(", ")})
|
||||
values #{binds} on conflict do nothing
|
||||
SQL
|
||||
|
||||
@bulk = @db.prepare <<-SQL
|
||||
insert into #{table} (#{columns.join(", ")})
|
||||
values #{([binds] * bulk_size).join(", ")}
|
||||
insert into #{@table} (#{@columns.map(&:first).join(", ")})
|
||||
values #{([binds] * @bulk_size).join(", ")} on conflict do nothing
|
||||
SQL
|
||||
end
|
||||
|
||||
@@ -303,19 +280,15 @@ class Domain::Fa::SqliteExporter
|
||||
end
|
||||
end
|
||||
|
||||
def pluck_rows(relation, cols, start_id:, batch_size:)
|
||||
def pluck_rows(relation, cols, batch_size:)
|
||||
num_batches = 0
|
||||
num_models = 0
|
||||
|
||||
start_time = Time.now
|
||||
models_in_measure = 0
|
||||
|
||||
relation = relation.all unless relation.is_a?(ActiveRecord::Relation)
|
||||
relation = relation.where("id >= ?", start_id) if start_id
|
||||
relation.pluck_in_batches(
|
||||
*cols.map(&:to_sym),
|
||||
batch_size: batch_size
|
||||
) do |batch|
|
||||
relation.in_batches(of: batch_size) do |batch|
|
||||
batch = batch.pluck(*cols.map(&:first).map(&:to_sym)).to_a
|
||||
yield batch
|
||||
|
||||
num_models += batch.size
|
||||
@@ -335,16 +308,4 @@ class Domain::Fa::SqliteExporter
|
||||
|
||||
puts ""
|
||||
end
|
||||
|
||||
def max_id(table, column)
|
||||
@db.get_first_value <<-SQL
|
||||
select max(#{column}) from #{table}
|
||||
SQL
|
||||
end
|
||||
|
||||
def dump_table?(table)
|
||||
ret = @tables.include?(:all) || @tables.include?(table)
|
||||
logger.info("skipping #{table.to_s.bold}...") if !ret
|
||||
ret
|
||||
end
|
||||
end
|
||||
|
||||
@@ -24,9 +24,12 @@ class Domain::Fa::UserEnqueuer
|
||||
"enqueuing #{to_enqueue.to_s.bold} more users - #{already_enqueued.to_s.bold} already enqueued"
|
||||
)
|
||||
rows =
|
||||
measure(proc { |p| "gather #{p.length.to_s.bold} users to enqueue" }) do
|
||||
to_enqueue.times.map { @user_iterator.next }
|
||||
end
|
||||
measure(
|
||||
proc do |p|
|
||||
p && "gathered #{p.length.to_s.bold} users to enqueue" ||
|
||||
"gathering users..."
|
||||
end
|
||||
) { to_enqueue.times.map { @user_iterator.next } }
|
||||
measure("enqueue jobs") do
|
||||
rows.each do |user|
|
||||
types = []
|
||||
|
||||
@@ -8,9 +8,12 @@ class Domain::Fa::UserFactorCalculator
|
||||
end
|
||||
|
||||
def fit
|
||||
logger.info "loading follow rows..."
|
||||
dataset =
|
||||
measure(proc { |r| "loaded #{r.length.to_s.bold} follows" }) do
|
||||
measure(
|
||||
proc do |r|
|
||||
r && "loaded #{r.length.to_s.bold} follows" || "loading follows"
|
||||
end
|
||||
) do
|
||||
Domain::Fa::Follow
|
||||
.all
|
||||
.pluck(:follower_id, :followed_id)
|
||||
@@ -50,7 +53,7 @@ class Domain::Fa::UserFactorCalculator
|
||||
.each_slice(20_000) do |chunk|
|
||||
total += chunk.size
|
||||
measure(
|
||||
" -> wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)"
|
||||
" -> write #{chunk.size.to_s.bold} factors - (#{total.to_s.bold} total)"
|
||||
) do
|
||||
Domain::Fa::UserFactor.upsert_all(
|
||||
chunk,
|
||||
|
||||
@@ -6,6 +6,8 @@ module HasMeasureDuration
|
||||
|
||||
def measure(title)
|
||||
now = Time.now
|
||||
title_str = title.respond_to?(:call) && title.call(nil) || title
|
||||
logger.info "[start] #{title_str}"
|
||||
ret = yield
|
||||
duration = Time.now - now
|
||||
if duration >= 1.hour
|
||||
@@ -17,8 +19,8 @@ module HasMeasureDuration
|
||||
else
|
||||
duration_str = "#{(duration * 1000).round(0).to_s.bold} ms"
|
||||
end
|
||||
title = title.call(ret, duration) if title.respond_to?(:call)
|
||||
logger.info "#{title} - #{duration_str}"
|
||||
title_str = title.call(ret, duration) if title.respond_to?(:call)
|
||||
logger.info "[finish] #{title_str} - #{duration_str}"
|
||||
ret
|
||||
end
|
||||
end
|
||||
|
||||
@@ -116,7 +116,7 @@ class Scraper::HttpClient
|
||||
begin
|
||||
response_blob_file.save unless response_blob_file.persisted?
|
||||
rescue => e
|
||||
puts "error saving blob file #{response_blob_file.sha256_hex}: #{e}"
|
||||
puts "error saving blob file #{HexUtil.bin2hex(response_blob_file.sha256)}: #{e}"
|
||||
end
|
||||
rescue StandardError
|
||||
retries += 1
|
||||
|
||||
@@ -47,13 +47,20 @@ class BlobFile < ReduxApplicationRecord
|
||||
end
|
||||
|
||||
def self.find_or_initialize_from_blob_entry(blob_entry)
|
||||
blob_file =
|
||||
BlobFile.find_or_initialize_by(sha256: blob_entry.sha256) do |blob_file|
|
||||
blob_file.content_type = blob_entry.content_type
|
||||
blob_file.content_bytes = blob_entry.contents
|
||||
blob_file.created_at = blob_entry.created_at
|
||||
end
|
||||
blob_file
|
||||
BlobFile.find_or_initialize_by(sha256: blob_entry.sha256) do |blob_file|
|
||||
blob_file.content_type = blob_entry.content_type
|
||||
blob_file.content_bytes = blob_entry.contents
|
||||
blob_file.created_at = blob_entry.created_at
|
||||
end
|
||||
end
|
||||
|
||||
def self.initialize_from_blob_entry(blob_entry)
|
||||
BlobFile.new(
|
||||
sha256: blob_entry.sha256,
|
||||
content_type: blob_entry.content_type,
|
||||
content_bytes: blob_entry.contents,
|
||||
created_at: blob_entry.created_at
|
||||
)
|
||||
end
|
||||
|
||||
def content_bytes=(content_bytes)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
class Domain::Fa::Fav < ReduxApplicationRecord
|
||||
self.table_name = "domain_fa_favs"
|
||||
self.primary_keys = :user_id, :post_id
|
||||
|
||||
belongs_to :user, class_name: "::Domain::Fa::User"
|
||||
belongs_to :post, class_name: "::Domain::Fa::Post"
|
||||
|
||||
@@ -40,11 +40,13 @@
|
||||
<% if similar %>
|
||||
<% similar.each do |factor| %>
|
||||
<% post = factor.post %>
|
||||
<div class='flex flex-row py-1 px-2 border-b-2 last:border-b-0'>
|
||||
<span class='text-md italic'><%= link_to post.title, domain_fa_post_path(post.fa_id), class: 'underline' %></span> -
|
||||
<%= render "domain/fa/users/inline_link", user: post.creator %>
|
||||
<div class='flex flex-row justify-between items-center py-1 px-2 border-b-2 last:border-b-0'>
|
||||
<div class='flex flex-row items-center'>
|
||||
<span class='text-md italic'><%= link_to post.title, domain_fa_post_path(post.fa_id), class: 'underline' %></span>
|
||||
<span class='italic ml-2'>by <%= render "domain/fa/users/inline_link", user: post.creator %></span>
|
||||
</div>
|
||||
<span class='text-sm text-slate-500 ml-2'>
|
||||
(distance: <%= number_with_precision(factor.neighbor_distance, precision: 3) %>)
|
||||
(distance: <%= number_with_precision(factor.neighbor_distance, precision: 5) %>)
|
||||
</span>
|
||||
</div>
|
||||
<% end %>
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
class CreateDomainFaFollows < ActiveRecord::Migration[7.0]
|
||||
def change
|
||||
# pk removed in 20230523162724_remove_pk_from_follows.rb
|
||||
create_table :domain_fa_follows do |t|
|
||||
t.references :follower, null: false
|
||||
t.references :followed, null: false
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
class CreateDomainFaFavs < ActiveRecord::Migration[7.0]
|
||||
def change
|
||||
# pk removed in 20241220174922_remove_pk_from_domain_fa_favs.rb
|
||||
create_table :domain_fa_favs do |t|
|
||||
t.references :user, null: false
|
||||
t.references :post, null: false
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
class RemovePkFromDomainFaFavs < ActiveRecord::Migration[7.0]
|
||||
def up
|
||||
add_index :domain_fa_favs, %i[user_id post_id], unique: true
|
||||
remove_column :domain_fa_favs, :id, :bigint, before: :user_id
|
||||
end
|
||||
|
||||
def down
|
||||
add_column :domain_fa_favs, :id, :bigint, primary_key: true, first: true
|
||||
add_index :domain_fa_favs, :id
|
||||
remove_index :domain_fa_favs, %i[user_id post_id]
|
||||
end
|
||||
end
|
||||
232
db/schema.rb
generated
232
db/schema.rb
generated
@@ -10,25 +10,15 @@
|
||||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
ActiveRecord::Schema[7.0].define(version: 2024_12_20_174922) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pg_prewarm"
|
||||
enable_extension "pg_stat_statements"
|
||||
enable_extension "pg_trgm"
|
||||
enable_extension "pgcrypto"
|
||||
enable_extension "plpgsql"
|
||||
enable_extension "vector"
|
||||
|
||||
create_table "blob_entries", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
t.binary "contents", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_entries_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.binary "base_sha256"
|
||||
@@ -39,8 +29,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_00", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_00", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -49,8 +38,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_00_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_01", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_01", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -59,8 +47,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_01_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_02", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_02", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -69,8 +56,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_02_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_03", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_03", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -79,8 +65,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_03_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_04", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_04", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -89,8 +74,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_04_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_05", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_05", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -99,8 +83,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_05_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_06", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_06", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -109,8 +92,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_06_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_07", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_07", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -119,8 +101,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_07_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_08", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_08", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -129,8 +110,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_08_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_09", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_09", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -139,8 +119,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_09_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_10", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_10", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -149,8 +128,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_10_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_11", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_11", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -159,8 +137,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_11_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_12", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_12", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -169,8 +146,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_12_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_13", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_13", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -179,8 +155,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_13_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_14", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_14", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -189,8 +164,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_14_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_15", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_15", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -199,8 +173,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_15_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_16", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_16", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -209,8 +182,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_16_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_17", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_17", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -219,8 +191,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_17_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_18", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_18", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -229,8 +200,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_18_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_19", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_19", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -239,8 +209,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_19_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_20", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_20", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -249,8 +218,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_20_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_21", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_21", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -259,8 +227,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_21_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_22", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_22", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -269,8 +236,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_22_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_23", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_23", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -279,8 +245,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_23_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_24", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_24", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -289,8 +254,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_24_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_25", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_25", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -299,8 +263,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_25_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_26", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_26", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -309,8 +272,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_26_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_27", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_27", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -319,8 +281,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_27_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_28", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_28", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -329,8 +290,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_28_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_29", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_29", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -339,8 +299,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_29_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_30", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_30", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -349,8 +308,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_30_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_31", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_31", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -359,8 +317,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_31_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_32", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_32", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -369,8 +326,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_32_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_33", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_33", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -379,8 +335,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_33_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_34", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_34", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -389,8 +344,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_34_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_35", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_35", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -399,8 +353,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_35_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_36", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_36", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -409,8 +362,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_36_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_37", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_37", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -419,8 +371,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_37_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_38", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_38", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -429,8 +380,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_38_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_39", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_39", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -439,8 +389,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_39_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_40", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_40", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -449,8 +398,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_40_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_41", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_41", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -459,8 +407,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_41_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_42", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_42", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -469,8 +416,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_42_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_43", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_43", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -479,8 +425,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_43_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_44", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_44", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -489,8 +434,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_44_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_45", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_45", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -499,8 +443,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_45_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_46", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_46", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -509,8 +452,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_46_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_47", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_47", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -519,8 +461,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_47_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_48", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_48", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -529,8 +470,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_48_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_49", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_49", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -539,8 +479,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_49_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_50", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_50", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -549,8 +488,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_50_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_51", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_51", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -559,8 +497,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_51_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_52", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_52", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -569,8 +506,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_52_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_53", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_53", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -579,8 +515,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_53_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_54", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_54", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -589,8 +524,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_54_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_55", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_55", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -599,8 +533,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_55_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_56", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_56", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -609,8 +542,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_56_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_57", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_57", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -619,8 +551,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_57_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_58", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_58", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -629,8 +560,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_58_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_59", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_59", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -639,8 +569,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_59_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_60", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_60", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -649,8 +578,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_60_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_61", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_61", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -659,8 +587,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_61_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_62", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_62", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -669,8 +596,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_62_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_entries_p_63", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
create_table "blob_entries_p_63", primary_key: "sha256", id: :binary, force: :cascade do |t|
|
||||
t.binary "base_sha256"
|
||||
t.string "content_type", null: false
|
||||
t.integer "size", null: false
|
||||
@@ -1343,7 +1269,9 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.datetime "updated_at"
|
||||
t.string "signature"
|
||||
t.jsonb "args"
|
||||
t.index ["priority", "run_at"], name: "delayed_jobs_priority"
|
||||
t.index ["priority", "run_at"], name: "delayed_jobs_priority_run_at_idx"
|
||||
t.index ["queue"], name: "delayed_jobs_queue_idx"
|
||||
t.index ["signature"], name: "delayed_jobs_signature_idx", unique: true
|
||||
end
|
||||
|
||||
create_table "domain_e621_post_versions", force: :cascade do |t|
|
||||
@@ -1402,10 +1330,10 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["name"], name: "index_domain_e621_tags_on_name", unique: true
|
||||
end
|
||||
|
||||
create_table "domain_fa_favs", force: :cascade do |t|
|
||||
create_table "domain_fa_favs", id: false, force: :cascade do |t|
|
||||
t.bigint "user_id", null: false
|
||||
t.bigint "post_id", null: false
|
||||
t.index ["post_id"], name: "index_domain_fa_favs_on_post_id"
|
||||
t.index ["user_id", "post_id"], name: "index_domain_fa_favs_on_user_id_and_post_id", unique: true
|
||||
t.index ["user_id"], name: "index_domain_fa_favs_on_user_id"
|
||||
end
|
||||
|
||||
@@ -1447,7 +1375,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.jsonb "state_detail"
|
||||
t.jsonb "log_entry_detail"
|
||||
t.index ["creator_id"], name: "index_domain_fa_posts_on_creator_id"
|
||||
t.index ["fa_id"], name: "index_domain_fa_posts_on_fa_id", unique: true
|
||||
t.index ["fa_id", "id"], name: "index_domain_fa_posts_on_fa_id", unique: true
|
||||
t.index ["file_id"], name: "index_domain_fa_posts_on_file_id"
|
||||
end
|
||||
|
||||
@@ -1507,7 +1435,9 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.jsonb "state_detail"
|
||||
t.datetime "scanned_follows_at"
|
||||
t.datetime "scanned_favs_at"
|
||||
t.index ["name"], name: "domain_fa_users_name_idx", opclass: :gist_trgm_ops, using: :gist
|
||||
t.index ["name"], name: "index_domain_fa_users_on_name", unique: true
|
||||
t.index ["url_name"], name: "domain_fa_users_url_name_idx", opclass: :gist_trgm_ops, using: :gist
|
||||
t.index ["url_name"], name: "index_domain_fa_users_on_url_name", unique: true
|
||||
end
|
||||
|
||||
@@ -1603,6 +1533,13 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.datetime "updated_at", null: false
|
||||
end
|
||||
|
||||
create_table "domain_inkbunny_user_avatars", force: :cascade do |t|
|
||||
t.bigint "user_id", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["user_id"], name: "index_domain_inkbunny_user_avatars_on_user_id"
|
||||
end
|
||||
|
||||
create_table "domain_inkbunny_users", force: :cascade do |t|
|
||||
t.integer "state", null: false
|
||||
t.json "state_detail"
|
||||
@@ -1658,7 +1595,6 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.integer "state"
|
||||
t.json "state_detail"
|
||||
t.json "raw_data"
|
||||
t.integer "tw_id"
|
||||
t.string "name", null: false
|
||||
t.string "nick"
|
||||
t.string "description"
|
||||
@@ -1669,6 +1605,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.datetime "scanned_timeline_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.bigint "tw_id"
|
||||
t.index ["name"], name: "index_domain_twitter_users_on_name", unique: true
|
||||
t.index ["tw_id"], name: "index_domain_twitter_users_on_tw_id", unique: true
|
||||
end
|
||||
@@ -1838,9 +1775,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
t.index ["item_type", "item_id"], name: "index_versions_on_item_type_and_item_id"
|
||||
end
|
||||
|
||||
add_foreign_key "blob_entries", "blob_entries", column: "base_sha256", primary_key: "sha256"
|
||||
add_foreign_key "domain_e621_post_versions", "domain_e621_posts", column: "item_id"
|
||||
add_foreign_key "domain_fa_favs", "domain_fa_posts", column: "post_id"
|
||||
add_foreign_key "domain_fa_favs", "domain_fa_users", column: "user_id"
|
||||
add_foreign_key "domain_fa_follows", "domain_fa_users", column: "followed_id"
|
||||
add_foreign_key "domain_fa_follows", "domain_fa_users", column: "follower_id"
|
||||
@@ -1860,8 +1795,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_19_201430) do
|
||||
add_foreign_key "domain_inkbunny_pool_joins", "domain_inkbunny_posts", column: "post_id"
|
||||
add_foreign_key "domain_twitter_medias", "domain_twitter_tweets", column: "tweet_id"
|
||||
add_foreign_key "domain_twitter_medias", "http_log_entries", column: "file_id"
|
||||
add_foreign_key "domain_twitter_tweets", "domain_twitter_users", column: "author_id"
|
||||
add_foreign_key "domain_twitter_user_versions", "domain_twitter_users", column: "item_id"
|
||||
add_foreign_key "domain_twitter_tweets", "domain_twitter_users", column: "author_id", primary_key: "tw_id", name: "on_author_id"
|
||||
add_foreign_key "http_log_entries", "http_log_entries", column: "caused_by_id"
|
||||
add_foreign_key "http_log_entries", "http_log_entry_headers", column: "request_headers_id"
|
||||
add_foreign_key "http_log_entries", "http_log_entry_headers", column: "response_headers_id"
|
||||
|
||||
3
justfile
3
justfile
@@ -12,3 +12,6 @@ format-changed:
|
||||
|
||||
format-all:
|
||||
find . -name '*.rb' -type f -exec sh -c 'stree format "$1" > "$1.tmp" && mv "$1.tmp" "$1"' sh {} \;
|
||||
|
||||
psql-dump-domain-fa-favs:
|
||||
@psql -P pager=off -c 'select user_id, post_id, 1 from domain_fa_favs limit 10000000;' -d redux_prod -h 10.166.33.171 -U scraper_redux -t -A -F ' '
|
||||
|
||||
@@ -5,65 +5,71 @@ namespace :blob_file do
|
||||
task migrate_blob_entries: %i[environment] do
|
||||
batch_size = ENV["batch_size"]&.to_i || 1000
|
||||
profile = ENV["profile"] == "true" || false
|
||||
start_at = ENV["start_at"] || "0" * 64
|
||||
num_migrated = 0
|
||||
puts "batch_size: #{batch_size}"
|
||||
|
||||
RubyProf.start if profile
|
||||
|
||||
def migrate_impl(batch_size, start_at, stop_at)
|
||||
def migrate_impl(batch_size, start_at)
|
||||
num_migrated = 0
|
||||
num_processed = 0
|
||||
start_time = Time.now
|
||||
BlobEntryP
|
||||
.where("sha256 NOT IN (SELECT sha256 FROM blob_files)")
|
||||
.includes(:base)
|
||||
.find_in_batches(
|
||||
batch_size: batch_size,
|
||||
start: HexUtil.hex2bin(start_at),
|
||||
finish: HexUtil.hex2bin(stop_at)
|
||||
) do |batch|
|
||||
batch_migrated = insert_blob_entries_batch(batch)
|
||||
num_migrated += batch_migrated
|
||||
rate = batch_migrated.to_f / (Time.now - start_time)
|
||||
puts "migrated #{batch_migrated} @ #{rate.round(1)}/second blob entries [last: #{HexUtil.bin2hex(batch.last.sha256)}]"
|
||||
start_time = Time.now
|
||||
end
|
||||
BlobEntryP.in_batches(
|
||||
of: batch_size,
|
||||
start: HexUtil.hex2bin(start_at),
|
||||
order: :asc
|
||||
) do |batch|
|
||||
batch_migrated = insert_blob_entries_batch(batch)
|
||||
num_migrated += batch_migrated
|
||||
num_processed += batch.size
|
||||
rate = batch_migrated.to_f / (Time.now - start_time)
|
||||
puts "migrated #{batch_migrated}, processed #{num_processed} @ #{rate.round(1)}/second [last: #{HexUtil.bin2hex(batch.last.sha256)}]"
|
||||
start_time = Time.now
|
||||
end
|
||||
num_migrated
|
||||
end
|
||||
|
||||
def insert_blob_entries_batch(batch)
|
||||
num_migrated = 0
|
||||
|
||||
blob_entry_sha256s = batch.pluck(:sha256)
|
||||
blob_file_sha256s =
|
||||
BlobFile.where(sha256: blob_entry_sha256s).pluck(:sha256)
|
||||
missing_sha256s = blob_entry_sha256s - blob_file_sha256s
|
||||
|
||||
BlobFile.transaction do
|
||||
batch.each do |blob_entry|
|
||||
blob_file = BlobFile.find_or_initialize_from_blob_entry(blob_entry)
|
||||
sha256_hex = HexUtil.bin2hex(blob_file.sha256)
|
||||
begin
|
||||
unless blob_file.persisted?
|
||||
BlobEntryP
|
||||
.where(sha256: missing_sha256s)
|
||||
.each do |blob_entry|
|
||||
blob_file = BlobFile.initialize_from_blob_entry(blob_entry)
|
||||
sha256_hex = HexUtil.bin2hex(blob_file.sha256)
|
||||
begin
|
||||
blob_file.save!
|
||||
num_migrated += 1
|
||||
rescue => e
|
||||
puts "error saving blob file #{sha256_hex}: #{e}"
|
||||
end
|
||||
rescue => e
|
||||
puts "error saving blob file #{sha256_hex}: #{e}"
|
||||
end
|
||||
end
|
||||
end
|
||||
num_migrated
|
||||
end
|
||||
|
||||
def start_thread(batch_size, start_at, stop_at)
|
||||
Thread.new { migrate_impl(batch_size, start_at, stop_at) }
|
||||
def start_thread(batch_size, start_at)
|
||||
Thread.new { migrate_impl(batch_size, start_at) }
|
||||
end
|
||||
|
||||
num_ractors = 4
|
||||
skip = (0xFFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF / num_ractors) + 1
|
||||
num_threads = 1
|
||||
# skip = ((0xFFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF) / num_threads) + 1
|
||||
num_migrated =
|
||||
(0...num_ractors)
|
||||
(0...num_threads)
|
||||
.map do |i|
|
||||
# partition the entire sha256 space into num_ractors chunks
|
||||
# each chunk is 256 / num_ractors in size
|
||||
start_at = (skip * i).to_s(16).rjust(32, "0")
|
||||
stop_at = ((skip * (i + 1)) - 1).to_s(16).rjust(32, "0")
|
||||
puts "migrate #{start_at} -> #{stop_at}"
|
||||
start_thread(batch_size, start_at, stop_at)
|
||||
# partition the entire sha256 space into num_threads chunks
|
||||
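# each chunk would span 1 / num_threads of the key space (partitioning is currently disabled; see the commented-out start_at / stop_at below)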
|
||||
# start_at = (skip * i).to_s(16).rjust(32, "0")
|
||||
# stop_at = ((skip * (i + 1)) - 1).to_s(16).rjust(32, "0")
|
||||
puts "migrate #{start_at}"
|
||||
start_thread(batch_size, start_at)
|
||||
end
|
||||
.map(&:value)
|
||||
.sum
|
||||
|
||||
32
rake/fa.rake
32
rake/fa.rake
@@ -147,6 +147,34 @@ namespace :fa do
|
||||
worker.write_factors
|
||||
end
|
||||
|
||||
desc "dump user follows to stdout, formatted for mf-train"
|
||||
task dump_user_follows: %i[set_logger_stdout environment] do
|
||||
relation = Domain::Fa::Follow.all
|
||||
total = relation.count
|
||||
$stderr.puts "dumping #{total} follows..."
|
||||
# print progress bar to stderr
|
||||
progress =
|
||||
ProgressBar.create(
|
||||
total: total,
|
||||
format: "%t: %c/%C %B %p%% %a %e",
|
||||
output: $stderr,
|
||||
throttle_rate: 0.2
|
||||
)
|
||||
|
||||
# buffer stdout writes (sync off) for throughput; flushed explicitly after the dump
|
||||
$stdout.sync = false
|
||||
|
||||
relation.in_batches(of: 100_000) do |relation|
|
||||
values = relation.pluck(:follower_id, :followed_id)
|
||||
values.each do |follower_id, followed_id|
|
||||
$stdout.puts "#{follower_id} #{followed_id} 1"
|
||||
end
|
||||
progress.progress += values.size
|
||||
end
|
||||
|
||||
$stdout.flush
|
||||
end
|
||||
|
||||
desc "Import existing FA posts"
|
||||
task :import_existing, [:start_at] => [:environment] do |t, args|
|
||||
batch_size = args[:batch_size]&.to_i || ENV["batch_size"]&.to_i
|
||||
@@ -273,7 +301,9 @@ namespace :fa do
|
||||
|
||||
tables =
|
||||
ENV["tables"] ||
|
||||
raise("'tables' required (all, users, follows, favs, posts)")
|
||||
raise(
|
||||
"'tables' required (all, #{Domain::Fa::SqliteExporter::TABLES.keys.join(", ")})"
|
||||
)
|
||||
tables = tables.split(",").map(&:to_sym)
|
||||
|
||||
db = SQLite3::Database.new(outfile)
|
||||
|
||||
2
spec/lib/domain/fa/sqlite_exporter_spec.rb
Normal file
2
spec/lib/domain/fa/sqlite_exporter_spec.rb
Normal file
@@ -0,0 +1,2 @@
|
||||
describe Domain::Fa::SqliteExporter do
|
||||
end
|
||||
@@ -62,7 +62,7 @@ class BlobFileTest < ActiveSupport::TestCase
|
||||
|
||||
test "from an initialized BlobEntryP" do
|
||||
blob_entry = TestUtil.build_blob_entry
|
||||
blob_file = BlobFile.find_or_initialize_from_blob_entry(blob_entry)
|
||||
blob_file = BlobFile.initialize_from_blob_entry(blob_entry)
|
||||
assert blob_file.save
|
||||
assert_equal blob_file.content_bytes, blob_entry.contents
|
||||
assert_equal blob_file.content_type, blob_entry.content_type
|
||||
|
||||
Reference in New Issue
Block a user