Files
redux-scraper/app/lib/db_sampler.rb
2025-01-01 03:29:53 +00:00

128 lines
3.2 KiB
Ruby

# typed: true
# Exports a sample of Domain::Fa users, together with the records reachable
# from them through the associations listed in SCHEMA, to a line-oriented
# stream, and imports such a stream back into a development database.
#
# Stream format: one record per line, encoded as
# Base64.strict_encode64(Zstd.compress(Marshal.dump(record), 1)).
class DbSampler
  # Association names followed from each model class during export.
  # Exporting a record whose class is not a key of this table raises.
  SCHEMA = {
    ::Domain::Fa::User => %i[avatar posts disco follower_joins fav_post_joins],
    ::Domain::Fa::UserAvatar => %i[file log_entry],
    ::Domain::Fa::Post => %i[file creator],
    ::Domain::Fa::Follow => %i[follower followed],
    ::Domain::Fa::Fav => %i[user post],
    ::Domain::Fa::UserFactor => [],
    ::BlobEntry => [:base],
    ::HttpLogEntry => %i[
      request_headers
      response_headers
      response
      caused_by_entry
    ],
    ::HttpLogEntryHeader => [],
  }.freeze

  # Once the traversal is more than one user deep (a user reached through
  # another user), only these associations are followed, which keeps the
  # export from walking the whole follow/fav graph.
  NESTED_USER_ASSOCS = %i[avatar disco file log_entry].freeze
  private_constant :NESTED_USER_ASSOCS

  # @param file [#puts, #gets] stream written by #export and read by #import
  def initialize(file)
    @file = file
    # Records already visited by #handle_model, so each is dumped only once.
    @handled = Set.new
  end

  # Writes every user whose url_name is in +url_names+, and the records
  # reachable from it per SCHEMA, to the stream. Progress goes to stderr.
  #
  # @param url_names [Array<String>, String] value(s) matched against url_name
  def export(url_names)
    $stderr.puts "exporting users..."
    Domain::Fa::User
      .includes(
        {
          avatar: %i[file log_entry],
          follower_joins: %i[follower followed],
          posts: {
            file: :response,
          },
        },
      )
      .where(url_name: url_names)
      .each { |user| handle_model(user, 0, 0) }
  end

  # Reads the stream and inserts every record that does not already exist.
  #
  # Records that fail with a foreign-key violation are retried once after the
  # whole stream has been read (the export order does not guarantee that a
  # referenced row precedes its referrer). The retry pass is best-effort:
  # failures are reported on stderr and do not abort the import.
  #
  # @raise [RuntimeError] unless running in the Rails development environment
  def import
    raise("must run in dev") unless Rails.env.development?
    $stderr.puts "reading file..."
    deferred = []
    while (line = @file.gets)
      line = line.chomp
      next if line.empty? # tolerate blank / trailing empty lines

      model = decode(line)
      begin
        # The rescue sits outside the transaction block so the failed
        # transaction is rolled back instead of being committed while the
        # database already considers it aborted.
        ReduxApplicationRecord.transaction { import_model(model) }
      rescue ActiveRecord::InvalidForeignKey
        $stderr.puts("defer #{model_id(model)}")
        deferred.push(model)
      end
    end

    # One transaction per deferred record: inside a single shared transaction
    # the first failing statement would leave the transaction aborted (on
    # PostgreSQL) and make every later record fail as well.
    deferred.each do |model|
      ReduxApplicationRecord.transaction { import_model(model) }
    rescue StandardError => e
      $stderr.puts(
        "failed deferred #{model_id(model)}: #{e.class}: #{e.message}",
      )
    end
  end

  private

  # Decodes one stream line back into a record.
  #
  # NOTE(security): Marshal.load can instantiate arbitrary objects and must
  # only ever be given dumps produced by #export from a trusted source.
  def decode(line)
    Marshal.load(Zstd.decompress(Base64.strict_decode64(line)))
  end

  # Primary key of +model+ as a printable string; primary keys named
  # "sha256" are rendered through HexUtil.bin2hex.
  def format_pk(model)
    id = model.id
    model.class.primary_key == "sha256" ? HexUtil.bin2hex(id) : id.to_s
  end

  # Human-readable "<ClassName> / <primary key>" label used in log lines.
  def model_id(model)
    "#{model.class.name} / #{format_pk(model)}"
  end

  # Inserts a copy of +model+ unless a row with the same primary key already
  # exists. Attributes are copied onto a fresh instance and saved without
  # validations; the outcome is reported on stderr.
  def import_model(model)
    klass = model.class
    if klass.where(klass.primary_key => [model.id]).exists?
      $stderr.puts("skipped existing #{model_id(model)}")
      return
    end

    copy = klass.new
    model
      .attribute_names
      .map(&:to_sym)
      .each { |attr| copy.write_attribute(attr, model.read_attribute(attr)) }

    # save returns false (rather than raising) when the record was not
    # persisted, so only report success when it actually was.
    if copy.save(validate: false)
      $stderr.puts("imported #{model_id(model)}")
    else
      $stderr.puts("failed to save #{model_id(model)}")
    end
  end

  # Depth-first walk: visits the associations of +model+ listed in SCHEMA,
  # then dumps +model+ itself, so associated records are written first.
  #
  # @param model [Object, nil] record to export; nil is ignored
  # @param level [Integer] recursion depth, used only to indent log output
  # @param user_depth [Integer] number of users on the current path
  # @raise [RuntimeError] if the record's class is not a key of SCHEMA
  def handle_model(model, level, user_depth)
    return unless model
    user_depth += 1 if model.is_a?(Domain::Fa::User)
    # add? returns nil when the record was already in the set.
    return unless @handled.add?(model)

    assocs = SCHEMA[model.class] || raise("invalid: #{model.class.name}")
    assocs.each do |assoc|
      next if user_depth > 1 && !NESTED_USER_ASSOCS.include?(assoc)

      related = model.send(assoc)
      next unless related

      if related.respond_to?(:each)
        related.each { |item| handle_model(item, level + 1, user_depth) }
      else
        handle_model(related, level + 1, user_depth)
      end
    end
    dump(model, level)
  end

  # Appends +model+ to the stream as one encoded line and logs it on stderr,
  # prefixed with one "-" per recursion level.
  def dump(model, level)
    # Second argument to Zstd.compress is presumably the compression level.
    @file.puts(Base64.strict_encode64(Zstd.compress(Marshal.dump(model), 1)))
    $stderr.puts "#{"-" * level} dumped #{model.class.name}/#{format_pk(model)}"
  end
end