Blob entry import/export helper

This commit is contained in:
Dylan Knutson
2023-05-18 17:50:33 -07:00
parent 8e39f5c2f3
commit 1e4a3905f5
2 changed files with 50 additions and 0 deletions

View File

@@ -34,6 +34,16 @@ task :good_job_cron => :environment do
end
end
namespace :blob_entries do
  # Exports a sample of blob entries to a file via BlobEntrySampleExporter.
  #
  # Usage: rake blob_entries:export_samples limit=<num> out=<file path>
  #
  # Raises if `limit` or `out` is missing, and raises ArgumentError if
  # `limit` is not a base-10 integer.
  task :export_samples => :environment do
    # Integer() rejects non-numeric input. String#to_i would silently turn
    # something like "abc" into 0, which is truthy and so slipped past the
    # `|| raise` guard, producing an empty export instead of an error.
    limit = Integer(ENV["limit"] || raise("need 'limit' (num)"), 10)
    out = ENV["out"] || raise("need 'out' (file path, .json encoded)")
    BlobEntrySampleExporter.new.export_samples(limit, out)
  end

  # Placeholder: importing samples is not implemented yet.
  task :import_samples => :environment do
  end
end
task :good_job do
proxies = ["direct", "proxy-1", "dedipath-1", "serverhost-1"]
proxy = ENV["proxy"]

View File

@@ -0,0 +1,40 @@
# Dumps a sample of BlobEntry records to a text file.
#
# Each exported record occupies two consecutive lines:
#   1. a JSON object with sha256 (hex), content_type, size, created_at and,
#      when present, base_sha256 (hex)
#   2. the strict Base64 encoding of the record's `contents` attribute
class BlobEntrySampleExporter
  include HasColorLogger
  include HasMeasureDuration

  # Writes up to `limit` blob entries, plus the base entry of each one that
  # has a base, to the file at path `file`. The file is opened in "w" mode.
  #
  # A given entry (identified by its sha256) is written at most once per
  # export, even when it is the base of several sampled entries or is both
  # sampled itself and referenced as a base.
  #
  # limit - maximum number of BlobEntry rows to sample
  # file  - path of the output file
  #
  # Returns whatever `measure` returns.
  def export_samples(limit, file)
    @num_written = 0
    @bytes_written = 0
    seen = {} # sha256 => true for every entry already written in this export

    measure(proc {
      "wrote #{@num_written} blob entries, #{HexUtil.humansize(@bytes_written)} to #{file}"
    }) do
      # The block parameter is named `io` so it does not shadow the `file`
      # path that the summary message above interpolates.
      File.open(file, "w") do |io|
        ::BlobEntry.limit(limit).find_each(batch_size: 32) do |blob_entry|
          write_unless_seen(io, blob_entry, seen)
          base = blob_entry.base
          write_unless_seen(io, base, seen) if base
        end
      end
    end
  end

  # Appends one blob entry to `file` (an open, writable IO) as a JSON metadata
  # line followed by a Base64 contents line, logs a one-line summary, and
  # updates the running counters.
  #
  # Relies on the @bytes_written / @num_written counters that export_samples
  # initializes.
  def write_blob_entry(file, blob_entry)
    # Reads :sha256, :content_type, :size and (optionally) :base_sha256 from
    # the bulk-insert hash; the sha256 values are converted to hex for JSON.
    attrs = blob_entry.to_bulk_insert_hash
    json_hash = {
      sha256: HexUtil.bin2hex(attrs[:sha256]),
      content_type: attrs[:content_type],
      size: attrs[:size],
      created_at: blob_entry.created_at,
    }
    json_hash[:base_sha256] = HexUtil.bin2hex(attrs[:base_sha256]) if attrs[:base_sha256]

    file.puts(json_hash.to_json)
    # NOTE(review): read_attribute presumably yields the stored bytes rather
    # than a value reconstructed against the base entry - confirm against
    # the BlobEntry model.
    file.puts(Base64.strict_encode64(blob_entry.read_attribute(:contents)))

    logger.info(
      "#{HexUtil.bin2hex(blob_entry.sha256)} - " \
      "#{blob_entry.base_sha256.present?} - " \
      "#{blob_entry.content_type} - #{HexUtil.humansize(blob_entry.size)} " \
      "(#{HexUtil.humansize(blob_entry.bytes_stored)} actual)"
    )

    @bytes_written += blob_entry.bytes_stored
    @num_written += 1
  end

  private

  # Writes `blob_entry` via write_blob_entry unless an entry with the same
  # sha256 is already recorded in `seen`; records it in `seen` otherwise.
  def write_unless_seen(io, blob_entry, seen)
    key = blob_entry.sha256
    return if seen.key?(key)

    seen[key] = true
    write_blob_entry(io, blob_entry)
  end
end