blob file migration task refactor
This commit is contained in:
@@ -1,118 +1,12 @@
|
||||
require "find"
|
||||
|
||||
namespace :blob_file do
|
||||
# Rake entry point that hands the whole migration to
# Domain::BlobFile::MigrateBlobEntryToBlobFile#run.
#
# Environment variables:
#   batch_size - converted with to_i; 16 when unset
#   start_at   - passed through as start_sha256; 64 "0" characters when unset
desc "migrate blob entries in parallel"
task migrate_blob_entries_parallel: %i[environment] do
  run_options = {
    batch_size: ENV.fetch("batch_size", 16).to_i,
    start_sha256: ENV.fetch("start_at") { "0" * 64 },
  }

  Domain::BlobFile::MigrateBlobEntryToBlobFile.new.run(**run_options)
end
|
||||
|
||||
desc "migrate blob files to the new format"
|
||||
desc "migrate blob entries to blob files"
|
||||
task migrate_blob_entries: %i[environment] do
|
||||
# Batch size comes from ENV["batch_size"] (converted with to_i); 1000 when the
# variable is unset.
batch_size = ENV["batch_size"]&.to_i || 1000
|
||||
# Profiling is enabled only when ENV["profile"] is exactly the string "true".
profile = ENV["profile"] == "true" || false
|
||||
# Starting point: ENV["start_at"], or 64 "0" characters when unset.
start_at = ENV["start_at"] || "0" * 64
|
||||
num_migrated = 0
|
||||
puts "batch_size: #{batch_size}"
|
||||
# NOTE(review): start_at is assigned a second time here with a different
# fallback (Tasks::BlobFileMigrationTask::ZERO_SHA256). This looks like the
# old and new sides of a diff sitting next to each other -- confirm which
# assignment is meant to survive.
start_at = ENV["start_at"] || Tasks::BlobFileMigrationTask::ZERO_SHA256
|
||||
|
||||
# Start the profiler before any migration work when profiling was requested.
RubyProf.start if profile
|
||||
|
||||
# Formats +n+ with thousands delimiters, right-aligned to 8 columns, for the
# progress lines printed by migrate_impl.
#
# Defined next to migrate_impl rather than inside it: a nested `def` would be
# re-evaluated on every migrate_impl call and would not exist before the first
# call.
def n2d(n)
  ActiveSupport::NumberHelper.number_to_delimited(n).rjust(8)
end

# Iterates BlobEntry in ascending batches starting at +start_at+, passes each
# batch to insert_blob_entries_batch, and prints one progress line per batch.
#
# @param batch_size [Integer] forwarded to in_batches as +of:+
# @param start_at [String] hex string; converted with HexUtil.hex2bin and
#   forwarded to in_batches as +start:+
# @return [Integer] sum of the counts returned by insert_blob_entries_batch
def migrate_impl(batch_size, start_at)
  num_migrated = 0
  num_processed = 0
  # Monotonic clock: the per-batch rate must not be skewed by wall-clock
  # adjustments while the migration runs.
  batch_started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  BlobEntry.in_batches(
    of: batch_size,
    start: HexUtil.hex2bin(start_at),
    order: :asc,
    use_ranges: true,
  ) do |batch|
    batch_migrated = insert_blob_entries_batch(batch)
    num_migrated += batch_migrated
    num_processed += batch.size

    # The rate covers this batch only (fetch + insert), not the whole run.
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - batch_started_at
    # Guard against a zero interval so the log never shows NaN/Infinity.
    rate = elapsed.positive? ? batch_migrated.to_f / elapsed : 0.0

    puts [
      "[migrated: #{n2d(num_migrated)}]",
      "[processed: #{n2d(num_processed)}]",
      "[rate: #{rate.round(1).to_s.rjust(5)}/second]",
      "[last: '#{HexUtil.bin2hex(batch.last.sha256)}']",
    ].join(" ")

    # Restart the timer so the next batch is measured on its own.
    batch_started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  num_migrated
end
|
||||
|
||||
# Creates BlobFile records for the entries in +batch+ whose sha256 is not
# already present in BlobFile.
#
# @param batch [#pluck] batch of blob entries; only +pluck(:sha256)+ is used
# @return [Integer] number of BlobFile records saved successfully
# @raise [StandardError] re-raises anything escaping the transaction body,
#   after printing the sha256s that were being migrated
def insert_blob_entries_batch(batch)
  blob_entry_sha256s = batch.pluck(:sha256)
  existing_sha256s = BlobFile.where(sha256: blob_entry_sha256s).pluck(:sha256)
  missing_sha256s = blob_entry_sha256s - existing_sha256s

  # Everything already migrated: skip the transaction and the second lookup.
  return 0 if missing_sha256s.empty?

  num_migrated = 0

  BlobFile.transaction do
    BlobEntry
      .where(sha256: missing_sha256s)
      .each do |blob_entry|
        blob_file = BlobFile.initialize_from_blob_entry(blob_entry)
        sha256_hex = HexUtil.bin2hex(blob_file.sha256)
        begin
          blob_file.save!
          num_migrated += 1
        rescue => e
          # Deliberate best-effort: one failed save is reported and the rest
          # of the batch continues.
          # NOTE(review): on databases where a failed statement aborts the
          # surrounding transaction, later saves in this batch may fail too --
          # confirm against the database in use.
          puts "error saving blob file #{sha256_hex}: #{e}"
        end
      end
  rescue => e
    puts "error migrating blob entry: #{missing_sha256s.map { |sha256| HexUtil.bin2hex(sha256) }}"
    # Bare raise re-raises the exception currently being handled.
    raise
  end

  num_migrated
end
|
||||
|
||||
# Runs migrate_impl on a new thread.
#
# @param batch_size forwarded unchanged to migrate_impl
# @param start_at forwarded unchanged to migrate_impl
# @return [Thread] the started thread; its value is migrate_impl's return value
def start_thread(batch_size, start_at)
  Thread.new(batch_size, start_at) do |size, first_sha256|
    migrate_impl(size, first_sha256)
  end
end
|
||||
|
||||
# Start the worker threads and add up what each one returns.
#
# Every worker is started with the same start_at. The disabled arithmetic
# below sketches how the sha256 space would be partitioned into num_threads
# chunks if more than one worker were used:
#   skip     = ((0xFFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF) / num_threads) + 1
#   start_at = (skip * i).to_s(16).rjust(32, "0")
#   stop_at  = ((skip * (i + 1)) - 1).to_s(16).rjust(32, "0")
num_threads = 1
workers =
  Array.new(num_threads) do
    puts "migrate #{start_at}"
    start_thread(batch_size, start_at)
  end

# Thread#value joins each worker in turn and yields its result.
num_migrated = workers.sum(&:value)
|
||||
|
||||
# When profiling was requested, stop the profiler and write the result in
# three formats under profiler/blob_file_migrate.
if profile
  base = "profiler/blob_file_migrate"
  FileUtils.mkdir_p(base) unless File.exist?(base)
  result = RubyProf.stop

  # Output file name => RubyProf printer class name. The class is looked up
  # only after its file has been opened, one report at a time.
  {
    "profile.txt" => :GraphPrinter,
    "profile.html" => :CallStackPrinter,
    "profile.rubyprof" => :SpeedscopePrinter,
  }.each do |filename, printer_name|
    File.open("#{base}/#{filename}", "w") do |f|
      RubyProf.const_get(printer_name).new(result).print(f, { min_percent: 1 })
    end
  end

  puts "wrote profile to #{base}"
end
|
||||
|
||||
# Print the total count accumulated in num_migrated.
puts "migrated #{num_migrated} total blob entries"
|
||||
# Delegate to Tasks::BlobFileMigrationTask#run with the batch size and start
# hash read from the environment.
# NOTE(review): this call sits right after the inline implementation's summary
# line; it looks like the new side of the diff replacing the code above --
# confirm only one of the two paths is meant to run.
Tasks::BlobFileMigrationTask.new.run(batch_size:, start_sha256: start_at)
|
||||
end
|
||||
|
||||
task verify_fs_files: :environment do
|
||||
|
||||
Reference in New Issue
Block a user