134 lines
4.3 KiB
Ruby
134 lines
4.3 KiB
Ruby
require "find"
|
|
|
|
namespace :blob_file do
|
|
desc "migrate blob entries in parallel"
task migrate_blob_entries_parallel: %i[environment] do
  # Settings come from the environment; unset variables use the defaults:
  #   batch_size - converted with `to_i` (default 16)
  #   start_at   - hex sha256 to start from (default: 64 "0" characters)
  batch_size = ENV.fetch("batch_size", 16).to_i
  start_at = ENV.fetch("start_at") { "0" * 64 }

  # All of the work is delegated to the migrator object.
  Domain::BlobFile::MigrateBlobEntryToBlobFile.new.run(
    batch_size: batch_size,
    start_sha256: start_at,
  )
end
|
|
|
|
desc "migrate blob files to the new format"
task migrate_blob_entries: %i[environment] do
  # Settings are read from the environment:
  #   batch_size - BlobEntry rows handled per batch (default 1000)
  #   profile    - the literal string "true" wraps the run in RubyProf
  #   start_at   - hex sha256 to start from (default: 64 "0" characters)
  batch_size = ENV["batch_size"]&.to_i || 1000
  profile = ENV["profile"] == "true"
  start_at = ENV["start_at"] || "0" * 64
  puts "batch_size: #{batch_size}"

  # The helpers below are plain `def`s that are evaluated each time the task
  # body runs; their names are kept so anything calling them keeps working.

  # Formats `n` with ActiveSupport's number_to_delimited, right-justified to
  # 8 columns so the progress lines stay aligned.
  def n2d(n)
    ActiveSupport::NumberHelper.number_to_delimited(n).rjust(8)
  end

  # Walks BlobEntry in ascending batches of `batch_size`, starting from the
  # hex sha256 `start_at`, and hands each batch to insert_blob_entries_batch.
  # Prints one progress line per batch.
  #
  # Returns the total number of blob files saved.
  def migrate_impl(batch_size, start_at)
    num_migrated = 0
    num_processed = 0
    # Monotonic clock: a wall-clock adjustment must not skew the rate.
    batch_started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    BlobEntry.in_batches(
      of: batch_size,
      start: HexUtil.hex2bin(start_at),
      order: :asc,
      use_ranges: true,
    ) do |batch|
      batch_migrated = insert_blob_entries_batch(batch)
      num_migrated += batch_migrated
      num_processed += batch.size
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - batch_started_at
      # The rate covers this batch only; guard against a zero-length interval.
      rate = elapsed.positive? ? batch_migrated / elapsed : 0.0
      puts [
        "[migrated: #{n2d(num_migrated)}]",
        "[processed: #{n2d(num_processed)}]",
        "[rate: #{rate.round(1).to_s.rjust(5)}/second]",
        "[last: '#{HexUtil.bin2hex(batch.last.sha256)}']",
      ].join(" ")
      batch_started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
    num_migrated
  end

  # Creates a BlobFile for every BlobEntry in `batch` whose sha256 has no
  # BlobFile row yet. A failure to save one record is logged and skipped;
  # any other error is logged with the batch's missing sha256s and re-raised.
  #
  # Returns the number of blob files saved for this batch.
  def insert_blob_entries_batch(batch)
    blob_entry_sha256s = batch.pluck(:sha256)
    blob_file_sha256s =
      BlobFile.where(sha256: blob_entry_sha256s).pluck(:sha256)
    missing_sha256s = blob_entry_sha256s - blob_file_sha256s
    # Nothing to create: skip the transaction and the extra BlobEntry query.
    return 0 if missing_sha256s.empty?

    num_migrated = 0
    BlobFile.transaction do
      BlobEntry
        .where(sha256: missing_sha256s)
        .each do |blob_entry|
          blob_file = BlobFile.initialize_from_blob_entry(blob_entry)
          begin
            blob_file.save!
            num_migrated += 1
          rescue => e
            # NOTE(review): if the failure came from the database and the
            # database aborts the enclosing transaction on a failed statement,
            # later saves in this batch would fail too - confirm whether a
            # savepoint (`requires_new: true`) is wanted here.
            puts "error saving blob file #{HexUtil.bin2hex(blob_file.sha256)}: #{e}"
          end
        end
    rescue
      puts "error migrating blob entry: #{missing_sha256s.map { |sha256| HexUtil.bin2hex(sha256) }}"
      raise
    end
    num_migrated
  end

  # Runs migrate_impl on a new thread; the thread's value is the number of
  # blob files it saved.
  def start_thread(batch_size, start_at)
    Thread.new { migrate_impl(batch_size, start_at) }
  end

  # Only one worker is started today. A sketch for partitioning the sha256
  # space across workers was left disabled:
  #   skip = ((0xFFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF) / num_threads) + 1
  #   start_at = (skip * i).to_s(16).rjust(32, "0")
  #   stop_at = ((skip * (i + 1)) - 1).to_s(16).rjust(32, "0")
  # Until that is wired in, every worker would begin at the same `start_at`.
  num_threads = 1

  RubyProf.start if profile
  begin
    threads = Array.new(num_threads) do
      puts "migrate #{start_at}"
      start_thread(batch_size, start_at)
    end
    # Thread#value joins each worker and re-raises anything it raised.
    num_migrated = threads.sum(&:value)
  ensure
    # Stop the profiler and write its reports even when the migration raised,
    # so a failed run still leaves a usable profile behind.
    if profile
      result = RubyProf.stop
      base = "profiler/blob_file_migrate"
      FileUtils.mkdir_p(base)
      {
        "profile.txt" => RubyProf::GraphPrinter,
        "profile.html" => RubyProf::CallStackPrinter,
        "profile.rubyprof" => RubyProf::SpeedscopePrinter,
      }.each do |file_name, printer_class|
        File.open("#{base}/#{file_name}", "w") do |f|
          printer_class.new(result).print(f, { min_percent: 1 })
        end
      end
      puts "wrote profile to #{base}"
    end
  end

  puts "migrated #{num_migrated} total blob entries"
end
|
|
|
|
desc "verify that each file under blob_file_location is named by its sha256"
task verify_fs_files: :environment do
  # Directory to scan, taken from the "blob_file_location" config entry.
  dir = Rails.application.config_for("blob_file_location")
  num_verified = 0
  num_mismatched = 0

  Find.find(dir) do |path|
    next if File.directory?(path)

    # The file name is expected to equal the hex sha256 of its contents.
    expected_sha256 = File.basename(path)
    actual_sha256 = Digest::SHA256.file(path).hexdigest
    if expected_sha256 != actual_sha256
      num_mismatched += 1
      puts "file #{path} has mismatching sha256: #{expected_sha256} != #{actual_sha256}"
    end
    # Counts every file that was checked, matching or not.
    num_verified += 1
  end

  puts "(#{Rails.env}, #{dir}) verified #{num_verified} files"
  # Extra summary line only when something was wrong, so a clean run prints
  # exactly what it always did.
  if num_mismatched.positive?
    puts "(#{Rails.env}, #{dir}) mismatching files: #{num_mismatched}"
  end
end
|
|
end
|