# typed: false

require "rails_helper"
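
# These examples exercise the blob-entry -> blob-file backfill task. Inferred
# from the specs below (a sketch, not the authoritative interface), the task
# looks roughly like:
#
#   migrator = Tasks::BlobFileMigrationTask.new(log_sink: $stderr)
#   migrated = migrator.run(batch_size: 1000, start_sha256: "00" * 32)
#
# #run returns the number of newly migrated entries. Passing
# start_sha256: "last" resumes from the hex SHA256 checkpointed under
# PROGRESS_KEY in GlobalState.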
RSpec.describe Tasks::BlobFileMigrationTask do
  let(:log_sink) { StringIO.new }
  let(:migrator) { described_class.new(log_sink: log_sink) }
describe "#run" do
|
|
context "with no blob entries" do
|
|
it "runs migration with default settings and logs correctly" do
|
|
result = migrator.run
|
|
|
|
expect(result).to eq(0)
|
|
expect(log_sink.string).to include("batch_size: 1000")
|
|
expect(log_sink.string).to include("migrated 0 total blob entries")
|
|
end
|
|
end
|
|
|
|
context "with custom batch size" do
|
|
it "uses the specified batch size in logs" do
|
|
result = migrator.run(batch_size: 500)
|
|
|
|
expect(result).to eq(0)
|
|
expect(log_sink.string).to include("batch_size: 500")
|
|
end
|
|
end
|
|
|
|
context "with custom start SHA256" do
|
|
it "accepts custom start SHA256 parameter" do
|
|
start_sha256 = "ff" * 32
|
|
|
|
result = migrator.run(start_sha256: start_sha256)
|
|
|
|
expect(result).to eq(0)
|
|
expect(log_sink.string).to include("batch_size: 1000")
|
|
expect(log_sink.string).to include("migrated 0 total blob entries")
|
|
end
|
|
end
|
|
|
|
context "with start_sha256='last' and no saved progress" do
|
|
it "starts from beginning when no progress is saved" do
|
|
result = migrator.run(start_sha256: "last")
|
|
|
|
expect(result).to eq(0)
|
|
expect(log_sink.string).to include("no saved progress")
|
|
expect(log_sink.string).to include("migrated 0 total blob entries")
|
|
end
|
|
end
|
|
|
|
context "with start_sha256='last' and existing progress" do
|
|
let!(:blob_entries) do
|
|
5.times.map { |i| create(:blob_entry, content: "progress #{i}") }
|
|
end
|
|
|
|
before do
|
|
# Simulate saved progress - use the SHA256 of the 3rd blob entry
|
|
progress_sha256_hex = HexUtil.bin2hex(blob_entries[2].sha256)
|
|
GlobalState.set("blob-file-migration-task", progress_sha256_hex)
|
|
end
|
|
|
|
after do
|
|
# Clean up the GlobalState
|
|
GlobalState.find_by(key: "blob-file-migration-task")&.destroy
|
|
end
|
|
|
|
it "resumes from saved progress" do
|
|
result = migrator.run(start_sha256: "last")
|
|
|
|
# Should skip the first 3 entries and migrate the remaining 2
|
|
expect(result).to be >= 0
|
|
expected_progress = HexUtil.bin2hex(blob_entries[2].sha256)
|
|
expect(log_sink.string).to include(expected_progress)
|
|
end
|
|
end
|
|
|
|
context "with existing blob entries that need migration" do
|
|
let!(:blob_entries) do
|
|
3.times.map { |i| create(:blob_entry, content: "content #{i}") }
|
|
end
|
|
|
|
after do
|
|
# Clean up any saved progress
|
|
GlobalState.find_by(key: "blob-file-migration-task")&.destroy
|
|
end
|
|
|
|
it "migrates all blob entries to blob files" do
|
|
expect(BlobFile.count).to eq(0)
|
|
|
|
result = migrator.run
|
|
|
|
expect(result).to eq(3)
|
|
expect(BlobFile.count).to eq(3)
|
|
expect(log_sink.string).to include("migrated 3 total blob entries")
|
|
end
|
|
|
|
it "creates blob files with correct attributes" do
|
|
migrator.run
|
|
|
|
blob_entries.each do |blob_entry|
|
|
blob_file = BlobFile.find_by(sha256: blob_entry.sha256)
|
|
expect(blob_file).to be_present
|
|
expect(blob_file.content_type).to eq(blob_entry.content_type)
|
|
expect(blob_file.size_bytes).to eq(blob_entry.size)
|
|
expect(blob_file.content_bytes).to eq(blob_entry.contents)
|
|
end
|
|
end
|
|
|
|
it "saves progress to GlobalState after migration" do
|
|
migrator.run
|
|
|
|
# Check that progress was saved
|
|
saved_progress = GlobalState.get("blob-file-migration-task")
|
|
expect(saved_progress).to be_present
|
|
expect(saved_progress.length).to eq(64) # Hex SHA256 length
|
|
|
|
# Should be the SHA256 of the last migrated entry
|
|
last_migrated_sha256_hex = HexUtil.bin2hex(BlobFile.last.sha256)
|
|
expect(saved_progress).to eq(last_migrated_sha256_hex)
|
|
end
|
|
|
|
it "logs progress during migration" do
|
|
migrator.run(batch_size: 2)
|
|
|
|
log_output = log_sink.string
|
|
expect(log_output).to include("migrated:")
|
|
expect(log_output).to include("processed:")
|
|
expect(log_output).to include("rate:")
|
|
end
|
|
end
|
|
|
|
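
    # NOTE: the :blob_file factory here is assumed to derive sha256 from the
    # supplied contents, so the pre-created file collides with the entry's
    # hash and the task should see it as already migrated.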
context "with existing blob entries that already have blob files" do
|
|
let!(:blob_entry) { create(:blob_entry, content: "existing content") }
|
|
let!(:existing_blob_file) do
|
|
create(
|
|
:blob_file,
|
|
contents: blob_entry.contents,
|
|
content_type: blob_entry.content_type,
|
|
)
|
|
end
|
|
|
|
after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
|
|
|
|
it "does not create duplicate blob files" do
|
|
expect(BlobFile.count).to eq(1)
|
|
|
|
result = migrator.run
|
|
|
|
expect(result).to eq(0) # No new migrations
|
|
expect(BlobFile.count).to eq(1) # Same count
|
|
expect(log_sink.string).to include("migrated 0 total blob entries")
|
|
end
|
|
|
|
it "saves the last migrated sha256, even if it has already been migrated" do
|
|
migrator.run
|
|
final_progress = GlobalState.get("blob-file-migration-task")
|
|
expect(final_progress).to eq(HexUtil.bin2hex(blob_entry.sha256))
|
|
end
|
|
end
|
|
|
|
context "with mixed scenarios" do
|
|
let!(:blob_entry_needing_migration) do
|
|
create(:blob_entry, content: "needs migration")
|
|
end
|
|
let!(:blob_entry_with_file) { create(:blob_entry, content: "has file") }
|
|
let!(:existing_blob_file) do
|
|
create(
|
|
:blob_file,
|
|
contents: blob_entry_with_file.contents,
|
|
content_type: blob_entry_with_file.content_type,
|
|
)
|
|
end
|
|
|
|
after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
|
|
|
|
it "only migrates entries that need migration" do
|
|
expect(BlobFile.count).to eq(1)
|
|
|
|
result = migrator.run
|
|
|
|
expect(result).to eq(1) # Only one new migration
|
|
expect(BlobFile.count).to eq(2) # Now has both
|
|
|
|
# Verify the new blob file was created correctly
|
|
new_blob_file =
|
|
BlobFile.find_by(sha256: blob_entry_needing_migration.sha256)
|
|
expect(new_blob_file).to be_present
|
|
expect(new_blob_file.content_bytes).to eq(
|
|
blob_entry_needing_migration.contents,
|
|
)
|
|
end
|
|
|
|
it "saves progress for the actually migrated entry" do
|
|
migrator.run
|
|
|
|
saved_progress = GlobalState.get("blob-file-migration-task")
|
|
expected_progress = HexUtil.bin2hex(blob_entry_needing_migration.sha256)
|
|
expect(saved_progress).to eq(expected_progress)
|
|
end
|
|
end
|
|
|
|
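
    # batch_size is presumably forwarded to the task's batched query (for
    # example an in_batches/find_in_batches style loop); the examples below
    # pin only the observable behavior (totals and repeated progress logs),
    # not the batching mechanism itself.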
context "with different batch sizes" do
|
|
let!(:blob_entries) do
|
|
5.times.map { |i| create(:blob_entry, content: "batch content #{i}") }
|
|
end
|
|
|
|
after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
|
|
|
|
it "handles batch size of 1" do
|
|
result = migrator.run(batch_size: 1)
|
|
|
|
expect(result).to eq(5)
|
|
expect(BlobFile.count).to eq(5)
|
|
end
|
|
|
|
it "handles batch size larger than total entries" do
|
|
result = migrator.run(batch_size: 100)
|
|
|
|
expect(result).to eq(5)
|
|
expect(BlobFile.count).to eq(5)
|
|
end
|
|
|
|
it "handles custom batch size smaller than total entries" do
|
|
result = migrator.run(batch_size: 2)
|
|
|
|
expect(result).to eq(5)
|
|
expect(BlobFile.count).to eq(5)
|
|
|
|
# Verify all entries were migrated correctly
|
|
blob_entries.each do |blob_entry|
|
|
blob_file = BlobFile.find_by(sha256: blob_entry.sha256)
|
|
expect(blob_file).to be_present
|
|
end
|
|
end
|
|
|
|
it "saves progress multiple times with small batch size" do
|
|
migrator.run(batch_size: 2)
|
|
|
|
# Should see multiple progress saves in the log
|
|
expect(log_sink.string.scan(/migrated:/).count).to be > 1
|
|
end
|
|
end
|
|
|
|
context "with idempotent runs" do
|
|
let!(:blob_entries) do
|
|
3.times.map { |i| create(:blob_entry, content: "idempotent #{i}") }
|
|
end
|
|
|
|
after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
|
|
|
|
it "handles duplicate runs gracefully" do
|
|
first_result = migrator.run
|
|
expect(first_result).to eq(3)
|
|
expect(BlobFile.count).to eq(3)
|
|
|
|
# Reset log sink for second run
|
|
log_sink.truncate(0)
|
|
log_sink.rewind
|
|
|
|
second_result = migrator.run
|
|
expect(second_result).to eq(0) # No new migrations
|
|
expect(BlobFile.count).to eq(3) # Same count
|
|
|
|
expect(log_sink.string).to include("migrated 0 total blob entries")
|
|
end
|
|
end
|
|
|
|
context "with large datasets" do
|
|
let!(:blob_entries) do
|
|
10.times.map { |i| create(:blob_entry, content: "large dataset #{i}") }
|
|
end
|
|
|
|
after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
|
|
|
|
it "successfully migrates large datasets in batches" do
|
|
result = migrator.run(batch_size: 3)
|
|
|
|
expect(result).to eq(10)
|
|
expect(BlobFile.count).to eq(10)
|
|
|
|
# Verify progress logging occurred multiple times
|
|
log_output = log_sink.string
|
|
expect(log_output.scan(/migrated:/).count).to be > 1
|
|
end
|
|
end
|
|
end
|
|
|
|
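
  # #log_progress is assumed to emit one line roughly of the form
  #   migrated:    1,000 processed:    2,500 rate: 10.5/s last: abc123
  # (the exact layout belongs to the task; the examples below pin only the
  # labels and the delimited, right-justified number formatting).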
describe "#log_progress" do
|
|
it "logs migration progress with numbers" do
|
|
migrator.send(:log_progress, 100, 150, 25.5, "abc123")
|
|
|
|
log_output = log_sink.string
|
|
expect(log_output).to include("migrated:")
|
|
expect(log_output).to include("processed:")
|
|
expect(log_output).to include("rate:")
|
|
expect(log_output).to include("last:")
|
|
end
|
|
|
|
it "handles nil last_sha256" do
|
|
migrator.send(:log_progress, 100, 150, 25.5, nil)
|
|
|
|
log_output = log_sink.string
|
|
expect(log_output).to include("last: nil")
|
|
end
|
|
|
|
it "includes formatted numbers" do
|
|
migrator.send(:log_progress, 1000, 2500, 10.5, nil)
|
|
|
|
log_output = log_sink.string
|
|
expect(log_output).to include("1,000")
|
|
expect(log_output).to include("2,500")
|
|
expect(log_output).to include("10.5")
|
|
end
|
|
end
|
|
|
|
describe "#format_number" do
|
|
it "formats numbers with delimiters" do
|
|
result = migrator.send(:format_number, 1_234_567)
|
|
|
|
expect(result).to include("1,234,567")
|
|
end
|
|
|
|
it "right-justifies numbers" do
|
|
result = migrator.send(:format_number, 123)
|
|
|
|
expect(result.length).to be >= 8
|
|
end
|
|
|
|
it "handles zero" do
|
|
result = migrator.send(:format_number, 0)
|
|
|
|
expect(result).to include("0")
|
|
expect(result.length).to be >= 8
|
|
end
|
|
|
|
it "handles large numbers" do
|
|
result = migrator.send(:format_number, 999_999_999)
|
|
|
|
expect(result).to include("999,999,999")
|
|
end
|
|
end
|
|
|
|
describe "constructor and basic functionality" do
|
|
it "initializes with log_sink" do
|
|
expect(migrator).to be_a(Tasks::BlobFileMigrationTask)
|
|
end
|
|
|
|
it "uses provided log_sink" do
|
|
custom_sink = StringIO.new
|
|
custom_migrator = described_class.new(log_sink: custom_sink)
|
|
|
|
custom_migrator.run
|
|
|
|
expect(custom_sink.string).to include("batch_size:")
|
|
end
|
|
|
|
it "defaults to $stderr when no log_sink provided" do
|
|
expect { described_class.new }.not_to raise_error
|
|
end
|
|
end
|
|
|
|
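
  # ZERO_SHA256 ("00" * 32) is assumed to be the default start cursor for
  # #run, i.e. "begin at the start of SHA256 space"; PROGRESS_KEY is the
  # GlobalState key the task checkpoints its last migrated SHA256 under.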
describe "ZERO_SHA256 constant" do
|
|
it "defines the zero SHA256 constant" do
|
|
expect(Tasks::BlobFileMigrationTask::ZERO_SHA256).to eq("00" * 32)
|
|
end
|
|
end
|
|
|
|
describe "PROGRESS_KEY constant" do
|
|
it "defines the progress key constant" do
|
|
expect(Tasks::BlobFileMigrationTask::PROGRESS_KEY).to eq(
|
|
"blob-file-migration-task",
|
|
)
|
|
end
|
|
end
|
|
|
|
describe "integration scenarios" do
|
|
it "handles multiple sequential runs without errors" do
|
|
first_result = migrator.run(batch_size: 100)
|
|
expect(first_result).to eq(0)
|
|
|
|
# Reset log sink for second run
|
|
log_sink.truncate(0)
|
|
log_sink.rewind
|
|
|
|
second_result = migrator.run(batch_size: 200)
|
|
expect(second_result).to eq(0)
|
|
|
|
log_output = log_sink.string
|
|
expect(log_output).to include("batch_size: 200")
|
|
end
|
|
|
|
it "properly formats logs with different parameters" do
|
|
migrator.run(batch_size: 42, start_sha256: "aa" * 32)
|
|
|
|
log_output = log_sink.string
|
|
expect(log_output).to include("batch_size: 42")
|
|
expect(log_output).to include("migrated 0 total blob entries")
|
|
end
|
|
end
|
|
|
|
describe "parameter validation" do
|
|
it "accepts valid batch_size parameter" do
|
|
expect { migrator.run(batch_size: 1) }.not_to raise_error
|
|
expect { migrator.run(batch_size: 1000) }.not_to raise_error
|
|
expect { migrator.run(batch_size: 10_000) }.not_to raise_error
|
|
end
|
|
|
|
it "accepts valid start_sha256 parameter" do
|
|
expect { migrator.run(start_sha256: "00" * 32) }.not_to raise_error
|
|
expect { migrator.run(start_sha256: "ff" * 32) }.not_to raise_error
|
|
expect { migrator.run(start_sha256: "ab" * 32) }.not_to raise_error
|
|
end
|
|
|
|
it "accepts 'last' as start_sha256 parameter" do
|
|
expect { migrator.run(start_sha256: "last") }.not_to raise_error
|
|
end
|
|
end
|
|
end
|