remove BlobEntry model
@@ -29,8 +29,6 @@ class BlobEntriesController < ApplicationController

     if show_blob_file(sha256, thumb)
       return
-    elsif BlobFile.migrate_sha256!(sha256) && show_blob_file(sha256, thumb)
-      return
     else
       raise ActiveRecord::RecordNotFound
     end
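
Note: with the `BlobFile.migrate_sha256!` fallback gone, a blob that exists only as a `BlobEntry` row now raises `ActiveRecord::RecordNotFound` instead of being migrated lazily on first read. A minimal sketch of the semantic change (illustrative only; `show_blob_file`'s internals are assumed to boil down to a `BlobFile` lookup):

    # Before: a miss in blob_files could still be served by migrating on demand.
    blob_file = BlobFile.find_by(sha256: sha256) || BlobFile.migrate_sha256!(sha256)

    # After: blob_files is the only source of truth; a miss is a hard 404.
    blob_file = BlobFile.find_by(sha256: sha256) or raise ActiveRecord::RecordNotFound
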
@@ -175,11 +175,11 @@ class Scraper::GalleryDlClient

     retries = 0
     begin
-      response_blob_entry =
-        BlobEntry.find_or_build(
-          content_type: content_type,
-          contents: http_event.body,
-        )
+      response_blob_entry = T.let(nil, T.nilable(BlobFile))
+      # BlobEntry.find_or_build(
+      #   content_type: content_type,
+      #   contents: http_event.body,
+      # )

       log_entry =
         HttpLogEntry.new(
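
Note: `response_blob_entry` is now pinned to `nil` with an explicit Sorbet annotation. `T.let` declares the type a local is allowed to hold; without it, a local initialized to `nil` is inferred as `NilClass` and a later re-assignment fails to type-check. A small generic example (the later lookup is hypothetical):

    # typed: true
    blob_file = T.let(nil, T.nilable(BlobFile))          # declared nilable up front
    blob_file = BlobFile.find_by(sha256: sha256) if sha256 # re-assignment still type-checks
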
@@ -1,135 +0,0 @@
-# typed: strict
-module Tasks
-  class BlobFileMigrationTask < InterruptableTask
-    extend T::Sig
-
-    ZERO_SHA256 = T.let("00" * 32, String)
-    PROGRESS_KEY = T.let("blob-file-migration-task", String)
-
-    sig { override.returns(String) }
-    def progress_key
-      PROGRESS_KEY
-    end
-
-    sig { params(batch_size: Integer, start_sha256: String).returns(Integer) }
-    def run(batch_size: 1000, start_sha256: ZERO_SHA256)
-      # Handle "last" to resume from saved progress
-      actual_start_sha256 = get_progress(start_sha256) || ZERO_SHA256
-
-      log "batch_size: #{batch_size}"
-      if actual_start_sha256 != ZERO_SHA256
-        log "starting from: #{actual_start_sha256}"
-      end
-
-      num_migrated = migrate_impl(batch_size, actual_start_sha256)
-      log "migrated #{num_migrated} total blob entries"
-      num_migrated
-    end
-
-    private
-
-    sig { params(batch_size: Integer, start_sha256: String).returns(Integer) }
-    def migrate_impl(batch_size, start_sha256)
-      num_migrated = 0
-      num_processed = 0
-      start_time = Time.now
-      last_migrated_sha256 = T.let(nil, T.nilable(String))
-
-      BlobEntry.in_batches(
-        of: batch_size,
-        start: HexUtil.hex2bin(start_sha256),
-        order: :asc,
-        use_ranges: true,
-      ) do |batch|
-        # Check for interruption before processing each batch
-        break if @interrupt_monitor.interrupted?
-
-        batch_migrated = insert_blob_entries_batch(batch)
-        num_migrated += batch_migrated
-        num_processed += T.cast(batch.size, Integer)
-        rate = batch_migrated.to_f / (Time.now - start_time)
-
-        last_migrated_sha256 = batch.last&.sha256
-        log_progress(num_migrated, num_processed, rate, last_migrated_sha256)
-
-        if last_migrated_sha256
-          save_progress(HexUtil.bin2hex(last_migrated_sha256))
-        end
-
-        start_time = Time.now
-
-        # Check for interruption after processing each batch
-        break if @interrupt_monitor.interrupted?
-      end
-
-      num_migrated
-    end
-
-    sig { params(batch: ActiveRecord::Relation).returns(Integer) }
-    def insert_blob_entries_batch(batch)
-      num_migrated = 0
-
-      blob_entry_sha256s = batch.pluck(:sha256)
-      blob_file_sha256s =
-        BlobFile.where(sha256: blob_entry_sha256s).pluck(:sha256)
-      missing_sha256s = blob_entry_sha256s - blob_file_sha256s
-
-      BlobFile.transaction do
-        BlobEntry
-          .where(sha256: missing_sha256s)
-          .each do |blob_entry|
-            blob_file = BlobFile.initialize_from_blob_entry(blob_entry)
-
-            begin
-              blob_file.save!
-              num_migrated += 1
-            rescue => e
-              if sha256 = blob_file.sha256
-                sha256_hex = HexUtil.bin2hex(sha256)
-                log "error saving blob file #{sha256_hex}: #{e}"
-              else
-                log "error saving blob file: #{e}"
-              end
-            end
-          end
-      rescue => e
-        missing_sha256s_hex =
-          missing_sha256s.map { |sha256| HexUtil.bin2hex(sha256) }
-        log "error migrating blob entry: #{missing_sha256s_hex}"
-        raise e
-      end
-
-      num_migrated
-    end
-
-    sig do
-      params(
-        num_migrated: Integer,
-        num_processed: Integer,
-        rate: Float,
-        last_sha256: T.nilable(String),
-      ).void
-    end
-    def log_progress(num_migrated, num_processed, rate, last_sha256)
-      last_hex =
-        case last_sha256
-        when String
-          HexUtil.bin2hex(last_sha256)
-        else
-          "nil"
-        end
-
-      log [
-        "migrated: #{format_number(num_migrated)}",
-        "processed: #{format_number(num_processed)}",
-        "rate: #{rate.round(1).to_s.rjust(5)}/second",
-        "last: #{last_hex}",
-      ].join(" | ")
-    end
-
-    sig { params(number: Integer).returns(String) }
-    def format_number(number)
-      ActiveSupport::NumberHelper.number_to_delimited(number).rjust(8)
-    end
-  end
-end
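
Note: the deleted task implemented a resumable batch walk over `blob_entries_p`: it iterated in primary-key order from an explicit `start:` cursor and checkpointed the last processed key after every batch, so an interrupted run could pick up where it left off. A condensed sketch of that pattern (assumes Rails 7.1+ for `order:`/`use_ranges:` on `in_batches`; `Model`, `process`, `load_checkpoint`, and `save_checkpoint` are placeholders):

    cursor = load_checkpoint || ("\x00" * 32) # binary sha256 key; start from zero if fresh
    Model.in_batches(of: 1_000, start: cursor, order: :asc, use_ranges: true) do |batch|
      process(batch)
      save_checkpoint(batch.last&.sha256) # checkpoint so an interrupted run can resume
    end
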
@@ -1,119 +0,0 @@
-# typed: strict
-class BlobEntry < ReduxApplicationRecord
-  self.table_name = "blob_entries_p"
-
-  include ImmutableModel
-  before_destroy { raise ActiveRecord::ReadOnlyRecord }
-
-  self.primary_key = :sha256
-  EMPTY_FILE_SHA256 =
-    T.let(
-      HexUtil.hex2bin(
-        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
-      ),
-      String,
-    )
-
-  belongs_to :base,
-             optional: true,
-             foreign_key: :base_sha256,
-             class_name: "::BlobEntry"
-
-  after_create do
-    actual_sha256 = Digest::SHA256.digest(contents)
-    raise("digest mismatch for BlobEntry") if sha256 != actual_sha256
-  end
-
-  sig { returns(T.nilable(BlobEntry)) }
-  def base
-    @base_model = T.let(@base_model, T.nilable(BlobEntry))
-    @base_model ||=
-      super ||
-        if (sha256 = base_sha256)
-          self.class.ensure(sha256)
-        end
-  end
-
-  validates_presence_of(:sha256, :content_type, :size)
-  validates :contents,
-            length: {
-              minimum: 0,
-              allow_nil: false,
-              message: "can't be nil",
-            }
-  validates :sha256, length: { is: 32 }
-  validates :base_sha256, length: { is: 32 }, if: :base_sha256
-
-  sig { params(sha256: String).returns(BlobEntry) }
-  def self.ensure(sha256)
-    find_by(sha256: sha256) ||
-      raise("blob #{HexUtil.bin2hex(sha256)} does not exist")
-  end
-
-  sig { returns(T.nilable(String)) }
-  def sha256_hex
-    if (hash = self.sha256)
-      HexUtil.bin2hex(hash)
-    end
-  end
-
-  sig { returns(T.nilable(String)) }
-  def contents
-    @contents ||=
-      T.let(
-        begin
-          contents_raw = self.read_attribute(:contents)
-          base_contents = self.base&.contents
-          if base_contents
-            XDiff.patch(base_contents, contents_raw)
-          else
-            contents_raw
-          end
-        end,
-        T.nilable(String),
-      )
-  end
-
-  sig { returns(T.nilable(Integer)) }
-  def bytes_stored
-    self.read_attribute(:contents)&.size
-  end
-
-  sig { params(content_type: String, contents: String).returns(BlobEntry) }
-  def self.find_or_build(content_type:, contents:)
-    sha256 = Digest::SHA256.digest(contents)
-    BlobEntry.find_by(sha256: sha256) ||
-      begin
-        build_record(
-          content_type: content_type,
-          sha256: sha256,
-          contents: contents,
-        )
-      end
-  end
-
-  DIFFABLE_CONTENT_TYPES =
-    T.let(
-      [%r{text/html}, %r{text/plain}, %r{application/json}],
-      T::Array[Regexp],
-    )
-
-  sig do
-    params(
-      content_type: String,
-      contents: String,
-      sha256: T.nilable(String),
-    ).returns(BlobEntry)
-  end
-  def self.build_record(content_type:, contents:, sha256: nil)
-    sha256 ||= Digest::SHA256.digest(contents)
-    record =
-      self.new(
-        sha256: sha256,
-        content_type: content_type,
-        size: contents.size,
-        contents: contents,
-      )
-    record
-  end
-end
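
Note: `BlobEntry` was a content-addressed store: the binary SHA-256 of `contents` served as the primary key, and a row could hold an XDiff delta against a `base_sha256` row that `#contents` re-applied on read. The invariant the `after_create` hook enforced is plain stdlib Ruby:

    require "digest"

    contents = "hello world"
    sha256 = Digest::SHA256.digest(contents) # 32-byte binary key, not hex
    sha256_hex = sha256.unpack1("H*")        # presumably what HexUtil.bin2hex returns

    # after_create re-derived the digest and rejected any mismatch:
    raise "digest mismatch for BlobEntry" if sha256 != Digest::SHA256.digest(contents)
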
@@ -64,15 +64,6 @@ class BlobFile < ReduxApplicationRecord
     end
   end

-  sig { params(blob_entry: BlobEntry).returns(BlobFile) }
-  def self.find_or_initialize_from_blob_entry(blob_entry)
-    BlobFile.find_or_initialize_by(sha256: blob_entry.sha256) do |blob_file|
-      blob_file.content_type = blob_entry.content_type
-      blob_file.content_bytes = blob_entry.contents
-      blob_file.created_at = blob_entry.created_at
-    end
-  end
-
   sig do
     params(
       contents: String,
@@ -80,43 +71,11 @@ class BlobFile < ReduxApplicationRecord
     ).returns(BlobFile)
   end
   def self.find_or_initialize_from_contents(contents, &block)
-    sha256 = Digest::SHA256.digest(contents)
-    BlobFile.migrate_sha256!(sha256) ||
-      BlobFile.find_or_initialize_by(
-        sha256: Digest::SHA256.digest(contents),
-      ) do |blob_file|
-        blob_file.content_bytes = contents
-        block.call(blob_file)
-      end
-  end
-
-  sig { params(blob_entry: BlobEntry).returns(BlobFile) }
-  def self.initialize_from_blob_entry(blob_entry)
-    BlobFile.new(
-      sha256: blob_entry.sha256,
-      content_type: blob_entry.content_type,
-      content_bytes: blob_entry.contents,
-      created_at: blob_entry.created_at,
-    )
-  end
-
-  sig { params(sha256: String).returns(T.nilable(BlobFile)) }
-  def self.migrate_sha256!(sha256)
-    retried = T.let(false, T::Boolean)
-    # convert to binary if hex formatted
-    sha256 = HexUtil.hex2bin(sha256) if sha256.length == 64
-    begin
-      blob_file = BlobFile.find_by(sha256: sha256)
-      return blob_file if blob_file
-      blob_entry = BlobEntry.find_by(sha256: sha256)
-      return nil unless blob_entry
-      blob_file = BlobFile.find_or_initialize_from_blob_entry(blob_entry)
-      return nil unless blob_file.save
-      blob_file
-    rescue ActiveRecord::RecordNotUnique => e
-      raise e if retried
-      retried = true
-      retry
+    BlobFile.find_or_initialize_by(
+      sha256: Digest::SHA256.digest(contents),
+    ) do |blob_file|
+      blob_file.content_bytes = contents
+      block.call(blob_file)
     end
   end

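
Note: callers of `find_or_initialize_from_contents` are unaffected by the simplification; the method still keys on the digest of `contents` and yields new records to the block. A hedged usage example (attribute names taken from this diff):

    data = "<html>...</html>"
    blob_file = BlobFile.find_or_initialize_from_contents(data) do |bf|
      bf.content_type = "text/html" # block runs only when initializing a new record
    end
    blob_file.save! if blob_file.new_record?
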
@@ -90,18 +90,6 @@ class HttpLogEntry < ReduxApplicationRecord
     response&.content_bytes
   end

-  sig { returns(T.nilable(BlobFile)) }
-  def response
-    super ||
-      begin
-        @response_blob_file ||= T.let(nil, T.nilable(BlobFile))
-        if sha256 = response_sha256
-          @response_blob_file ||= BlobFile.migrate_sha256!(sha256)
-        end
-        @response_blob_file
-      end
-  end
-
   sig { params(uri: T.any(String, Addressable::URI)).void }
   def uri=(uri)
     uri = Addressable::URI.parse(uri) if uri.is_a?(String)
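
Note: `HttpLogEntry#response` is now just the plain association, so a log entry whose response blob was never copied into `blob_files` reads back as having no content rather than triggering a migration. A sketch of the caller-visible behavior (`id` is hypothetical):

    entry = HttpLogEntry.find(id)
    body = entry.response&.content_bytes # nil unless a BlobFile row exists
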
@@ -5,14 +5,6 @@ T.bind(self, T.all(Rake::DSL, Object))
 require "find"

 namespace :blob_file do
-  desc "migrate blob entries to blob files"
-  task migrate_blob_entries: %i[environment] do
-    batch_size = ENV["batch_size"]&.to_i || 1000
-    start_at = ENV["start_at"] || Tasks::BlobFileMigrationTask::ZERO_SHA256
-
-    Tasks::BlobFileMigrationTask.new.run(batch_size:, start_sha256: start_at)
-  end
-
   task verify_fs_files: :environment do
     dir = Rails.application.config_for("blob_file_location")
     num_verified = 0
@@ -28,4 +20,65 @@ namespace :blob_file do

     puts "(#{Rails.env}, #{dir}) verified #{num_verified} files"
   end
+
+  # desc "Migrate remaining blob entries"
+  # task migrate_remaining_blob_entries: :environment do
+  #   sha256s = %w[
+  #     07fa02e3997c946102d0ad22ca43f55fa6a0345151359eddd36c30ef60bb1540
+  #     0a33463190a69f2e060c0efe8b75edb705b5f79001067addd59d30a519ebb94a
+  #     0c676b1e0187dc490d9d0122cb176d626c6d26601bcb98ad68ac16ea0a16bfd1
+  #     0d8d60ad95f514de9969b92a7d390d6eea8e074e5ffb0142b0f9e0211abb10eb
+  #     0da2068b44cadd7d535cda3857d03cad209cb630bf6f8745f52c065e5e97da87
+  #     0f75a78ede2e4b46c0e49c5c3bbc8e0f9870e33dc6863f070dcb355d1879ef4b
+  #     14be857666df4f274619605a39070b7813f41af701c33535632d62c618170f10
+  #     1a44fee2d07b7e895f9f6d322a86de579adb62db539223483936807bc13b9233
+  #     1b58fef9ba49fec9e016072bae4411c08c67ac615c5471e5cb2576dfda736b11
+  #     1c2d01d8287e1edf6f361324922de208652a4c0678296e2fabe8400ec2e11c17
+  #     1c66b43d90e2c678a2d78b04fca731a0491735c04b750fd96d4dce03cac69154
+  #     2aa4609383a75b4b063a2c21aa398954a15f51a67a5f629b700c180bd3b191f4
+  #     2c2a80854ad33ac3746c872603c098fdd3603ab5204e0e551c69857c0365a3c7
+  #     2dfddc505b4da5a88e4a82d9c67bab60b7965844656edbd3f8c889a3b076c201
+  #     325753454de13d1edc3641c3796c8368b3a9320115d0d40f4a3603d45594e004
+  #     355f35fa61217d457c93fdea4fbe361072af63e15613c8a595208913e09078fa
+  #     3f2eb002219b9e5665f02b4ff96585b517bddb55cf28fca15df2bd017385bd71
+  #     3f454c7d9aad7a2690eef76c143ec1297842c6cb784d7432b0ef95c97ae733f3
+  #     4208ae533155713dc32d1e8a1400ae319b3ee7cbbb1ca018487250025fdd4756
+  #     4471934c1c72f6347e07a0612ac43159f203006447feedb2a425c263fa88fabb
+  #     4ebd1243342b5ea733ca42c4cd6c5b265291c6eca0055ec8898083594fb92113
+  #     5711179b76168e448d4382cb463259b871f174eb4efb4558f5afbf31dbcf1d50
+  #     591d9e3afeeb5dac799a18400314049e218478bc1ffc770e669078221792652e
+  #     593451c9fae341c84848f966485324652eadd0ef24f97fdbc3f15597570aa0a5
+  #     5b4d6da7144136746efc5fa56c3c20ea291591d67119347d5322d180b8003f3d
+  #     5d7e4114ad10b8a4e2259be01a8453dd310ea672056c646a1e5c8e99ce59eb50
+  #     5d97d6da021bf80601706f7545948964cd3a12d3dd62f8cb9e9097f45168ca8e
+  #     5dd6d4c961ad2b128b2f6146c7a15d6a2d91dd75d49570e8f7abce358838e500
+  #     cb42f987805c8b2a6fb6e89e8b48fc686812ee922426bb5f45ee9a0bce80ad11
+  #     cb42f9a0371131629e8fd843e025d2cc8e113da02615442697c6af63a07cd932
+  #     cb42f9c0de416ac4affef18668ceb0995a676aa8282bd722f3a2d860aff45d68
+  #     cb42f9dcd71c5b72e1026547cd02d0e6d1b46b4270d7148c0a5c0d4437940a7b
+  #     cb42f9e3342302e8d13349f624749942042ed7478e2db5900989c2805b84fbce
+  #     cb42fa3e6a21220ffc5bb54ceea9ac2d6488672d9bee20dec4371b5a389a9266
+  #     cb42faad2eb0d92eb4ee7da5f7f509dfdc53d1428917999207d918b23d02953a
+  #     cb42fb81e894febfabc593c92f7a739d776520752401aa81837ece9a7cb16b35
+  #     cb42fb90f37dbbed886820857b0d8401ba2b98523e80af0fce0afb4944b5904a
+  #     cb42fbbd457c5c9fda3d89c2a9919ef7995d665a7dd3a4a8b515abdeb81ff27e
+  #     cb42fc1d13ba232c984193928c23ebb5233a7af66746421b4ee49cf45d607812
+  #     cb42fc23f103a5b155267f91513b1e6660811696c0dbf375b363dd23e9f080ba
+  #     cb42fc823641440acacca735d217a5b0c67b97bae400cc0d536e5142eec4d198
+  #     cb42fc9794fea6fb6d274f3022fb494d0c1da308b21ca49329279dde6453758f
+  #     cb42fd19e366b5c9c52dfd7d28709584a27ad1f6db106ae3add830634b39840b
+  #     cb42fd473013630ee59c439d28d5fd993a2fd98987eebf962f97793593d880a2
+  #     cb42fd513c249cb3cbd96678b4beb2f1a409ce14302ed46730eb65d0d2223069
+  #     cb42fd9b2dfe028a85a575fa40c8b1e5f2210841411daa02225fc3f345b89d2c
+  #   ]
+
+  #   sha256s.each do |sha256|
+  #     blob_file = BlobFile.migrate_sha256!(sha256)
+  #     if blob_file.persisted?
+  #       puts "migrated #{sha256}"
+  #     else
+  #       puts "failed to migrate #{sha256}"
+  #     end
+  #   end
+  # end
 end
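
Note: the removed `migrate_blob_entries` task read its knobs from the environment (`batch_size`, `start_at`), so the equivalent direct invocation of the now-deleted task would have been (sketch; "last" resumed from the checkpoint saved under PROGRESS_KEY):

    Tasks::BlobFileMigrationTask.new.run(batch_size: 500, start_sha256: "last")
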
sorbet/rbi/dsl/blob_entry.rbi (generated, 1357 lines)
File diff suppressed because it is too large
@@ -1,20 +0,0 @@
-# typed: false
-FactoryBot.define do
-  factory :blob_entry do
-    transient { content { "test content #{SecureRandom.alphanumeric(10)}" } }
-
-    content_type { "text/plain" }
-    sha256 { Digest::SHA256.digest(content) }
-    contents { content }
-    size { content.bytesize }
-
-    trait :html do
-      content_type { "text/html" }
-    end
-
-    trait :json do
-      content_type { "application/json" }
-      content { "{}" }
-    end
-  end
-end
@@ -1,417 +0,0 @@
-# typed: false
-require "rails_helper"
-
-RSpec.describe Tasks::BlobFileMigrationTask do
-  let(:log_sink) { StringIO.new }
-  let(:migrator) { described_class.new(log_sink: log_sink) }
-
-  describe "#run" do
-    context "with no blob entries" do
-      it "runs migration with default settings and logs correctly" do
-        result = migrator.run
-
-        expect(result).to eq(0)
-        expect(log_sink.string).to include("batch_size: 1000")
-        expect(log_sink.string).to include("migrated 0 total blob entries")
-      end
-    end
-
-    context "with custom batch size" do
-      it "uses the specified batch size in logs" do
-        result = migrator.run(batch_size: 500)
-
-        expect(result).to eq(0)
-        expect(log_sink.string).to include("batch_size: 500")
-      end
-    end
-
-    context "with custom start SHA256" do
-      it "accepts custom start SHA256 parameter" do
-        start_sha256 = "ff" * 32
-
-        result = migrator.run(start_sha256: start_sha256)
-
-        expect(result).to eq(0)
-        expect(log_sink.string).to include("batch_size: 1000")
-        expect(log_sink.string).to include("migrated 0 total blob entries")
-      end
-    end
-
-    context "with start_sha256='last' and no saved progress" do
-      it "starts from beginning when no progress is saved" do
-        result = migrator.run(start_sha256: "last")
-
-        expect(result).to eq(0)
-        expect(log_sink.string).to include("no saved progress")
-        expect(log_sink.string).to include("migrated 0 total blob entries")
-      end
-    end
-
-    context "with start_sha256='last' and existing progress" do
-      let!(:blob_entries) do
-        5.times.map { |i| create(:blob_entry, content: "progress #{i}") }
-      end
-
-      before do
-        # Simulate saved progress - use the SHA256 of the 3rd blob entry
-        progress_sha256_hex = HexUtil.bin2hex(blob_entries[2].sha256)
-        GlobalState.set("blob-file-migration-task", progress_sha256_hex)
-      end
-
-      after do
-        # Clean up the GlobalState
-        GlobalState.find_by(key: "blob-file-migration-task")&.destroy
-      end
-
-      it "resumes from saved progress" do
-        result = migrator.run(start_sha256: "last")
-
-        # Should skip the first 3 entries and migrate the remaining 2
-        expect(result).to be >= 0
-        expected_progress = HexUtil.bin2hex(blob_entries[2].sha256)
-        expect(log_sink.string).to include(expected_progress)
-      end
-    end
-
-    context "with existing blob entries that need migration" do
-      let!(:blob_entries) do
-        3.times.map { |i| create(:blob_entry, content: "content #{i}") }
-      end
-
-      after do
-        # Clean up any saved progress
-        GlobalState.find_by(key: "blob-file-migration-task")&.destroy
-      end
-
-      it "migrates all blob entries to blob files" do
-        expect(BlobFile.count).to eq(0)
-
-        result = migrator.run
-
-        expect(result).to eq(3)
-        expect(BlobFile.count).to eq(3)
-        expect(log_sink.string).to include("migrated 3 total blob entries")
-      end
-
-      it "creates blob files with correct attributes" do
-        migrator.run
-
-        blob_entries.each do |blob_entry|
-          blob_file = BlobFile.find_by(sha256: blob_entry.sha256)
-          expect(blob_file).to be_present
-          expect(blob_file.content_type).to eq(blob_entry.content_type)
-          expect(blob_file.size_bytes).to eq(blob_entry.size)
-          expect(blob_file.content_bytes).to eq(blob_entry.contents)
-        end
-      end
-
-      it "saves progress to GlobalState after migration" do
-        migrator.run
-
-        # Check that progress was saved
-        saved_progress = GlobalState.get("blob-file-migration-task")
-        expect(saved_progress).to be_present
-        expect(saved_progress.length).to eq(64) # Hex SHA256 length
-
-        # Should be the SHA256 of the last migrated entry
-        last_migrated_sha256_hex = HexUtil.bin2hex(BlobFile.last.sha256)
-        expect(saved_progress).to eq(last_migrated_sha256_hex)
-      end
-
-      it "logs progress during migration" do
-        migrator.run(batch_size: 2)
-
-        log_output = log_sink.string
-        expect(log_output).to include("migrated:")
-        expect(log_output).to include("processed:")
-        expect(log_output).to include("rate:")
-      end
-    end
-
-    context "with existing blob entries that already have blob files" do
-      let!(:blob_entry) { create(:blob_entry, content: "existing content") }
-      let!(:existing_blob_file) do
-        create(
-          :blob_file,
-          contents: blob_entry.contents,
-          content_type: blob_entry.content_type,
-        )
-      end
-
-      after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
-
-      it "does not create duplicate blob files" do
-        expect(BlobFile.count).to eq(1)
-
-        result = migrator.run
-
-        expect(result).to eq(0) # No new migrations
-        expect(BlobFile.count).to eq(1) # Same count
-        expect(log_sink.string).to include("migrated 0 total blob entries")
-      end
-
-      it "saves the last migrated sha256, even if it has already been migrated" do
-        migrator.run
-        final_progress = GlobalState.get("blob-file-migration-task")
-        expect(final_progress).to eq(HexUtil.bin2hex(blob_entry.sha256))
-      end
-    end
-
-    context "with mixed scenarios" do
-      let!(:blob_entry_needing_migration) do
-        create(:blob_entry, content: "needs migration")
-      end
-      let!(:blob_entry_with_file) { create(:blob_entry, content: "has file") }
-      let!(:existing_blob_file) do
-        create(
-          :blob_file,
-          contents: blob_entry_with_file.contents,
-          content_type: blob_entry_with_file.content_type,
-        )
-      end
-
-      after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
-
-      it "only migrates entries that need migration" do
-        expect(BlobFile.count).to eq(1)
-
-        result = migrator.run
-
-        expect(result).to eq(1) # Only one new migration
-        expect(BlobFile.count).to eq(2) # Now has both
-
-        # Verify the new blob file was created correctly
-        new_blob_file =
-          BlobFile.find_by(sha256: blob_entry_needing_migration.sha256)
-        expect(new_blob_file).to be_present
-        expect(new_blob_file.content_bytes).to eq(
-          blob_entry_needing_migration.contents,
-        )
-      end
-
-      it "saves progress for the actually migrated entry" do
-        migrator.run
-
-        saved_progress = GlobalState.get("blob-file-migration-task")
-        expected_progress = HexUtil.bin2hex(blob_entry_needing_migration.sha256)
-        expect(saved_progress).to eq(expected_progress)
-      end
-    end
-
-    context "with different batch sizes" do
-      let!(:blob_entries) do
-        5.times.map { |i| create(:blob_entry, content: "batch content #{i}") }
-      end
-
-      after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
-
-      it "handles batch size of 1" do
-        result = migrator.run(batch_size: 1)
-
-        expect(result).to eq(5)
-        expect(BlobFile.count).to eq(5)
-      end
-
-      it "handles batch size larger than total entries" do
-        result = migrator.run(batch_size: 100)
-
-        expect(result).to eq(5)
-        expect(BlobFile.count).to eq(5)
-      end
-
-      it "handles custom batch size smaller than total entries" do
-        result = migrator.run(batch_size: 2)
-
-        expect(result).to eq(5)
-        expect(BlobFile.count).to eq(5)
-
-        # Verify all entries were migrated correctly
-        blob_entries.each do |blob_entry|
-          blob_file = BlobFile.find_by(sha256: blob_entry.sha256)
-          expect(blob_file).to be_present
-        end
-      end
-
-      it "saves progress multiple times with small batch size" do
-        migrator.run(batch_size: 2)
-
-        # Should see multiple progress saves in the log
-        expect(log_sink.string.scan(/migrated:/).count).to be > 1
-      end
-    end
-
-    context "with idempotent runs" do
-      let!(:blob_entries) do
-        3.times.map { |i| create(:blob_entry, content: "idempotent #{i}") }
-      end
-
-      after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
-
-      it "handles duplicate runs gracefully" do
-        first_result = migrator.run
-        expect(first_result).to eq(3)
-        expect(BlobFile.count).to eq(3)
-
-        # Reset log sink for second run
-        log_sink.truncate(0)
-        log_sink.rewind
-
-        second_result = migrator.run
-        expect(second_result).to eq(0) # No new migrations
-        expect(BlobFile.count).to eq(3) # Same count
-
-        expect(log_sink.string).to include("migrated 0 total blob entries")
-      end
-    end
-
-    context "with large datasets" do
-      let!(:blob_entries) do
-        10.times.map { |i| create(:blob_entry, content: "large dataset #{i}") }
-      end
-
-      after { GlobalState.find_by(key: "blob-file-migration-task")&.destroy }
-
-      it "successfully migrates large datasets in batches" do
-        result = migrator.run(batch_size: 3)
-
-        expect(result).to eq(10)
-        expect(BlobFile.count).to eq(10)
-
-        # Verify progress logging occurred multiple times
-        log_output = log_sink.string
-        expect(log_output.scan(/migrated:/).count).to be > 1
-      end
-    end
-  end
-
-  describe "#log_progress" do
-    it "logs migration progress with numbers" do
-      migrator.send(:log_progress, 100, 150, 25.5, "abc123")
-
-      log_output = log_sink.string
-      expect(log_output).to include("migrated:")
-      expect(log_output).to include("processed:")
-      expect(log_output).to include("rate:")
-      expect(log_output).to include("last:")
-    end
-
-    it "handles nil last_sha256" do
-      migrator.send(:log_progress, 100, 150, 25.5, nil)
-
-      log_output = log_sink.string
-      expect(log_output).to include("last: nil")
-    end
-
-    it "includes formatted numbers" do
-      migrator.send(:log_progress, 1000, 2500, 10.5, nil)
-
-      log_output = log_sink.string
-      expect(log_output).to include("1,000")
-      expect(log_output).to include("2,500")
-      expect(log_output).to include("10.5")
-    end
-  end
-
-  describe "#format_number" do
-    it "formats numbers with delimiters" do
-      result = migrator.send(:format_number, 1_234_567)
-
-      expect(result).to include("1,234,567")
-    end
-
-    it "right-justifies numbers" do
-      result = migrator.send(:format_number, 123)
-
-      expect(result.length).to be >= 8
-    end
-
-    it "handles zero" do
-      result = migrator.send(:format_number, 0)
-
-      expect(result).to include("0")
-      expect(result.length).to be >= 8
-    end
-
-    it "handles large numbers" do
-      result = migrator.send(:format_number, 999_999_999)
-
-      expect(result).to include("999,999,999")
-    end
-  end
-
-  describe "constructor and basic functionality" do
-    it "initializes with log_sink" do
-      expect(migrator).to be_a(Tasks::BlobFileMigrationTask)
-    end
-
-    it "uses provided log_sink" do
-      custom_sink = StringIO.new
-      custom_migrator = described_class.new(log_sink: custom_sink)
-
-      custom_migrator.run
-
-      expect(custom_sink.string).to include("batch_size:")
-    end
-
-    it "defaults to $stderr when no log_sink provided" do
-      expect { described_class.new }.not_to raise_error
-    end
-  end
-
-  describe "ZERO_SHA256 constant" do
-    it "defines the zero SHA256 constant" do
-      expect(Tasks::BlobFileMigrationTask::ZERO_SHA256).to eq("00" * 32)
-    end
-  end
-
-  describe "PROGRESS_KEY constant" do
-    it "defines the progress key constant" do
-      expect(Tasks::BlobFileMigrationTask::PROGRESS_KEY).to eq(
-        "blob-file-migration-task",
-      )
-    end
-  end
-
-  describe "integration scenarios" do
-    it "handles multiple sequential runs without errors" do
-      first_result = migrator.run(batch_size: 100)
-      expect(first_result).to eq(0)
-
-      # Reset log sink for second run
-      log_sink.truncate(0)
-      log_sink.rewind
-
-      second_result = migrator.run(batch_size: 200)
-      expect(second_result).to eq(0)
-
-      log_output = log_sink.string
-      expect(log_output).to include("batch_size: 200")
-    end
-
-    it "properly formats logs with different parameters" do
-      migrator.run(batch_size: 42, start_sha256: "aa" * 32)
-
-      log_output = log_sink.string
-      expect(log_output).to include("batch_size: 42")
-      expect(log_output).to include("migrated 0 total blob entries")
-    end
-  end
-
-  describe "parameter validation" do
-    it "accepts valid batch_size parameter" do
-      expect { migrator.run(batch_size: 1) }.not_to raise_error
-      expect { migrator.run(batch_size: 1000) }.not_to raise_error
-      expect { migrator.run(batch_size: 10_000) }.not_to raise_error
-    end
-
-    it "accepts valid start_sha256 parameter" do
-      expect { migrator.run(start_sha256: "00" * 32) }.not_to raise_error
-      expect { migrator.run(start_sha256: "ff" * 32) }.not_to raise_error
-      expect { migrator.run(start_sha256: "ab" * 32) }.not_to raise_error
-    end
-
-    it "accepts 'last' as start_sha256 parameter" do
-      expect { migrator.run(start_sha256: "last") }.not_to raise_error
-    end
-  end
-end
@@ -64,14 +64,4 @@ RSpec.describe BlobFile, type: :model do
       end
     end
   end
-
-  describe "blob entry integration" do
-    it "can be initialized from a BlobEntry" do
-      blob_entry = create(:blob_entry)
-      blob_file = BlobFile.initialize_from_blob_entry(blob_entry)
-      expect(blob_file.save).to be true
-      expect(blob_file.content_bytes).to eq(blob_entry.contents)
-      expect(blob_file.content_type).to eq(blob_entry.content_type)
-    end
-  end
 end