Files
redux-scraper/app/models/blob_entry.rb
2025-01-01 22:30:55 +00:00

120 lines
2.8 KiB
Ruby

# typed: strict
# A blob row in the "blob_entries_p" table, keyed by the raw (32-byte)
# SHA-256 digest of its contents.
#
# The stored `contents` column may hold either the full payload or a patch
# against another blob referenced by `base_sha256`; the `contents` reader
# reconstructs the full payload in the latter case.
#
# Records cannot be destroyed: `before_destroy` always raises
# ActiveRecord::ReadOnlyRecord. (ImmutableModel is defined elsewhere;
# presumably it blocks updates as well -- confirm against that module.)
class BlobEntry < ReduxApplicationRecord
  self.table_name = "blob_entries_p"
  include ImmutableModel

  before_destroy { raise ActiveRecord::ReadOnlyRecord }

  self.primary_key = :sha256

  # Raw SHA-256 digest of empty input, decoded from its hex form.
  EMPTY_FILE_SHA256 =
    T.let(
      HexUtil.hex2bin(
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
      ),
      String,
    )

  belongs_to :base,
             optional: true,
             foreign_key: :base_sha256,
             class_name: "::BlobEntry"

  # Integrity check: the (reconstructed) contents must hash to the primary
  # key. Raises RuntimeError when they do not.
  after_create do
    actual_sha256 = Digest::SHA256.digest(contents)
    raise "digest mismatch for BlobEntry" if sha256 != actual_sha256
  end

  # Returns the blob this entry is stored relative to, or nil when
  # `base_sha256` is not set. Falls back to an explicit lookup when the
  # association reader returns nil; that lookup raises if the referenced
  # blob does not exist. A non-nil result is memoized.
  sig { returns(T.nilable(BlobEntry)) }
  def base
    # Declares the ivar's type for Sorbet without changing its value.
    @base_model = T.let(@base_model, T.nilable(BlobEntry))
    @base_model ||=
      super ||
        if (sha256 = base_sha256)
          self.class.ensure(sha256)
        end
  end

  validates_presence_of(:sha256, :content_type, :size)
  validates :contents,
            length: {
              minimum: 0,
              allow_nil: false,
              message: "can't be nil",
            }
  validates :sha256, length: { is: 32 }
  validates :base_sha256, length: { is: 32 }, if: :base_sha256

  # Finds the blob with the given raw digest.
  #
  # @param sha256 [String] raw SHA-256 digest
  # @return [BlobEntry]
  # @raise [RuntimeError] when no such blob exists
  sig { params(sha256: String).returns(BlobEntry) }
  def self.ensure(sha256)
    find_by(sha256: sha256) ||
      raise("blob #{HexUtil.bin2hex(sha256)} does not exist")
  end

  # Hex form of `sha256`, or nil when `sha256` is not set.
  sig { returns(T.nilable(String)) }
  def sha256_hex
    digest = sha256
    HexUtil.bin2hex(digest) if digest
  end

  # Full contents of the blob. When a base blob with contents is present,
  # the stored column value is applied as a patch on top of the base's
  # contents; otherwise the stored value is returned unchanged. A non-nil
  # result is memoized.
  sig { returns(T.nilable(String)) }
  def contents
    @contents ||=
      T.let(
        begin
          stored = read_attribute(:contents)
          base_contents = base&.contents
          base_contents ? XDiff.patch(base_contents, stored) : stored
        end,
        T.nilable(String),
      )
  end

  # Number of bytes held in the `contents` column itself (the patch, when
  # stored relative to a base), or nil when the column is nil.
  sig { returns(T.nilable(Integer)) }
  def bytes_stored
    # bytesize, not size: String#size counts characters, which differs from
    # the byte count for non-binary encodings.
    read_attribute(:contents)&.bytesize
  end

  # Returns the existing blob whose digest matches `contents`, or a new,
  # unsaved record built from the arguments.
  #
  # @param content_type [String]
  # @param contents [String]
  # @return [BlobEntry]
  sig { params(content_type: String, contents: String).returns(BlobEntry) }
  def self.find_or_build(content_type:, contents:)
    sha256 = Digest::SHA256.digest(contents)
    BlobEntry.find_by(sha256: sha256) ||
      build_record(
        content_type: content_type,
        sha256: sha256,
        contents: contents,
      )
  end

  # Content-type patterns; presumably those eligible for diff-based storage
  # (not referenced elsewhere in this class -- confirm against callers).
  DIFFABLE_CONTENT_TYPES =
    T.let(
      [%r{text/html}, %r{text/plain}, %r{application/json}].freeze,
      T::Array[Regexp],
    )

  # Builds (without saving) a record holding `contents` in full.
  #
  # @param content_type [String]
  # @param contents [String]
  # @param sha256 [String, nil] raw digest of `contents`; computed when nil
  # @return [BlobEntry] new, unsaved record
  sig do
    params(
      content_type: String,
      contents: String,
      sha256: T.nilable(String),
    ).returns(BlobEntry)
  end
  def self.build_record(content_type:, contents:, sha256: nil)
    sha256 ||= Digest::SHA256.digest(contents)
    new(
      sha256: sha256,
      content_type: content_type,
      # Byte length, so multibyte characters are not under-counted.
      size: contents.bytesize,
      contents: contents,
    )
  end
end