fix specs, add migrator

This commit is contained in:
Dylan Knutson
2024-12-18 22:53:05 +00:00
parent 8051c86bb4
commit 16fab739b5
22 changed files with 1012 additions and 171 deletions

View File

@@ -10,6 +10,8 @@ services:
- ../..:/workspaces:cached
- ./fish-shell-conf-d:/home/vscode/.config/fish/conf.d
- devcontainer-redux-gem-cache:/usr/local/rvm/gems
- devcontainer-redux-blob-files:/mnt/blob_files_development
- /tank/redux-data/blob_files_production:/mnt/blob_files_production
# Overrides default command so things don't shut down after the process ends.
command: sleep infinity
@@ -44,3 +46,4 @@ services:
volumes:
postgres-data:
devcontainer-redux-gem-cache:
devcontainer-redux-blob-files:

View File

@@ -10,3 +10,4 @@ profiler
.vscode
launch.json
settings.json
*.export

3
.gitignore vendored
View File

@@ -52,4 +52,5 @@ ext/xdiff/xdiff
/yarn-error.log
yarn-debug.log*
.yarn-integrity
.DS_Store
.DS_Store
*.export

View File

@@ -22,6 +22,23 @@ RUN rake compile
FROM ruby:3.2.6
USER root
# apt caching & install packages
RUN rm -f /etc/apt/apt.conf.d/docker-clean; \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
RUN \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get update && \
apt-get install --no-install-recommends --no-install-suggests -y \
libblas-dev liblapack-dev
# preinstall gems that take a long time to install
RUN MAKE="make -j12" gem install bundler -v '2.4.6' --verbose
RUN MAKE="make -j12" gem install faiss -v '0.2.5' --verbose
RUN MAKE="make -j12" gem install rails_live_reload -v '0.3.4' --verbose
RUN bundle config --global frozen 1
# set up nodejs 18.x deb repo
RUN mkdir -p /etc/apt/keyrings && \
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
@@ -29,23 +46,17 @@ RUN mkdir -p /etc/apt/keyrings && \
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_18.x nodistro main" \
| tee /etc/apt/sources.list.d/nodesource.list
# apt caching & install packages
RUN rm -f /etc/apt/apt.conf.d/docker-clean; \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
RUN \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get update && \
apt-get install --no-install-recommends --no-install-suggests -y \
libblas-dev liblapack-dev libvips42 ca-certificates curl gnupg nodejs libpq-dev
WORKDIR /usr/src/app
RUN gem install bundler -v '2.4.6'
RUN gem install faiss -v '0.2.5'
# RUN bundle config --global frozen 1
libvips42 ca-certificates curl gnupg nodejs libpq-dev
COPY --from=native-gems /usr/src/app/gems/xdiff-rb /gems/xdiff-rb
COPY --from=native-gems /usr/src/app/gems/rb-bsdiff /gems/rb-bsdiff
WORKDIR /usr/src/app
COPY Gemfile Gemfile.lock ./
RUN bundle install
@@ -58,7 +69,12 @@ RUN yarn
COPY . .
# precompile assets
RUN RAILS_ENV=production bin/rails assets:precompile
# RUN RAILS_ENV=production bin/rails assets:precompile
RUN mkdir -p tmp/pids
# create user with id=1000 gid=1000
RUN groupadd -g 1000 app && \
useradd -m -d /home/app -s /bin/bash -u 1000 -g 1000 app
RUN chown -R app:app /usr/src/app
USER app
CMD /bin/bash

View File

@@ -13,6 +13,7 @@ Rake.application.rake_require "fa"
Rake.application.rake_require "e621"
Rake.application.rake_require "twitter"
Rake.application.rake_require "ib"
Rake.application.rake_require "blob_file"
task set_ar_stdout: :environment do
ActiveRecord::Base.logger = Logger.new($stdout)
@@ -57,24 +58,6 @@ task periodic_tasks: %i[environment set_logger_stdout] do
loop { sleep 10 }
end
namespace :blob_entries do
task export_samples: :environment do
limit = ENV["limit"]&.to_i || raise("need 'limit' (num)")
outfile =
ENV["outfile"] || raise("need 'outfile' (file path, .json encoded)")
BlobEntrySampleExporter.new.export_samples(limit, outfile)
end
task import_samples: :environment do
infile = ENV["infile"] || raise("need 'infile' (file path, .json encoded)")
BlobEntrySampleExporter.new.import_samples(infile)
end
task migrate_entries: :environment do
start_at = ENV["start_at"]
batch_size = ENV["batch_size"]&.to_i || 64
BlobEntrySampleExporter.new.migrate_blob_entries(start_at, batch_size)
end
end
namespace :db_sampler do
task export: :environment do
url_names = ENV["url_names"] || raise("need 'url_names' (comma-separated)")

View File

@@ -22,6 +22,7 @@ class DbSampler
end
def export(url_names)
$stderr.puts "exporting users..."
Domain::Fa::User
.includes(
{
@@ -65,17 +66,16 @@ class DbSampler
def model_id(model)
name = model.class.name
pk = model.class.primary_key.to_sym
id = model.send(pk)
pk = model.class.primary_key
id = model.id
id_fmt = id.to_s
id_fmt = HexUtil.bin2hex(id) if pk == :sha256
id_fmt = HexUtil.bin2hex(id) if pk == "sha256"
id_fmt = "#{name} / #{id_fmt}"
end
def import_model(model)
pk = model.class.primary_key.to_sym
id = model.send(pk)
exists = model.class.exists?(pk => id)
id = model.id
exists = model.class.exists?(id)
if exists
$stderr.puts("skipped existing #{model_id(model)}")
@@ -94,8 +94,7 @@ class DbSampler
def handle_model(model, level, user_depth)
return unless model
is_user = model.is_a?(Domain::Fa::User)
user_depth += 1 if is_user
user_depth += 1 if model.is_a?(Domain::Fa::User)
return unless @handled.add?(model)
assocs = SCHEMA[model.class] || raise("invalid: #{model.class.name}")
@@ -118,7 +117,7 @@ class DbSampler
def dump(model, level)
@file.puts(Base64.strict_encode64(Zstd.compress(Marshal.dump(model), 1)))
id = model.send(model.class.primary_key)
id = model.id
id = HexUtil.bin2hex(id) if model.class.primary_key == "sha256"
$stderr.puts ("-" * level) + " dumped #{model.class.name}/#{id}"
end

View File

@@ -12,13 +12,6 @@ class Scraper::HttpClient
@http_performer = http_performer
@domain_last_requested_at = {}
init_cookie_jar
# TODO - populate AdaptiveCache with domains this client cares about
@max_cache_size = 8
@blob_entry_cache =
Hash.new do |hash, key|
hash[key] = LegacyImport::AdaptiveCache.new(@max_cache_size, 1.0, 0.1)
end
logger.level = :info
@config.do_login(http_performer)
end
@@ -87,26 +80,12 @@ class Scraper::HttpClient
response_headers["Content-Type"] || response_headers["content-type"] ||
"none/none"
cache_key = "#{uri.host}|#{content_type}"
blob_entry_cache = @blob_entry_cache[cache_key]
candidates =
if blob_entry_cache.at_capacity? && rand(0..100) >= 5
blob_entry_cache.candidates
else
[]
end
candidates << caused_by_entry.response if caused_by_entry&.response
if caused_by_entry&.response&.base
candidates << caused_by_entry.response.base
end
retries = 0
begin
response_blob_entry =
BlobEntryP.find_or_build(
content_type: content_type,
contents: response_body,
candidates: candidates
contents: response_body
)
scrubbed_uri = @config.scrub_stored_uri(uri)
@@ -136,18 +115,6 @@ class Scraper::HttpClient
raise
end
if response_blob_entry.base_sha256
blob_entry_cache.reward(
HexUtil.bin2hex(response_blob_entry.base_sha256)[0..8]
)
else
blob_entry_cache.insert(
HexUtil.bin2hex(response_blob_entry.sha256)[0..8],
response_blob_entry,
scrubbed_uri
)
end
response_code_colorized =
if response_code == 200
response_code.to_s.light_green

View File

@@ -55,62 +55,29 @@ class BlobEntryP < ReduxApplicationRecord
self.read_attribute(:contents).size
end
def self.find_or_build(content_type:, contents:, candidates: [])
def self.find_or_build(content_type:, contents:)
sha256 = Digest::SHA256.digest(contents)
BlobEntryP.find_by(sha256: sha256) ||
begin
build_record(
content_type: content_type,
sha256: sha256,
contents: contents,
candidates: candidates
contents: contents
)
end
end
DIFFABLE_CONTENT_TYPES = [%r{text/html}, %r{text/plain}, %r{application/json}]
def self.build_record(content_type:, sha256: nil, contents:, candidates: [])
def self.build_record(content_type:, sha256: nil, contents:)
sha256 ||= Digest::SHA256.digest(contents)
record =
self.new(sha256: sha256, content_type: content_type, size: contents.size)
smallest_patch_size = nil
smallest_patch = nil
smallest_candidate = nil
candidates
.map do |candidate|
# only consider candidates with the same content type (may relax this later)
next nil if candidate.content_type != content_type
# only consider candidates who themselves aren't patch-based
next nil unless candidate.base.nil?
# only consider diffable content types
next nil unless DIFFABLE_CONTENT_TYPES.any? { |ct| content_type =~ ct }
[candidate, XDiff.diff(candidate.contents, contents)]
end
.reject(&:nil?)
.each do |pair|
candidate, patch = pair
if smallest_patch_size.nil? || patch.size < smallest_patch_size
smallest_patch_size = patch.size
smallest_patch = patch
smallest_candidate = candidate
end
end
# only use a patch if it's <= 60% the original content size
if smallest_patch_size && smallest_patch_size <= (contents.size * 0.6)
record.base = smallest_candidate
record.contents = smallest_patch
else
# no candidate present, store the whole contents directly in the record
record.contents = contents
end
raise RuntimeError.new("invariant!") if record.contents != contents
self.new(
sha256: sha256,
content_type: content_type,
size: contents.size,
contents: contents
)
record
end
end
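For reference, a minimal sketch of the simplified call site, assuming BlobEntryP as shown above; the contents value is illustrative:
body = "<html>...</html>"  # illustrative payload
entry = BlobEntryP.find_or_build(content_type: "text/html", contents: body)
entry.sha256 == Digest::SHA256.digest(body)  # => true; dedup is purely by content hash now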

98
app/models/blob_file.rb Normal file
View File

@@ -0,0 +1,98 @@
class BlobFile < ReduxApplicationRecord
self.table_name = "blob_files"
ROOT_DIR =
Rails.application.config_for("blob_file_location") ||
raise("no blob_file_location config")
FILE_PATH_PATTERNS = { v1: [2, 2, 1] }
# consider sha256 `e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855`
# breaking it into 3 parts, becomes:
# e3/b0/c/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
# so with 100M files, each directory will have 100M / (256*256*16) = ~95 files
include ImmutableModel
before_destroy { raise ActiveRecord::ReadOnlyRecord }
self.primary_key = :sha256
EMPTY_FILE_SHA256 =
HexUtil.hex2bin(
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
)
enum version: { v1: 1 }
after_initialize { self.version ||= :v1 }
validates_presence_of(:sha256, :content_type, :size_bytes)
validates :sha256, length: { is: 32 }
validates :content_bytes,
length: {
minimum: 0,
allow_nil: false,
message: "can't be nil"
}
before_save do
if not self.persisted?
unless File.exist?(self.absolute_file_path)
FileUtils.mkdir_p(File.dirname(self.absolute_file_path))
File.binwrite(self.absolute_file_path, self.content_bytes)
end
end
end
def self.find_or_initialize_from_blob_entry(blob_entry)
blob_file =
BlobFile.find_or_initialize_by(sha256: blob_entry.sha256) do |blob_file|
blob_file.content_type = blob_entry.content_type
blob_file.content_bytes = blob_entry.contents
end
blob_file
end
def content_bytes=(content_bytes)
raise("cannot set content_bytes of an existing BlobFile") if self.persisted?
@content_bytes = content_bytes
@content_bytes.force_encoding("ASCII-8BIT")
self.size_bytes = content_bytes.bytesize
self.sha256 = Digest::SHA256.digest(content_bytes)
end
def content_bytes
if self.persisted?
@content_bytes ||=
begin
c = File.binread(self.absolute_file_path)
c.force_encoding("ASCII-8BIT")
c
end
else
@content_bytes
end
end
def absolute_file_path
@absolute_file_path ||= File.join(ROOT_DIR, self.relative_file_path)
end
def relative_file_path
file_path_parts = [
self.version,
*self.class.path_segments(
FILE_PATH_PATTERNS[self.version.to_sym],
HexUtil.bin2hex(self.sha256)
)
]
file_path_parts.join("/")
end
def self.path_segments(pattern, sha256_hex)
parts = []
offset = 0
pattern.each do |part_len|
parts << sha256_hex[offset, part_len]
offset += part_len
end
parts << sha256_hex
parts
end
end
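A minimal usage sketch of the model above, assuming the blob_file_location config added later in this commit; the input bytes are illustrative:
bytes = Random.bytes(1024)  # illustrative content
bf = BlobFile.new(content_bytes: bytes, content_type: "application/octet-stream")
bf.size_bytes          # => 1024, set by content_bytes=
bf.sha256              # => Digest::SHA256.digest(bytes)
bf.relative_file_path  # => "v1/<2 hex>/<2 hex>/<1 hex>/<full sha256 hex>"
bf.save!               # before_save writes the bytes under ROOT_DIR if the file is missing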

View File

@@ -0,0 +1,14 @@
test:
tmp/blob_files_test
development:
/mnt/blob_files_development
staging:
/mnt/blob_files_production
production:
/mnt/blob_files_production
worker:
/mnt/blob_files_production
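The model reads this file through Rails' config_for, which returns the value for the current environment; a sketch, assuming the paths above:
Rails.application.config_for("blob_file_location")
# => "tmp/blob_files_test" under RAILS_ENV=test
# => "/mnt/blob_files_development" under RAILS_ENV=development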

View File

@@ -50,28 +50,19 @@ ActiveRecord::Schema[7.0].define(version: 0) do
t.integer "res_headers_id"
t.integer "diff_type", default: 0
t.binary "native_blob_entry_sha256"
t.index ["imported_from_file"],
name: "index_cache_http_log_entries_on_imported_from_file"
t.index %w[path host], name: "index_cache_http_log_entries_on_path_and_host"
t.index ["path"],
name: "cache_http_log_entries_path_idx",
opclass: :gist_trgm_ops,
using: :gist
t.index ["imported_from_file"], name: "index_cache_http_log_entries_on_imported_from_file"
t.index ["path", "host"], name: "index_cache_http_log_entries_on_path_and_host"
t.index ["path"], name: "cache_http_log_entries_path_idx", opclass: :gist_trgm_ops, using: :gist
t.index ["path"], name: "index_pattern_ops_on_hle_entry_path"
t.index ["requested_at"],
name: "index_cache_http_log_entries_on_requested_at"
t.index ["requested_at"], name: "index_cache_http_log_entries_on_requested_at"
end
create_table "cache_http_log_entry_headers",
id: :serial,
force: :cascade do |t|
create_table "cache_http_log_entry_headers", id: :serial, force: :cascade do |t|
t.hstore "headers", null: false
t.binary "sha256", null: false
t.datetime "created_at"
t.datetime "updated_at"
t.index ["sha256"],
name: "index_cache_http_log_entry_headers_on_sha256",
unique: true
t.index ["sha256"], name: "index_cache_http_log_entry_headers_on_sha256", unique: true
end
create_table "e621_posts", id: :serial, force: :cascade do |t|
@@ -98,8 +89,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
t.integer "imgsearch_entry_id"
t.index ["blob_entry_id"], name: "index_e621_posts_on_blob_entry_id"
t.index ["e621_id"], name: "index_e621_posts_on_e621_id", unique: true
t.index ["imgsearch_entry_id"],
name: "index_e621_posts_on_imgsearch_entry_id"
t.index ["imgsearch_entry_id"], name: "index_e621_posts_on_imgsearch_entry_id"
t.index ["md5"], name: "index_e621_posts_on_md5"
end
@@ -248,10 +238,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
t.integer "object_type", limit: 2, null: false
t.integer "object_id", null: false
t.bigint "fingerprints", array: true
t.index %w[object_type object_id],
name:
"index_lite_media_file_fingerprints_on_object_type_and_object_id",
unique: true
t.index ["object_type", "object_id"], name: "index_lite_media_file_fingerprints_on_object_type_and_object_id", unique: true
end
create_table "lite_s3_objects", force: :cascade do |t|
@@ -271,8 +258,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
t.float "total_time"
t.bigint "calls"
t.datetime "captured_at"
t.index %w[database captured_at],
name: "index_pghero_query_stats_on_database_and_captured_at"
t.index ["database", "captured_at"], name: "index_pghero_query_stats_on_database_and_captured_at"
end
create_table "pghero_space_stats", id: :serial, force: :cascade do |t|
@@ -281,8 +267,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
t.text "relation"
t.bigint "size"
t.datetime "captured_at"
t.index %w[database captured_at],
name: "index_pghero_space_stats_on_database_and_captured_at"
t.index ["database", "captured_at"], name: "index_pghero_space_stats_on_database_and_captured_at"
end
create_table "watch_jobs", id: :serial, force: :cascade do |t|
@@ -319,9 +304,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
t.integer "status"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index %w[enum_type weasyl_id],
name: "index_weasyl_attempted_posts_on_enum_type_and_weasyl_id",
unique: true
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_attempted_posts_on_enum_type_and_weasyl_id", unique: true
end
create_table "weasyl_descriptions", id: :serial, force: :cascade do |t|
@@ -333,18 +316,13 @@ ActiveRecord::Schema[7.0].define(version: 0) do
create_table "weasyl_joins_user_follows", id: :serial, force: :cascade do |t|
t.integer "follower_id", null: false
t.integer "followed_id", null: false
t.index %w[follower_id followed_id],
name:
"index_weasyl_joins_user_follows_on_follower_id_and_followed_id",
unique: true
t.index ["follower_id", "followed_id"], name: "index_weasyl_joins_user_follows_on_follower_id_and_followed_id", unique: true
end
create_table "weasyl_joins_user_friends", id: :serial, force: :cascade do |t|
t.integer "a_id", null: false
t.integer "b_id", null: false
t.index %w[a_id b_id],
name: "index_weasyl_joins_user_friends_on_a_id_and_b_id",
unique: true
t.index ["a_id", "b_id"], name: "index_weasyl_joins_user_friends_on_a_id_and_b_id", unique: true
end
create_table "weasyl_medias", id: :serial, force: :cascade do |t|
@@ -377,9 +355,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
t.datetime "full_scanned_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index %w[enum_type weasyl_id],
name: "index_weasyl_posts_on_enum_type_and_weasyl_id",
unique: true
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_posts_on_enum_type_and_weasyl_id", unique: true
end
create_table "weasyl_users", id: :serial, force: :cascade do |t|
@@ -393,9 +369,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
t.integer "userid"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["login_name"],
name: "index_weasyl_users_on_login_name",
unique: true
t.index ["login_name"], name: "index_weasyl_users_on_login_name", unique: true
end
create_table "xtwitter_tweets", id: :serial, force: :cascade do |t|
@@ -420,4 +394,5 @@ ActiveRecord::Schema[7.0].define(version: 0) do
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
end

View File

@@ -9,6 +9,6 @@ class DropGoodJobs < ActiveRecord::Migration[7.0]
drop_table :good_job_batches
drop_table :good_job_executions
drop_table :good_job_processes
drop_table :good_job_setting
drop_table :good_job_settings
end
end

View File

@@ -0,0 +1,41 @@
class CreateBlobFiles < ActiveRecord::Migration[7.0]
NUM_PARTITIONS = 64
def up
main_table_sql = <<~SQL
CREATE TABLE blob_files (
sha256 bytea NOT NULL, -- sha256 of the file
version integer NOT NULL, -- how to reconstruct the file path based on sha256
metadata jsonb NOT NULL DEFAULT '{}', -- metadata about the file, where it came from, etc
content_type character varying NOT NULL, -- content type of the file
size_bytes integer NOT NULL, -- size of the file in bytes
created_at timestamp(6) without time zone NOT NULL
) PARTITION BY HASH (sha256)
SQL
execute main_table_sql
NUM_PARTITIONS.times do |partnum|
partition_table_name = :"blob_files_#{partnum.to_s.rjust(2, "0")}"
partition_table_sql = <<~SQL
CREATE TABLE #{partition_table_name}
PARTITION OF blob_files FOR
VALUES WITH (
MODULUS #{NUM_PARTITIONS},
REMAINDER #{partnum}
)
SQL
execute partition_table_sql
add_index partition_table_name, :sha256, unique: true
end
add_index :blob_files, :sha256, unique: true
end
def down
# drop the partitions before the parent; dropping the parent first would
# remove them implicitly and the per-partition drops below would then fail
NUM_PARTITIONS.times do |partnum|
partition_table_name = :"blob_files_#{partnum.to_s.rjust(2, "0")}"
drop_table partition_table_name
end
drop_table :blob_files
end
end

652
db/schema.rb generated
View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2024_12_17_182130) do
ActiveRecord::Schema[7.0].define(version: 2024_12_18_190906) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_prewarm"
enable_extension "pg_stat_statements"
@@ -605,6 +605,656 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_17_182130) do
t.index ["sha256"], name: "index_blob_entries_p_63_on_sha256", unique: true
end
create_table "blob_files", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_on_sha256", unique: true
end
create_table "blob_files_00", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_00_on_sha256", unique: true
end
create_table "blob_files_01", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_01_on_sha256", unique: true
end
create_table "blob_files_02", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_02_on_sha256", unique: true
end
create_table "blob_files_03", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_03_on_sha256", unique: true
end
create_table "blob_files_04", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_04_on_sha256", unique: true
end
create_table "blob_files_05", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_05_on_sha256", unique: true
end
create_table "blob_files_06", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_06_on_sha256", unique: true
end
create_table "blob_files_07", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_07_on_sha256", unique: true
end
create_table "blob_files_08", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_08_on_sha256", unique: true
end
create_table "blob_files_09", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_09_on_sha256", unique: true
end
create_table "blob_files_10", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_10_on_sha256", unique: true
end
create_table "blob_files_11", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_11_on_sha256", unique: true
end
create_table "blob_files_12", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_12_on_sha256", unique: true
end
create_table "blob_files_13", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_13_on_sha256", unique: true
end
create_table "blob_files_14", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_14_on_sha256", unique: true
end
create_table "blob_files_15", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_15_on_sha256", unique: true
end
create_table "blob_files_16", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_16_on_sha256", unique: true
end
create_table "blob_files_17", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_17_on_sha256", unique: true
end
create_table "blob_files_18", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_18_on_sha256", unique: true
end
create_table "blob_files_19", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_19_on_sha256", unique: true
end
create_table "blob_files_20", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_20_on_sha256", unique: true
end
create_table "blob_files_21", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_21_on_sha256", unique: true
end
create_table "blob_files_22", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_22_on_sha256", unique: true
end
create_table "blob_files_23", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_23_on_sha256", unique: true
end
create_table "blob_files_24", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_24_on_sha256", unique: true
end
create_table "blob_files_25", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_25_on_sha256", unique: true
end
create_table "blob_files_26", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_26_on_sha256", unique: true
end
create_table "blob_files_27", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_27_on_sha256", unique: true
end
create_table "blob_files_28", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_28_on_sha256", unique: true
end
create_table "blob_files_29", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_29_on_sha256", unique: true
end
create_table "blob_files_30", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_30_on_sha256", unique: true
end
create_table "blob_files_31", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_31_on_sha256", unique: true
end
create_table "blob_files_32", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_32_on_sha256", unique: true
end
create_table "blob_files_33", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_33_on_sha256", unique: true
end
create_table "blob_files_34", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_34_on_sha256", unique: true
end
create_table "blob_files_35", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_35_on_sha256", unique: true
end
create_table "blob_files_36", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_36_on_sha256", unique: true
end
create_table "blob_files_37", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_37_on_sha256", unique: true
end
create_table "blob_files_38", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_38_on_sha256", unique: true
end
create_table "blob_files_39", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_39_on_sha256", unique: true
end
create_table "blob_files_40", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_40_on_sha256", unique: true
end
create_table "blob_files_41", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_41_on_sha256", unique: true
end
create_table "blob_files_42", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_42_on_sha256", unique: true
end
create_table "blob_files_43", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_43_on_sha256", unique: true
end
create_table "blob_files_44", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_44_on_sha256", unique: true
end
create_table "blob_files_45", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_45_on_sha256", unique: true
end
create_table "blob_files_46", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_46_on_sha256", unique: true
end
create_table "blob_files_47", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_47_on_sha256", unique: true
end
create_table "blob_files_48", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_48_on_sha256", unique: true
end
create_table "blob_files_49", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_49_on_sha256", unique: true
end
create_table "blob_files_50", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_50_on_sha256", unique: true
end
create_table "blob_files_51", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_51_on_sha256", unique: true
end
create_table "blob_files_52", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_52_on_sha256", unique: true
end
create_table "blob_files_53", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_53_on_sha256", unique: true
end
create_table "blob_files_54", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_54_on_sha256", unique: true
end
create_table "blob_files_55", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_55_on_sha256", unique: true
end
create_table "blob_files_56", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_56_on_sha256", unique: true
end
create_table "blob_files_57", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_57_on_sha256", unique: true
end
create_table "blob_files_58", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_58_on_sha256", unique: true
end
create_table "blob_files_59", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_59_on_sha256", unique: true
end
create_table "blob_files_60", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_60_on_sha256", unique: true
end
create_table "blob_files_61", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_61_on_sha256", unique: true
end
create_table "blob_files_62", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_62_on_sha256", unique: true
end
create_table "blob_files_63", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.integer "version", null: false
t.jsonb "metadata", default: {}, null: false
t.string "content_type", null: false
t.integer "size_bytes", null: false
t.datetime "created_at", null: false
t.index ["sha256"], name: "index_blob_files_63_on_sha256", unique: true
end
create_table "delayed_jobs", force: :cascade do |t|
t.integer "priority", default: 0, null: false
t.integer "attempts", default: 0, null: false

28
package-lock.json generated
View File

@@ -35,8 +35,10 @@
},
"devDependencies": {
"@pmmmwh/react-refresh-webpack-plugin": "^0.5.10",
"@prettier/plugin-ruby": "^4.0.4",
"@types/lodash": "^4.14.192",
"@types/react": "^18.0.33",
"prettier": "^3.4.2",
"react-refresh": "^0.14.0",
"typescript": "^5.0.3",
"webpack": "5",
@@ -2089,6 +2091,16 @@
}
}
},
"node_modules/@prettier/plugin-ruby": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/@prettier/plugin-ruby/-/plugin-ruby-4.0.4.tgz",
"integrity": "sha512-lCpvfS/dQU5WrwN3AQ5vR8qrvj2h5gE41X08NNzAAXvHdM4zwwGRcP2sHSxfu6n6No+ljWCVx95NvJPFTTjCTg==",
"dev": true,
"license": "MIT",
"peerDependencies": {
"prettier": "^3.0.0"
}
},
"node_modules/@sinclair/typebox": {
"version": "0.25.24",
"resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.25.24.tgz",
@@ -6575,6 +6587,22 @@
"integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==",
"license": "MIT"
},
"node_modules/prettier": {
"version": "3.4.2",
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.4.2.tgz",
"integrity": "sha512-e9MewbtFo+Fevyuxn/4rrcDAaq0IYxPGLvObpQjiZBMAzB9IGmzlnG9RZy3FFas+eBMu2vA0CszMeduow5dIuQ==",
"dev": true,
"license": "MIT",
"bin": {
"prettier": "bin/prettier.cjs"
},
"engines": {
"node": ">=14"
},
"funding": {
"url": "https://github.com/prettier/prettier?sponsor=1"
}
},
"node_modules/process-nextick-args": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",

31
rake/blob_file.rake Normal file
View File

@@ -0,0 +1,31 @@
namespace :blob_file do
desc "migrate blob files to the new format"
task migrate_blob_entries: :environment do
batch_size = ENV["batch_size"]&.to_i || 1000
start_at = ENV["start_at"] || ("00" * 32)
num_migrated = 0
BlobEntryP
.where("sha256 >= decode(?, 'hex')", start_at)
.where("sha256 NOT IN (SELECT sha256 FROM blob_files)")
.find_in_batches(batch_size: batch_size) do |batch|
puts "loaded #{batch.size} blob entries starting at #{HexUtil.bin2hex(batch.first.sha256)}"
batch.each do |blob_entry|
blob_file = BlobFile.find_or_initialize_from_blob_entry(blob_entry)
sha256_hex = HexUtil.bin2hex(blob_file.sha256)
begin
unless blob_file.persisted?
blob_file.save!
num_migrated += 1
end
rescue => e
puts "error saving blob file #{sha256_hex}: #{e}"
end
end
puts "migrated #{batch.size} blob entries [last: #{HexUtil.bin2hex(batch.last.sha256)}]"
end
puts "migrated #{num_migrated} blob entries"
end
end
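A hypothetical invocation from a Rails console, assuming the task above; the batch_size and start_at values are illustrative, and the same ENV variables work when running the task from a shell:
Rails.application.load_tasks
ENV["batch_size"] = "500"       # smaller batches
ENV["start_at"]   = "ab" * 32   # hex sha256 to resume from
Rake::Task["blob_file:migrate_blob_entries"].invoke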

View File

@@ -53,7 +53,7 @@ describe Domain::Fa::Job::FavsJob do
user.scanned_favs_at = old_scanned_at = 1.day.ago
user.save!
perform_now({ url_name: "zzreg" })
expect(user.scanned_favs_at).to eq(old_scanned_at)
expect(user.scanned_favs_at).to be_within(1.second).of(old_scanned_at)
end
end

View File

@@ -7,13 +7,10 @@ describe Domain::Inkbunny::Job::LatestPostsJob do
"https://inkbunny.net/api_search.php?orderby=create_datetime&keywords=no&title=no&description=no"
end
let(:api_submissions_url) do
"https://inkbunny.net/api_submissions.php" +
"?submission_ids=3104202,3104200,3104197&" +
"show_description=yes&show_writing=yes&show_pools=yes"
"https://inkbunny.net/api_submissions.php?submission_ids=3104202,3104200,3104197&show_description=yes&show_writing=yes&show_pools=yes"
end
let(:api_submissions_1047334_url) do
"https://inkbunny.net/api_submissions.php" + "?submission_ids=1047334&" +
"show_description=yes&show_writing=yes&show_pools=yes"
"https://inkbunny.net/api_submissions.php?submission_ids=1047334&show_description=yes&show_writing=yes&show_pools=yes"
end
context "the files do not change in the response" do
@@ -29,7 +26,7 @@ describe Domain::Inkbunny::Job::LatestPostsJob do
SpecUtil.read_fixture_file("domain/inkbunny/job/api_search.json")
},
{
method: :post,
method: :get,
uri: api_submissions_url,
content_type: "application/json",
contents:
@@ -141,7 +138,7 @@ describe Domain::Inkbunny::Job::LatestPostsJob do
)
},
{
method: :post,
method: :get,
uri: api_submissions_1047334_url,
content_type: "application/json",
contents:
@@ -160,7 +157,7 @@ describe Domain::Inkbunny::Job::LatestPostsJob do
)
},
{
method: :post,
method: :get,
uri: api_submissions_1047334_url,
content_type: "application/json",
contents:

View File

@@ -43,7 +43,7 @@ describe Domain::E621::CsvPostImporter do
expect(post.tags_array).to eq(%w[tag1 tag2])
expect(post.tags.map(&:name)).to match(%w[tag1 tag2])
expect(post.taggings.map(&:category).to_set).to eq(["cat_general"].to_set)
expect(post.e621_updated_at).to eq(Time.parse "2023-08-24T07:35:09-07:00")
expect(post.e621_updated_at).to eq(Time.parse "2023-08-24T07:35:09")
end
it "does not touch posts updated after the csv" do

View File

@@ -17,7 +17,7 @@ class SpecUtil
response_body: "http body",
performer_name: "direct"
)
mock = instance_double("Scraper::HttpPerformer")
mock = instance_double("Scraper::CurlHttpPerformer")
allow(mock).to receive(:is_a?).with(String).and_return(false)
allow(mock).to receive(:name).and_return(performer_name)
allow(mock).to receive(:do_request).with(
@@ -25,7 +25,7 @@ class SpecUtil
expected_url,
request_headers
).and_return(
Scraper::HttpPerformer::Response.new(
Scraper::CurlHttpPerformer::Response.new(
response_code,
response_headers,
response_time_ms,

View File

@@ -0,0 +1,70 @@
class BlobFileTest < ActiveSupport::TestCase
setup do
# safeguard against running this test in a non-test environment
root_dir =
File.absolute_path(Rails.application.config_for("blob_file_location"))
assert_match %r{^#{Rails.root}/tmp}, root_dir
FileUtils.rm_rf(root_dir)
end
test "building a blob file works" do
content_bytes = TestUtil.random_string(1024)
blob_file = BlobFile.new(content_bytes: content_bytes, content_type: "text")
assert blob_file.valid?
assert_equal blob_file.size_bytes, 1024
assert_equal blob_file.content_bytes, content_bytes
assert_equal blob_file.content_type, "text"
assert_equal blob_file.content_bytes.encoding, Encoding::ASCII_8BIT
assert blob_file.save
end
test "counts bytes with unicode characters" do
content_bytes = "fooばr"
blob_file = BlobFile.new(content_bytes: content_bytes, content_type: "text")
assert blob_file.valid?
assert_equal blob_file.size_bytes, 7
assert blob_file.save
sha256 = blob_file.sha256
blob_file = BlobFile.find(sha256)
assert_equal blob_file.content_bytes, content_bytes
end
test "puts the file in the right place" do
content_bytes = TestUtil.random_string(1024)
blob_file = BlobFile.new(content_bytes: content_bytes, content_type: "text")
assert blob_file.save
assert File.exist?(blob_file.absolute_file_path)
assert_equal File.binread(blob_file.absolute_file_path), content_bytes
end
test "saving blob when the file already exists works" do
content_bytes = TestUtil.random_string(1024)
blob_file = BlobFile.new(content_bytes: content_bytes, content_type: "text")
FileUtils.mkdir_p(File.dirname(blob_file.absolute_file_path))
File.binwrite(blob_file.absolute_file_path, content_bytes)
assert blob_file.save
assert_equal blob_file.content_bytes, content_bytes
end
test "file path segmentation works" do
test_cases = [
["abcd1234", [2], %w[ab abcd1234]],
["abcd1234", [4, 2], %w[abcd 12 abcd1234]],
["abcd1234", [4, 2, 2], %w[abcd 12 34 abcd1234]],
["abcd1234", [2, 2, 1], %w[ab cd 1 abcd1234]]
]
test_cases.each do |sha256_hex, pattern, expected|
assert_equal BlobFile.path_segments(pattern, sha256_hex), expected
end
end
test "from an initialized BlobEntryP" do
blob_entry = TestUtil.build_blob_entry
blob_file = BlobFile.find_or_initialize_from_blob_entry(blob_entry)
assert blob_file.save
assert_equal blob_file.content_bytes, blob_entry.contents
assert_equal blob_file.content_type, blob_entry.content_type
end
end

View File

@@ -111,7 +111,7 @@ module TestUtil
mock.expect(:name, "direct")
mock.expect(
:get,
Scraper::HttpPerformer::Response.new(
Scraper::CurlHttpPerformer::Response.new(
response_code,
response_headers,
response_time_ms,