fix specs, add migrator
This commit is contained in:
@@ -10,6 +10,8 @@ services:
|
||||
- ../..:/workspaces:cached
|
||||
- ./fish-shell-conf-d:/home/vscode/.config/fish/conf.d
|
||||
- devcontainer-redux-gem-cache:/usr/local/rvm/gems
|
||||
- devcontainer-redux-blob-files:/mnt/blob_files_development
|
||||
- /tank/redux-data/blob_files_production:/mnt/blob_files_production
|
||||
|
||||
# Overrides default command so things don't shut down after the process ends.
|
||||
command: sleep infinity
|
||||
@@ -44,3 +46,4 @@ services:
|
||||
volumes:
|
||||
postgres-data:
|
||||
devcontainer-redux-gem-cache:
|
||||
devcontainer-redux-blob-files:
|
||||
|
||||
@@ -10,3 +10,4 @@ profiler
|
||||
.vscode
|
||||
launch.json
|
||||
settings.json
|
||||
*.export
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -52,4 +52,5 @@ ext/xdiff/xdiff
|
||||
/yarn-error.log
|
||||
yarn-debug.log*
|
||||
.yarn-integrity
|
||||
.DS_Store
|
||||
.DS_Store
|
||||
*.export
|
||||
38
Dockerfile
38
Dockerfile
@@ -22,6 +22,23 @@ RUN rake compile
|
||||
FROM ruby:3.2.6
|
||||
USER root
|
||||
|
||||
# apt caching & install packages
|
||||
RUN rm -f /etc/apt/apt.conf.d/docker-clean; \
|
||||
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
|
||||
|
||||
RUN \
|
||||
--mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt-get update && \
|
||||
apt-get install --no-install-recommends --no-install-suggests -y \
|
||||
libblas-dev liblapack-dev
|
||||
|
||||
# preinstall gems that take a long time to install
|
||||
RUN MAKE="make -j12" gem install bundler -v '2.4.6' --verbose
|
||||
RUN MAKE="make -j12" gem install faiss -v '0.2.5' --verbose
|
||||
RUN MAKE="make -j12" gem install rails_live_reload -v '0.3.4' --verbose
|
||||
RUN bundle config --global frozen 1
|
||||
|
||||
# set up nodejs 18.x deb repo
|
||||
RUN mkdir -p /etc/apt/keyrings && \
|
||||
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
|
||||
@@ -29,23 +46,17 @@ RUN mkdir -p /etc/apt/keyrings && \
|
||||
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_18.x nodistro main" \
|
||||
| tee /etc/apt/sources.list.d/nodesource.list
|
||||
|
||||
# apt caching & install packages
|
||||
RUN rm -f /etc/apt/apt.conf.d/docker-clean; \
|
||||
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
|
||||
RUN \
|
||||
--mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt-get update && \
|
||||
apt-get install --no-install-recommends --no-install-suggests -y \
|
||||
libblas-dev liblapack-dev libvips42 ca-certificates curl gnupg nodejs libpq-dev
|
||||
|
||||
WORKDIR /usr/src/app
|
||||
RUN gem install bundler -v '2.4.6'
|
||||
RUN gem install faiss -v '0.2.5'
|
||||
# RUN bundle config --global frozen 1
|
||||
libvips42 ca-certificates curl gnupg nodejs libpq-dev
|
||||
|
||||
COPY --from=native-gems /usr/src/app/gems/xdiff-rb /gems/xdiff-rb
|
||||
COPY --from=native-gems /usr/src/app/gems/rb-bsdiff /gems/rb-bsdiff
|
||||
|
||||
WORKDIR /usr/src/app
|
||||
COPY Gemfile Gemfile.lock ./
|
||||
RUN bundle install
|
||||
|
||||
@@ -58,7 +69,12 @@ RUN yarn
|
||||
COPY . .
|
||||
|
||||
# precompile assets
|
||||
RUN RAILS_ENV=production bin/rails assets:precompile
|
||||
|
||||
# RUN RAILS_ENV=production bin/rails assets:precompile
|
||||
RUN mkdir -p tmp/pids
|
||||
|
||||
# create user with id=1000 gid=1000
|
||||
RUN groupadd -g 1000 app && \
|
||||
useradd -m -d /home/app -s /bin/bash -u 1000 -g 1000 app
|
||||
RUN chown -R app:app /usr/src/app
|
||||
USER app
|
||||
CMD /bin/bash
|
||||
|
||||
19
Rakefile
19
Rakefile
@@ -13,6 +13,7 @@ Rake.application.rake_require "fa"
|
||||
Rake.application.rake_require "e621"
|
||||
Rake.application.rake_require "twitter"
|
||||
Rake.application.rake_require "ib"
|
||||
Rake.application.rake_require "blob_file"
|
||||
|
||||
task set_ar_stdout: :environment do
|
||||
ActiveRecord::Base.logger = Logger.new($stdout)
|
||||
@@ -57,24 +58,6 @@ task periodic_tasks: %i[environment set_logger_stdout] do
|
||||
loop { sleep 10 }
|
||||
end
|
||||
|
||||
namespace :blob_entries do
|
||||
task export_samples: :environment do
|
||||
limit = ENV["limit"]&.to_i || raise("need 'limit' (num)")
|
||||
outfile =
|
||||
ENV["outfile"] || raise("need 'outfile' (file path, .json encoded)")
|
||||
BlobEntrySampleExporter.new.export_samples(limit, outfile)
|
||||
end
|
||||
task import_samples: :environment do
|
||||
infile = ENV["infile"] || raise("need 'infile' (file path, .json encoded)")
|
||||
BlobEntrySampleExporter.new.import_samples(infile)
|
||||
end
|
||||
task migrate_entries: :environment do
|
||||
start_at = ENV["start_at"]
|
||||
batch_size = ENV["batch_size"]&.to_i || 64
|
||||
BlobEntrySampleExporter.new.migrate_blob_entries(start_at, batch_size)
|
||||
end
|
||||
end
|
||||
|
||||
namespace :db_sampler do
|
||||
task export: :environment do
|
||||
url_names = ENV["url_names"] || raise("need 'url_names' (comma-separated)")
|
||||
|
||||
@@ -22,6 +22,7 @@ class DbSampler
|
||||
end
|
||||
|
||||
def export(url_names)
|
||||
$stderr.puts "exporting users..."
|
||||
Domain::Fa::User
|
||||
.includes(
|
||||
{
|
||||
@@ -65,17 +66,16 @@ class DbSampler
|
||||
|
||||
def model_id(model)
|
||||
name = model.class.name
|
||||
pk = model.class.primary_key.to_sym
|
||||
id = model.send(pk)
|
||||
pk = model.class.primary_key
|
||||
id = model.id
|
||||
id_fmt = id.to_s
|
||||
id_fmt = HexUtil.bin2hex(id) if pk == :sha256
|
||||
id_fmt = HexUtil.bin2hex(id) if pk == "sha256"
|
||||
id_fmt = "#{name} / #{id_fmt}"
|
||||
end
|
||||
|
||||
def import_model(model)
|
||||
pk = model.class.primary_key.to_sym
|
||||
id = model.send(pk)
|
||||
exists = model.class.exists?(pk => id)
|
||||
id = model.id
|
||||
exists = model.class.exists?(id)
|
||||
|
||||
if exists
|
||||
$stderr.puts("skipped existing #{model_id(model)}")
|
||||
@@ -94,8 +94,7 @@ class DbSampler
|
||||
|
||||
def handle_model(model, level, user_depth)
|
||||
return unless model
|
||||
is_user = model.is_a?(Domain::Fa::User)
|
||||
user_depth += 1 if is_user
|
||||
user_depth += 1 if model.is_a?(Domain::Fa::User)
|
||||
return unless @handled.add?(model)
|
||||
|
||||
assocs = SCHEMA[model.class] || raise("invalid: #{model.class.name}")
|
||||
@@ -118,7 +117,7 @@ class DbSampler
|
||||
|
||||
def dump(model, level)
|
||||
@file.puts(Base64.strict_encode64(Zstd.compress(Marshal.dump(model), 1)))
|
||||
id = model.send(model.class.primary_key)
|
||||
id = model.id
|
||||
id = HexUtil.bin2hex(id) if model.class.primary_key == "sha256"
|
||||
$stderr.puts ("-" * level) + " dumped #{model.class.name}/#{id}"
|
||||
end
|
||||
|
||||
@@ -12,13 +12,6 @@ class Scraper::HttpClient
|
||||
@http_performer = http_performer
|
||||
@domain_last_requested_at = {}
|
||||
init_cookie_jar
|
||||
# TODO - populate AdaptiveCache with domains this client cares about
|
||||
@max_cache_size = 8
|
||||
@blob_entry_cache =
|
||||
Hash.new do |hash, key|
|
||||
hash[key] = LegacyImport::AdaptiveCache.new(@max_cache_size, 1.0, 0.1)
|
||||
end
|
||||
logger.level = :info
|
||||
@config.do_login(http_performer)
|
||||
end
|
||||
|
||||
@@ -87,26 +80,12 @@ class Scraper::HttpClient
|
||||
response_headers["Content-Type"] || response_headers["content-type"] ||
|
||||
"none/none"
|
||||
|
||||
cache_key = "#{uri.host}|#{content_type}"
|
||||
blob_entry_cache = @blob_entry_cache[cache_key]
|
||||
candidates =
|
||||
if blob_entry_cache.at_capacity? && rand(0..100) >= 5
|
||||
blob_entry_cache.candidates
|
||||
else
|
||||
[]
|
||||
end
|
||||
candidates << caused_by_entry.response if caused_by_entry&.response
|
||||
if caused_by_entry&.response&.base
|
||||
candidates << caused_by_entry.response.base
|
||||
end
|
||||
|
||||
retries = 0
|
||||
begin
|
||||
response_blob_entry =
|
||||
BlobEntryP.find_or_build(
|
||||
content_type: content_type,
|
||||
contents: response_body,
|
||||
candidates: candidates
|
||||
contents: response_body
|
||||
)
|
||||
|
||||
scrubbed_uri = @config.scrub_stored_uri(uri)
|
||||
@@ -136,18 +115,6 @@ class Scraper::HttpClient
|
||||
raise
|
||||
end
|
||||
|
||||
if response_blob_entry.base_sha256
|
||||
blob_entry_cache.reward(
|
||||
HexUtil.bin2hex(response_blob_entry.base_sha256)[0..8]
|
||||
)
|
||||
else
|
||||
blob_entry_cache.insert(
|
||||
HexUtil.bin2hex(response_blob_entry.sha256)[0..8],
|
||||
response_blob_entry,
|
||||
scrubbed_uri
|
||||
)
|
||||
end
|
||||
|
||||
response_code_colorized =
|
||||
if response_code == 200
|
||||
response_code.to_s.light_green
|
||||
|
||||
@@ -55,62 +55,29 @@ class BlobEntryP < ReduxApplicationRecord
|
||||
self.read_attribute(:contents).size
|
||||
end
|
||||
|
||||
def self.find_or_build(content_type:, contents:, candidates: [])
|
||||
def self.find_or_build(content_type:, contents:)
|
||||
sha256 = Digest::SHA256.digest(contents)
|
||||
BlobEntryP.find_by(sha256: sha256) ||
|
||||
begin
|
||||
build_record(
|
||||
content_type: content_type,
|
||||
sha256: sha256,
|
||||
contents: contents,
|
||||
candidates: candidates
|
||||
contents: contents
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
DIFFABLE_CONTENT_TYPES = [%r{text/html}, %r{text/plain}, %r{application/json}]
|
||||
|
||||
def self.build_record(content_type:, sha256: nil, contents:, candidates: [])
|
||||
def self.build_record(content_type:, sha256: nil, contents:)
|
||||
sha256 ||= Digest::SHA256.digest(contents)
|
||||
record =
|
||||
self.new(sha256: sha256, content_type: content_type, size: contents.size)
|
||||
|
||||
smallest_patch_size = nil
|
||||
smallest_patch = nil
|
||||
smallest_candidate = nil
|
||||
|
||||
candidates
|
||||
.map do |candidate|
|
||||
# only consider candidates with the same content type (may relax this later)
|
||||
next nil if candidate.content_type != content_type
|
||||
# only consider candidates who themselves aren't patch-based
|
||||
next nil unless candidate.base.nil?
|
||||
# only consider diffable content types
|
||||
next nil unless DIFFABLE_CONTENT_TYPES.any? { |ct| content_type =~ ct }
|
||||
|
||||
[candidate, XDiff.diff(candidate.contents, contents)]
|
||||
end
|
||||
.reject(&:nil?)
|
||||
.each do |pair|
|
||||
candidate, patch = pair
|
||||
if smallest_patch_size.nil? || patch.size < smallest_patch_size
|
||||
smallest_patch_size = patch.size
|
||||
smallest_patch = patch
|
||||
smallest_candidate = candidate
|
||||
end
|
||||
end
|
||||
|
||||
# only use a patch if it's <= 60% the original content size
|
||||
if smallest_patch_size && smallest_patch_size <= (contents.size * 0.6)
|
||||
record.base = smallest_candidate
|
||||
record.contents = smallest_patch
|
||||
else
|
||||
# no candidate present, store the whole contents directly in the record
|
||||
record.contents = contents
|
||||
end
|
||||
|
||||
raise RuntimeError.new("invariant!") if record.contents != contents
|
||||
|
||||
self.new(
|
||||
sha256: sha256,
|
||||
content_type: content_type,
|
||||
size: contents.size,
|
||||
contents: contents
|
||||
)
|
||||
record
|
||||
end
|
||||
end
|
||||
|
||||
98
app/models/blob_file.rb
Normal file
98
app/models/blob_file.rb
Normal file
@@ -0,0 +1,98 @@
|
||||
# A content-addressed file stored on disk, with one row per file in the
# `blob_files` table. The row is keyed by the binary SHA-256 of the file
# contents; the bytes themselves live under ROOT_DIR at a path derived from
# the hash (see #relative_file_path).
class BlobFile < ReduxApplicationRecord
  self.table_name = "blob_files"

  # Base directory for stored files, read from config/blob_file_location.yml
  # for the current Rails environment. Fails at load time if unset.
  ROOT_DIR =
    Rails.application.config_for("blob_file_location") ||
      raise("no blob_file_location config")

  # Per-version directory sharding pattern: each number is how many leading
  # hex characters of the sha256 form the next directory level.
  #
  # consider sha256 `e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855`
  # breaking it into 3 parts with the v1 pattern, becomes:
  # e3/b0/c/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
  # that gives 256 * 256 * 16 = 1,048,576 leaf directories, so with 100M files
  # each leaf directory will hold 100M / 1,048,576 = ~95 files
  FILE_PATH_PATTERNS = { v1: [2, 2, 1] }.freeze

  include ImmutableModel
  # Rows are never deleted through the model.
  before_destroy { raise ActiveRecord::ReadOnlyRecord }

  self.primary_key = :sha256

  # Binary SHA-256 digest of the empty string.
  EMPTY_FILE_SHA256 =
    HexUtil.hex2bin(
      "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    )

  enum version: { v1: 1 }
  after_initialize { self.version ||= :v1 }

  validates_presence_of(:sha256, :content_type, :size_bytes)
  validates :sha256, length: { is: 32 }
  validates :content_bytes,
            length: {
              minimum: 0,
              allow_nil: false,
              message: "can't be nil"
            }

  # On first save, write the contents to disk unless a file already exists at
  # the content-addressed path.
  before_save do
    if !persisted? && !File.exist?(absolute_file_path)
      FileUtils.mkdir_p(File.dirname(absolute_file_path))
      File.binwrite(absolute_file_path, content_bytes)
    end
  end

  # Finds the BlobFile whose sha256 matches `blob_entry.sha256`, or builds an
  # unsaved one populated from the entry's content type and contents.
  #
  # @param blob_entry an object responding to #sha256, #content_type and
  #   #contents
  # @return [BlobFile] the existing record, or a new unsaved one
  def self.find_or_initialize_from_blob_entry(blob_entry)
    BlobFile.find_or_initialize_by(sha256: blob_entry.sha256) do |new_file|
      new_file.content_type = blob_entry.content_type
      new_file.content_bytes = blob_entry.contents
    end
  end

  # Sets the file contents and derives `size_bytes` and `sha256` from them.
  #
  # The bytes are stored as a binary (ASCII-8BIT) copy, so the caller's string
  # is neither re-encoded nor required to be unfrozen.
  #
  # @param content_bytes [String] raw file contents
  # @raise [RuntimeError] if the record is already persisted
  def content_bytes=(content_bytes)
    raise("cannot set content_bytes of an existing BlobFile") if persisted?

    @content_bytes = content_bytes.b
    self.size_bytes = @content_bytes.bytesize
    self.sha256 = Digest::SHA256.digest(@content_bytes)
  end

  # Returns the file contents as a binary string. For a persisted record the
  # bytes are read from disk on first access and memoized; for a new record
  # this is whatever was assigned via #content_bytes= (possibly nil).
  #
  # @return [String, nil]
  def content_bytes
    return @content_bytes unless persisted?

    # File.binread already returns an ASCII-8BIT string.
    @content_bytes ||= File.binread(absolute_file_path)
  end

  # @return [String] ROOT_DIR joined with #relative_file_path (memoized)
  def absolute_file_path
    @absolute_file_path ||= File.join(ROOT_DIR, relative_file_path)
  end

  # Builds the path of this file relative to ROOT_DIR: the version name, the
  # shard directories for that version's pattern, then the full hex sha256.
  #
  # @return [String] e.g. "v1/e3/b0/c/e3b0c442..."
  def relative_file_path
    segments =
      self.class.path_segments(
        FILE_PATH_PATTERNS[version.to_sym],
        HexUtil.bin2hex(sha256)
      )
    [version, *segments].join("/")
  end

  # Splits a hex digest into shard directory names followed by the digest.
  #
  # @param pattern [Array<Integer>] number of hex characters per directory
  #   level, consumed left to right
  # @param sha256_hex [String] hex-encoded digest
  # @return [Array<String>] one prefix slice per pattern entry, then the full
  #   digest as the final element
  def self.path_segments(pattern, sha256_hex)
    offset = 0
    parts =
      pattern.map do |part_len|
        slice = sha256_hex[offset, part_len]
        offset += part_len
        slice
      end
    parts << sha256_hex
  end
end
|
||||
14
config/blob_file_location.yml
Normal file
14
config/blob_file_location.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
test:
|
||||
tmp/blob_files_test
|
||||
|
||||
development:
|
||||
/mnt/blob_files_development
|
||||
|
||||
staging:
|
||||
/mnt/blob_files_production
|
||||
|
||||
production:
|
||||
/mnt/blob_files_production
|
||||
|
||||
worker:
|
||||
/mnt/blob_files_production
|
||||
@@ -50,28 +50,19 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "res_headers_id"
|
||||
t.integer "diff_type", default: 0
|
||||
t.binary "native_blob_entry_sha256"
|
||||
t.index ["imported_from_file"],
|
||||
name: "index_cache_http_log_entries_on_imported_from_file"
|
||||
t.index %w[path host], name: "index_cache_http_log_entries_on_path_and_host"
|
||||
t.index ["path"],
|
||||
name: "cache_http_log_entries_path_idx",
|
||||
opclass: :gist_trgm_ops,
|
||||
using: :gist
|
||||
t.index ["imported_from_file"], name: "index_cache_http_log_entries_on_imported_from_file"
|
||||
t.index ["path", "host"], name: "index_cache_http_log_entries_on_path_and_host"
|
||||
t.index ["path"], name: "cache_http_log_entries_path_idx", opclass: :gist_trgm_ops, using: :gist
|
||||
t.index ["path"], name: "index_pattern_ops_on_hle_entry_path"
|
||||
t.index ["requested_at"],
|
||||
name: "index_cache_http_log_entries_on_requested_at"
|
||||
t.index ["requested_at"], name: "index_cache_http_log_entries_on_requested_at"
|
||||
end
|
||||
|
||||
create_table "cache_http_log_entry_headers",
|
||||
id: :serial,
|
||||
force: :cascade do |t|
|
||||
create_table "cache_http_log_entry_headers", id: :serial, force: :cascade do |t|
|
||||
t.hstore "headers", null: false
|
||||
t.binary "sha256", null: false
|
||||
t.datetime "created_at"
|
||||
t.datetime "updated_at"
|
||||
t.index ["sha256"],
|
||||
name: "index_cache_http_log_entry_headers_on_sha256",
|
||||
unique: true
|
||||
t.index ["sha256"], name: "index_cache_http_log_entry_headers_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "e621_posts", id: :serial, force: :cascade do |t|
|
||||
@@ -98,8 +89,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "imgsearch_entry_id"
|
||||
t.index ["blob_entry_id"], name: "index_e621_posts_on_blob_entry_id"
|
||||
t.index ["e621_id"], name: "index_e621_posts_on_e621_id", unique: true
|
||||
t.index ["imgsearch_entry_id"],
|
||||
name: "index_e621_posts_on_imgsearch_entry_id"
|
||||
t.index ["imgsearch_entry_id"], name: "index_e621_posts_on_imgsearch_entry_id"
|
||||
t.index ["md5"], name: "index_e621_posts_on_md5"
|
||||
end
|
||||
|
||||
@@ -248,10 +238,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "object_type", limit: 2, null: false
|
||||
t.integer "object_id", null: false
|
||||
t.bigint "fingerprints", array: true
|
||||
t.index %w[object_type object_id],
|
||||
name:
|
||||
"index_lite_media_file_fingerprints_on_object_type_and_object_id",
|
||||
unique: true
|
||||
t.index ["object_type", "object_id"], name: "index_lite_media_file_fingerprints_on_object_type_and_object_id", unique: true
|
||||
end
|
||||
|
||||
create_table "lite_s3_objects", force: :cascade do |t|
|
||||
@@ -271,8 +258,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.float "total_time"
|
||||
t.bigint "calls"
|
||||
t.datetime "captured_at"
|
||||
t.index %w[database captured_at],
|
||||
name: "index_pghero_query_stats_on_database_and_captured_at"
|
||||
t.index ["database", "captured_at"], name: "index_pghero_query_stats_on_database_and_captured_at"
|
||||
end
|
||||
|
||||
create_table "pghero_space_stats", id: :serial, force: :cascade do |t|
|
||||
@@ -281,8 +267,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.text "relation"
|
||||
t.bigint "size"
|
||||
t.datetime "captured_at"
|
||||
t.index %w[database captured_at],
|
||||
name: "index_pghero_space_stats_on_database_and_captured_at"
|
||||
t.index ["database", "captured_at"], name: "index_pghero_space_stats_on_database_and_captured_at"
|
||||
end
|
||||
|
||||
create_table "watch_jobs", id: :serial, force: :cascade do |t|
|
||||
@@ -319,9 +304,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "status"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index %w[enum_type weasyl_id],
|
||||
name: "index_weasyl_attempted_posts_on_enum_type_and_weasyl_id",
|
||||
unique: true
|
||||
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_attempted_posts_on_enum_type_and_weasyl_id", unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_descriptions", id: :serial, force: :cascade do |t|
|
||||
@@ -333,18 +316,13 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
create_table "weasyl_joins_user_follows", id: :serial, force: :cascade do |t|
|
||||
t.integer "follower_id", null: false
|
||||
t.integer "followed_id", null: false
|
||||
t.index %w[follower_id followed_id],
|
||||
name:
|
||||
"index_weasyl_joins_user_follows_on_follower_id_and_followed_id",
|
||||
unique: true
|
||||
t.index ["follower_id", "followed_id"], name: "index_weasyl_joins_user_follows_on_follower_id_and_followed_id", unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_joins_user_friends", id: :serial, force: :cascade do |t|
|
||||
t.integer "a_id", null: false
|
||||
t.integer "b_id", null: false
|
||||
t.index %w[a_id b_id],
|
||||
name: "index_weasyl_joins_user_friends_on_a_id_and_b_id",
|
||||
unique: true
|
||||
t.index ["a_id", "b_id"], name: "index_weasyl_joins_user_friends_on_a_id_and_b_id", unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_medias", id: :serial, force: :cascade do |t|
|
||||
@@ -377,9 +355,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.datetime "full_scanned_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index %w[enum_type weasyl_id],
|
||||
name: "index_weasyl_posts_on_enum_type_and_weasyl_id",
|
||||
unique: true
|
||||
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_posts_on_enum_type_and_weasyl_id", unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_users", id: :serial, force: :cascade do |t|
|
||||
@@ -393,9 +369,7 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "userid"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["login_name"],
|
||||
name: "index_weasyl_users_on_login_name",
|
||||
unique: true
|
||||
t.index ["login_name"], name: "index_weasyl_users_on_login_name", unique: true
|
||||
end
|
||||
|
||||
create_table "xtwitter_tweets", id: :serial, force: :cascade do |t|
|
||||
@@ -420,4 +394,5 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
@@ -9,6 +9,6 @@ class DropGoodJobs < ActiveRecord::Migration[7.0]
|
||||
drop_table :good_job_batches
|
||||
drop_table :good_job_executions
|
||||
drop_table :good_job_processes
|
||||
drop_table :good_job_setting
|
||||
drop_table :good_job_settings
|
||||
end
|
||||
end
|
||||
|
||||
41
db/redux_migrate/20241218190906_create_blob_files.rb
Normal file
41
db/redux_migrate/20241218190906_create_blob_files.rb
Normal file
@@ -0,0 +1,41 @@
|
||||
# Creates the hash-partitioned `blob_files` table: one parent table plus
# NUM_PARTITIONS partitions named blob_files_00 .. blob_files_63, each with a
# unique index on sha256.
class CreateBlobFiles < ActiveRecord::Migration[7.0]
  NUM_PARTITIONS = 64

  def up
    main_table_sql = <<~SQL
      CREATE TABLE blob_files (
        sha256 bytea NOT NULL, -- sha256 of the file
        version integer NOT NULL, -- how to reconstruct the file path based on sha256
        metadata jsonb NOT NULL DEFAULT '{}', -- metadata about the file, where it came from, etc
        content_type character varying NOT NULL, -- content type of the file
        size_bytes integer NOT NULL, -- size of the file in bytes
        created_at timestamp(6) without time zone NOT NULL
      ) PARTITION BY HASH (sha256)
    SQL
    execute main_table_sql

    NUM_PARTITIONS.times do |partnum|
      partition_table = partition_table_name(partnum)
      partition_table_sql = <<~SQL
        CREATE TABLE #{partition_table}
        PARTITION OF blob_files FOR
        VALUES WITH (
          MODULUS #{NUM_PARTITIONS},
          REMAINDER #{partnum}
        )
      SQL
      execute partition_table_sql
      add_index partition_table, :sha256, unique: true
    end

    add_index :blob_files, :sha256, unique: true
  end

  def down
    # Drop the partitions before the parent. Dropping the partitioned parent
    # first removes its partitions along with it, after which the
    # per-partition drops fail on missing tables and abort the rollback.
    NUM_PARTITIONS.times do |partnum|
      drop_table partition_table_name(partnum)
    end
    drop_table :blob_files
  end

  private

  # Name of the partition table for a given remainder, zero-padded to two
  # digits (e.g. 7 -> :blob_files_07).
  def partition_table_name(partnum)
    :"blob_files_#{partnum.to_s.rjust(2, "0")}"
  end
end
|
||||
652
db/schema.rb
generated
652
db/schema.rb
generated
@@ -10,7 +10,7 @@
|
||||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.0].define(version: 2024_12_17_182130) do
|
||||
ActiveRecord::Schema[7.0].define(version: 2024_12_18_190906) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pg_prewarm"
|
||||
enable_extension "pg_stat_statements"
|
||||
@@ -605,6 +605,656 @@ ActiveRecord::Schema[7.0].define(version: 2024_12_17_182130) do
|
||||
t.index ["sha256"], name: "index_blob_entries_p_63_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_00", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_00_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_01", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_01_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_02", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_02_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_03", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_03_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_04", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_04_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_05", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_05_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_06", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_06_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_07", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_07_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_08", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_08_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_09", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_09_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_10", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_10_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_11", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_11_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_12", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_12_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_13", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_13_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_14", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_14_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_15", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_15_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_16", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_16_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_17", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_17_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_18", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_18_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_19", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_19_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_20", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_20_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_21", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_21_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_22", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_22_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_23", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_23_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_24", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_24_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_25", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_25_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_26", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_26_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_27", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_27_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_28", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_28_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_29", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_29_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_30", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_30_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_31", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_31_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_32", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_32_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_33", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_33_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_34", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_34_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_35", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_35_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_36", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_36_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_37", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_37_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_38", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_38_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_39", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_39_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_40", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_40_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_41", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_41_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_42", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_42_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_43", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_43_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_44", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_44_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_45", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_45_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_46", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_46_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_47", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_47_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_48", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_48_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_49", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_49_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_50", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_50_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_51", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_51_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_52", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_52_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_53", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_53_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_54", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_54_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_55", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_55_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_56", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_56_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_57", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_57_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_58", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_58_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_59", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_59_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_60", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_60_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_61", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_61_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_62", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_62_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "blob_files_63", id: false, force: :cascade do |t|
|
||||
t.binary "sha256", null: false
|
||||
t.integer "version", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.string "content_type", null: false
|
||||
t.integer "size_bytes", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["sha256"], name: "index_blob_files_63_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "delayed_jobs", force: :cascade do |t|
|
||||
t.integer "priority", default: 0, null: false
|
||||
t.integer "attempts", default: 0, null: false
|
||||
|
||||
28
package-lock.json
generated
28
package-lock.json
generated
@@ -35,8 +35,10 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@pmmmwh/react-refresh-webpack-plugin": "^0.5.10",
|
||||
"@prettier/plugin-ruby": "^4.0.4",
|
||||
"@types/lodash": "^4.14.192",
|
||||
"@types/react": "^18.0.33",
|
||||
"prettier": "^3.4.2",
|
||||
"react-refresh": "^0.14.0",
|
||||
"typescript": "^5.0.3",
|
||||
"webpack": "5",
|
||||
@@ -2089,6 +2091,16 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@prettier/plugin-ruby": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@prettier/plugin-ruby/-/plugin-ruby-4.0.4.tgz",
|
||||
"integrity": "sha512-lCpvfS/dQU5WrwN3AQ5vR8qrvj2h5gE41X08NNzAAXvHdM4zwwGRcP2sHSxfu6n6No+ljWCVx95NvJPFTTjCTg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"prettier": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@sinclair/typebox": {
|
||||
"version": "0.25.24",
|
||||
"resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.25.24.tgz",
|
||||
@@ -6575,6 +6587,22 @@
|
||||
"integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/prettier": {
|
||||
"version": "3.4.2",
|
||||
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.4.2.tgz",
|
||||
"integrity": "sha512-e9MewbtFo+Fevyuxn/4rrcDAaq0IYxPGLvObpQjiZBMAzB9IGmzlnG9RZy3FFas+eBMu2vA0CszMeduow5dIuQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"prettier": "bin/prettier.cjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/prettier/prettier?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/process-nextick-args": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
|
||||
|
||||
31
rake/blob_file.rake
Normal file
31
rake/blob_file.rake
Normal file
@@ -0,0 +1,31 @@
|
||||
namespace :blob_file do
  # Creates a BlobFile for every BlobEntryP row whose sha256 is not yet present
  # in the blob_files table.
  #
  # Environment variables:
  #   batch_size - number of rows loaded per batch (default 1000); must be a
  #                positive integer, otherwise the task aborts immediately.
  #   start_at   - hex-encoded sha256 to start from (default: 64 zeros). The
  #                "[last: ...]" value printed after each batch can be passed
  #                back in here on a later run.
  #
  # A failure on a single entry is logged and skipped so that one bad row does
  # not abort the whole run.
  desc "migrate blob files to the new format"
  task migrate_blob_entries: :environment do
    # Integer() rejects garbage such as "abc" or "", which String#to_i would
    # silently turn into 0 (a batch size that loads nothing).
    batch_size = Integer(ENV.fetch("batch_size", "1000"))
    raise ArgumentError, "batch_size must be positive, got #{batch_size}" unless batch_size.positive?

    start_at = ENV["start_at"] || ("00" * 32)
    num_migrated = 0

    BlobEntryP
      .where("sha256 >= decode(?, 'hex')", start_at)
      .where("sha256 NOT IN (SELECT sha256 FROM blob_files)")
      .find_in_batches(batch_size: batch_size) do |batch|
        puts "loaded #{batch.size} blob entries starting at #{HexUtil.bin2hex(batch.first.sha256)}"
        batch_migrated = 0

        batch.each do |blob_entry|
          # Taken from the source row so the digest is available for the error
          # message even when building the BlobFile itself raises.
          sha256_hex = HexUtil.bin2hex(blob_entry.sha256)

          begin
            blob_file = BlobFile.find_or_initialize_from_blob_entry(blob_entry)
            next if blob_file.persisted?

            blob_file.save!
            batch_migrated += 1
          rescue => e
            puts "error saving blob file #{sha256_hex}: #{e}"
          end
        end

        num_migrated += batch_migrated
        puts "migrated #{batch_migrated} of #{batch.size} blob entries [last: #{HexUtil.bin2hex(batch.last.sha256)}]"
      end

    puts "migrated #{num_migrated} blob entries"
  end
end
|
||||
@@ -53,7 +53,7 @@ describe Domain::Fa::Job::FavsJob do
|
||||
user.scanned_favs_at = old_scanned_at = 1.day.ago
|
||||
user.save!
|
||||
perform_now({ url_name: "zzreg" })
|
||||
expect(user.scanned_favs_at).to eq(old_scanned_at)
|
||||
expect(user.scanned_favs_at).to be_within(1.second).of(old_scanned_at)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -7,13 +7,10 @@ describe Domain::Inkbunny::Job::LatestPostsJob do
|
||||
"https://inkbunny.net/api_search.php?orderby=create_datetime&keywords=no&title=no&description=no"
|
||||
end
|
||||
let(:api_submissions_url) do
|
||||
"https://inkbunny.net/api_submissions.php" +
|
||||
"?submission_ids=3104202,3104200,3104197&" +
|
||||
"show_description=yes&show_writing=yes&show_pools=yes"
|
||||
"https://inkbunny.net/api_submissions.php?submission_ids=3104202,3104200,3104197&show_description=yes&show_writing=yes&show_pools=yes"
|
||||
end
|
||||
let(:api_submissions_1047334_url) do
|
||||
"https://inkbunny.net/api_submissions.php" + "?submission_ids=1047334&" +
|
||||
"show_description=yes&show_writing=yes&show_pools=yes"
|
||||
"https://inkbunny.net/api_submissions.php?submission_ids=1047334&show_description=yes&show_writing=yes&show_pools=yes"
|
||||
end
|
||||
|
||||
context "the files do not change in the response" do
|
||||
@@ -29,7 +26,7 @@ describe Domain::Inkbunny::Job::LatestPostsJob do
|
||||
SpecUtil.read_fixture_file("domain/inkbunny/job/api_search.json")
|
||||
},
|
||||
{
|
||||
method: :post,
|
||||
method: :get,
|
||||
uri: api_submissions_url,
|
||||
content_type: "application/json",
|
||||
contents:
|
||||
@@ -141,7 +138,7 @@ describe Domain::Inkbunny::Job::LatestPostsJob do
|
||||
)
|
||||
},
|
||||
{
|
||||
method: :post,
|
||||
method: :get,
|
||||
uri: api_submissions_1047334_url,
|
||||
content_type: "application/json",
|
||||
contents:
|
||||
@@ -160,7 +157,7 @@ describe Domain::Inkbunny::Job::LatestPostsJob do
|
||||
)
|
||||
},
|
||||
{
|
||||
method: :post,
|
||||
method: :get,
|
||||
uri: api_submissions_1047334_url,
|
||||
content_type: "application/json",
|
||||
contents:
|
||||
|
||||
@@ -43,7 +43,7 @@ describe Domain::E621::CsvPostImporter do
|
||||
expect(post.tags_array).to eq(%w[tag1 tag2])
|
||||
expect(post.tags.map(&:name)).to match(%w[tag1 tag2])
|
||||
expect(post.taggings.map(&:category).to_set).to eq(["cat_general"].to_set)
|
||||
expect(post.e621_updated_at).to eq(Time.parse "2023-08-24T07:35:09-07:00")
|
||||
expect(post.e621_updated_at).to eq(Time.parse "2023-08-24T07:35:09")
|
||||
end
|
||||
|
||||
it "does not touch posts updated after the csv" do
|
||||
|
||||
@@ -17,7 +17,7 @@ class SpecUtil
|
||||
response_body: "http body",
|
||||
performer_name: "direct"
|
||||
)
|
||||
mock = instance_double("Scraper::HttpPerformer")
|
||||
mock = instance_double("Scraper::CurlHttpPerformer")
|
||||
allow(mock).to receive(:is_a?).with(String).and_return(false)
|
||||
allow(mock).to receive(:name).and_return(performer_name)
|
||||
allow(mock).to receive(:do_request).with(
|
||||
@@ -25,7 +25,7 @@ class SpecUtil
|
||||
expected_url,
|
||||
request_headers
|
||||
).and_return(
|
||||
Scraper::HttpPerformer::Response.new(
|
||||
Scraper::CurlHttpPerformer::Response.new(
|
||||
response_code,
|
||||
response_headers,
|
||||
response_time_ms,
|
||||
|
||||
70
test/models/blob_file_test.rb
Normal file
70
test/models/blob_file_test.rb
Normal file
@@ -0,0 +1,70 @@
|
||||
# Model tests for BlobFile: building and saving, byte counting, on-disk
# placement, path segmentation, and construction from a BlobEntryP.
class BlobFileTest < ActiveSupport::TestCase
  setup do
    # safeguard against running this test in a non-test environment:
    # the configured blob directory is deleted below, so it must live under
    # the application's tmp directory.
    root_dir =
      File.absolute_path(Rails.application.config_for("blob_file_location"))
    # \A anchors to the start of the whole string (^ would also match after a
    # newline), and Regexp.escape keeps metacharacters in the root path from
    # being interpreted as pattern syntax.
    assert_match %r{\A#{Regexp.escape(Rails.root.to_s)}/tmp}, root_dir
    FileUtils.rm_rf(root_dir)
  end

  test "building a blob file works" do
    content_bytes = TestUtil.random_string(1024)
    blob_file = BlobFile.new(content_bytes: content_bytes, content_type: "text")
    assert blob_file.valid?
    assert_equal 1024, blob_file.size_bytes
    assert_equal content_bytes, blob_file.content_bytes
    assert_equal "text", blob_file.content_type
    assert_equal Encoding::ASCII_8BIT, blob_file.content_bytes.encoding

    assert blob_file.save
  end

  test "counts bytes with unicode characters" do
    # 3 ASCII bytes + one 3-byte UTF-8 character + 1 ASCII byte = 7 bytes
    content_bytes = "fooばr"
    blob_file = BlobFile.new(content_bytes: content_bytes, content_type: "text")
    assert blob_file.valid?
    assert_equal 7, blob_file.size_bytes
    assert blob_file.save
    sha256 = blob_file.sha256

    # reload by digest to check the round trip through storage
    blob_file = BlobFile.find(sha256)
    assert_equal content_bytes, blob_file.content_bytes
  end

  test "puts the file in the right place" do
    content_bytes = TestUtil.random_string(1024)
    blob_file = BlobFile.new(content_bytes: content_bytes, content_type: "text")
    assert blob_file.save
    assert File.exist?(blob_file.absolute_file_path)
    assert_equal content_bytes, File.binread(blob_file.absolute_file_path)
  end

  test "saving blob when the file already exists works" do
    content_bytes = TestUtil.random_string(1024)
    blob_file = BlobFile.new(content_bytes: content_bytes, content_type: "text")
    # write the file before saving the record to simulate a pre-existing blob
    FileUtils.mkdir_p(File.dirname(blob_file.absolute_file_path))
    File.binwrite(blob_file.absolute_file_path, content_bytes)
    assert blob_file.save
    assert_equal content_bytes, blob_file.content_bytes
  end

  test "file path segmentation works" do
    # each case: [sha256_hex, segment-length pattern, expected path segments]
    test_cases = [
      ["abcd1234", [2], %w[ab abcd1234]],
      ["abcd1234", [4, 2], %w[abcd 12 abcd1234]],
      ["abcd1234", [4, 2, 2], %w[abcd 12 34 abcd1234]],
      ["abcd1234", [2, 2, 1], %w[ab cd 1 abcd1234]]
    ]
    test_cases.each do |sha256_hex, pattern, expected|
      assert_equal expected, BlobFile.path_segments(pattern, sha256_hex)
    end
  end

  test "from an initialized BlobEntryP" do
    blob_entry = TestUtil.build_blob_entry
    blob_file = BlobFile.find_or_initialize_from_blob_entry(blob_entry)
    assert blob_file.save
    assert_equal blob_entry.contents, blob_file.content_bytes
    assert_equal blob_entry.content_type, blob_file.content_type
  end
end
|
||||
@@ -111,7 +111,7 @@ module TestUtil
|
||||
mock.expect(:name, "direct")
|
||||
mock.expect(
|
||||
:get,
|
||||
Scraper::HttpPerformer::Response.new(
|
||||
Scraper::CurlHttpPerformer::Response.new(
|
||||
response_code,
|
||||
response_headers,
|
||||
response_time_ms,
|
||||
|
||||
Reference in New Issue
Block a user