visual fingerprinting
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -12,6 +12,7 @@ lib/xdiff
|
||||
ext/xdiff/Makefile
|
||||
ext/xdiff/xdiff
|
||||
user_scripts/dist
|
||||
migrated_files.txt
|
||||
|
||||
# use yarn to manage node_modules
|
||||
package-lock.json
|
||||
|
||||
142
Rakefile
142
Rakefile
@@ -482,28 +482,6 @@ task sample_migrated_favs: :environment do
|
||||
puts "new fav count: #{new_user.faved_posts.count}"
|
||||
end
|
||||
|
||||
task clear_e621_user_favs_migrated_at: :environment do
|
||||
puts "clearing migrated_user_favs_at"
|
||||
ReduxApplicationRecord.connection.execute(<<~SQL)
|
||||
UPDATE domain_users
|
||||
SET json_attributes = json_attributes - 'migrated_user_favs_at'
|
||||
WHERE type = 'Domain::User::E621User'
|
||||
AND json_attributes->>'migrated_user_favs_at' IS NOT NULL
|
||||
SQL
|
||||
puts "done"
|
||||
# query = Domain::User::E621User.where.not(migrated_user_favs_at: nil)
|
||||
# pb = ProgressBar.create(total: query.count, format: "%t: %c/%C %B %p%% %a %e")
|
||||
# query.find_in_batches(batch_size: 1000) do |b|
|
||||
# ReduxApplicationRecord.transaction do
|
||||
# b.each do |u|
|
||||
# u.migrated_user_favs_at = nil
|
||||
# u.save!
|
||||
# end
|
||||
# end
|
||||
# pb.progress += b.size
|
||||
# end
|
||||
end
|
||||
|
||||
task create_post_file_fingerprints: :environment do
|
||||
def migrate_posts_for_user(user)
|
||||
puts "migrating posts for #{user.to_param}"
|
||||
@@ -512,30 +490,130 @@ task create_post_file_fingerprints: :environment do
|
||||
total: user.posts.count,
|
||||
format: "%t: %c/%C %B %p%% %a %e",
|
||||
)
|
||||
|
||||
user
|
||||
.posts
|
||||
.includes(files: :blob)
|
||||
.find_in_batches(batch_size: 16) do |batch|
|
||||
.includes(:files)
|
||||
.find_in_batches(batch_size: 64) do |batch|
|
||||
ReduxApplicationRecord.transaction do
|
||||
batch.each do |post|
|
||||
post.files.each { |file| file.ensure_fingerprint! }
|
||||
puts "migrated #{post.id} / #{post.to_param} / '#{post.title_for_view}'"
|
||||
pb.progress = [pb.progress + 1, pb.total].min
|
||||
end
|
||||
batch.each { |post| migrate_post(post) }
|
||||
pb.progress = [pb.progress + 1, pb.total].min
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if ENV["user"].present?
|
||||
def migrate_post(post)
|
||||
puts "migrating #{post.id} / #{post.to_param} / '#{post.title_for_view}'"
|
||||
ColorLogger.quiet do
|
||||
post.files.each do |file|
|
||||
migrate_post_file(file)
|
||||
rescue StandardError => e
|
||||
puts "error: #{e.message}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def migrate_post_file(post_file)
|
||||
job = Domain::PostFileThumbnailJob.new
|
||||
ColorLogger.quiet do
|
||||
job.perform({ post_file: })
|
||||
rescue => e
|
||||
puts "error: #{e.message}"
|
||||
end
|
||||
end
|
||||
|
||||
if ENV["post_file_descending"].present?
|
||||
total = 49_783_962 # cache this value
|
||||
pb = ProgressBar.create(total:, format: "%t: %c/%C %B %p%% %a %e")
|
||||
i = 0
|
||||
Domain::PostFile
|
||||
.where(state: "ok")
|
||||
.includes(:blob)
|
||||
.find_each(
|
||||
order: :desc,
|
||||
batch_size: 32,
|
||||
start: ENV["start_at"],
|
||||
) do |post_file|
|
||||
i += 1
|
||||
if i % 100 == 0
|
||||
puts "migrating #{post_file.id} / #{post_file.post.title_for_view}"
|
||||
end
|
||||
migrate_post_file(post_file)
|
||||
pb.progress = [pb.progress + 1, pb.total].min
|
||||
end
|
||||
elsif ENV["posts_descending"].present?
|
||||
# total = Domain::Post.count
|
||||
total = 66_431_808 # cache this value
|
||||
pb = ProgressBar.create(total:, format: "%t: %c/%C %B %p%% %a %e")
|
||||
Domain::Post.find_each(order: :desc) do |post|
|
||||
migrate_post(post) unless post.is_a?(Domain::Post::InkbunnyPost)
|
||||
pb.progress = [pb.progress + 1, pb.total].min
|
||||
end
|
||||
elsif ENV["user"].present?
|
||||
for_user = ENV["user"] || raise("need 'user'")
|
||||
user = DomainController.find_model_from_param(Domain::User, for_user)
|
||||
raise "user '#{for_user}' not found" unless user
|
||||
migrate_posts_for_user(user)
|
||||
elsif ENV["users_descending"].present?
|
||||
# all FA users, most-watched first (num_watched_by descending, NULLs last)
|
||||
users = Domain::User::FaUser.order(num_watched_by: :desc).limit(20)
|
||||
users.find_each(batch_size: 1) { |user| migrate_posts_for_user(user) }
|
||||
migrated_file = File.open("migrated_files.txt", "a+")
|
||||
migrated_file.seek(0)
|
||||
migrated_users = migrated_file.readlines.map(&:strip)
|
||||
users =
|
||||
Domain::User::FaUser.order(
|
||||
Arel.sql("json_attributes->>'num_watched_by' DESC NULLS LAST"),
|
||||
).pluck(:id)
|
||||
|
||||
users.each do |user_id|
|
||||
user = Domain::User::FaUser.find(user_id)
|
||||
next if migrated_users.include?(user.to_param)
|
||||
puts "migrating posts for #{user.to_param} (#{user.num_watched_by} watched by)"
|
||||
migrate_posts_for_user(user)
|
||||
migrated_file.write("#{user.to_param}\n")
|
||||
migrated_file.flush
|
||||
end
|
||||
migrated_file.close
|
||||
else
|
||||
raise "need 'user' or 'users_descending'"
|
||||
end
|
||||
end
|
||||
|
||||
task enqueue_pending_post_files: :environment do
|
||||
query = Domain::PostFile.where(state: "pending")
|
||||
puts "enqueueing #{query.count} pending post files"
|
||||
query.find_in_batches(batch_size: 100, start: ENV["start_at"]) do |batch|
|
||||
while (
|
||||
queue_size =
|
||||
GoodJob::Job.where(
|
||||
job_class: "Job::PostFileJob",
|
||||
performed_at: nil,
|
||||
scheduled_at: nil,
|
||||
error: nil,
|
||||
).count
|
||||
) > 100
|
||||
puts "queue size: #{queue_size}"
|
||||
sleep 10
|
||||
end
|
||||
batch.each do |post_file|
|
||||
Job::PostFileJob.set(priority: 10).perform_later(post_file:)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
task find_post_files_with_empty_response: :environment do
|
||||
query =
|
||||
Domain::PostFile
|
||||
.where(state: "ok", retry_count: 0)
|
||||
.joins(:log_entry)
|
||||
.where(http_log_entries: { response_sha256: BlobFile::EMPTY_FILE_SHA256 })
|
||||
|
||||
pb = ProgressBar.create(total: query.count, format: "%t: %c/%C %B %p%% %a %e")
|
||||
|
||||
query.find_each(batch_size: 10) do |post_file|
|
||||
# puts "post_file: #{post_file.id} / '#{post_file.post.to_param}'"
|
||||
post_file.state_pending!
|
||||
post_file.save!
|
||||
Job::PostFileJob.perform_now(post_file:)
|
||||
pb.progress = [pb.progress + 1, pb.total].min
|
||||
end
|
||||
end
|
||||
|
||||
@@ -148,7 +148,20 @@ class Domain::PostsController < DomainController
|
||||
# Create thumbnail for the view if possible
|
||||
@uploaded_image_data_uri = create_thumbnail(image_path, content_type)
|
||||
@uploaded_hash_value = generate_fingerprint(image_path)
|
||||
@post_file_fingerprints = find_similar_fingerprints(@uploaded_hash_value)
|
||||
@uploaded_detail_hash_value = generate_detail_fingerprint(image_path)
|
||||
@post_file_fingerprints =
|
||||
find_similar_fingerprints(@uploaded_hash_value).to_a
|
||||
@post_file_fingerprints.sort! do |a, b|
|
||||
helpers.calculate_similarity_percentage(
|
||||
b.fingerprint_detail_value,
|
||||
@uploaded_detail_hash_value,
|
||||
) <=>
|
||||
helpers.calculate_similarity_percentage(
|
||||
a.fingerprint_detail_value,
|
||||
@uploaded_detail_hash_value,
|
||||
)
|
||||
end
|
||||
@post_file_fingerprints = @post_file_fingerprints.take(10)
|
||||
@posts = @post_file_fingerprints.map(&:post_file).compact.map(&:post)
|
||||
ensure
|
||||
# Clean up any temporary files
|
||||
@@ -234,6 +247,12 @@ class Domain::PostsController < DomainController
|
||||
Domain::PostFile::BitFingerprint.from_file_path(image_path)
|
||||
end
|
||||
|
||||
# Generate a detail fingerprint from the image path
|
||||
sig { params(image_path: String).returns(String) }
|
||||
def generate_detail_fingerprint(image_path)
|
||||
Domain::PostFile::BitFingerprint.detail_from_file_path(image_path)
|
||||
end
|
||||
|
||||
# Find similar images based on the fingerprint
|
||||
sig { params(fingerprint_value: String).returns(ActiveRecord::Relation) }
|
||||
def find_similar_fingerprints(fingerprint_value)
|
||||
@@ -251,7 +270,7 @@ class Domain::PostsController < DomainController
|
||||
.select("*")
|
||||
.from(subquery)
|
||||
.order("distance ASC")
|
||||
.limit(10)
|
||||
.limit(32)
|
||||
end
|
||||
|
||||
sig { override.returns(DomainController::DomainParamConfig) }
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# typed: true
|
||||
module Domain
|
||||
module VisualSearchHelper
|
||||
# Calculate the similarity percentage between two fingerprint hash values
|
||||
@@ -7,15 +8,17 @@ module Domain
|
||||
def calculate_similarity_percentage(hash_value, reference_hash_value)
|
||||
# Calculate hamming distance between the two hash values
|
||||
distance =
|
||||
hash_value
|
||||
.split("")
|
||||
.zip(reference_hash_value.split(""))
|
||||
.map { |a, b| a.to_i ^ b.to_i }
|
||||
.sum
|
||||
# Maximum possible distance for a 256-bit hash
|
||||
DHashVips::IDHash.distance(
|
||||
hash_value.to_i(2),
|
||||
reference_hash_value.to_i(2),
|
||||
)
|
||||
|
||||
max_distance = 256
|
||||
# Calculate similarity percentage based on distance
|
||||
((max_distance - distance) / max_distance.to_f * 100).round(1)
|
||||
# Calculate similarity percentage based on distance, clamped to the 0..100 range
|
||||
((max_distance - distance) / max_distance.to_f * 100).round(1).clamp(
|
||||
0,
|
||||
100,
|
||||
)
|
||||
end
|
||||
|
||||
# Determine the background color class based on similarity percentage
|
||||
|
||||
@@ -51,15 +51,24 @@ module Domain::StaticFileJobHelper
|
||||
return
|
||||
end
|
||||
|
||||
status_code = response.status_code
|
||||
if response.log_entry.response_sha256 == BlobFile::EMPTY_FILE_SHA256 &&
|
||||
post_file.retry_count == 0
|
||||
logger.warn(
|
||||
format_tags("empty file, assuming server error and will try again"),
|
||||
)
|
||||
status_code = 500
|
||||
end
|
||||
|
||||
post_file.log_entry = response.log_entry
|
||||
post_file.last_status_code = response.status_code
|
||||
post_file.last_status_code = status_code
|
||||
|
||||
logger.tagged(make_arg_tag(response.log_entry)) do
|
||||
if response.status_code == 200
|
||||
if status_code == 200
|
||||
should_enqueue_thumbnail_job = true
|
||||
post_file.state_ok!
|
||||
post_file.retry_count = 0
|
||||
logger.info(format_tags("downloaded file"))
|
||||
elsif response.status_code == 404
|
||||
elsif status_code == 404
|
||||
post_file.state_terminal_error!
|
||||
logger.error(format_tags("404, terminal error state"))
|
||||
else
|
||||
@@ -86,7 +95,7 @@ module Domain::StaticFileJobHelper
|
||||
ensure
|
||||
post_file.save! if post_file
|
||||
if should_enqueue_thumbnail_job
|
||||
# defer_job(Domain::PostFileThumbnailJob, { post_file: })
|
||||
defer_job(Domain::PostFileThumbnailJob, { post_file: })
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
21
app/jobs/job/post_file_job.rb
Normal file
21
app/jobs/job/post_file_job.rb
Normal file
@@ -0,0 +1,21 @@
|
||||
# typed: strict
|
||||
# Job that downloads a single Domain::PostFile via the shared
# Domain::StaticFileJobHelper#download_post_file routine, using the generic
# HTTP client. Runs on the :static_file queue.
class Job::PostFileJob < Scraper::JobBase
  extend T::Sig
  include Domain::StaticFileJobHelper

  queue_as :static_file

  # Name of the Scraper::ClientFactory method that builds this job's HTTP client.
  sig { override.returns(Symbol) }
  def self.http_factory_method
    :get_generic_http_client
  end

  # @param args [Hash] must contain :post_file, a Domain::PostFile
  #   (T.cast raises if the value is anything else, including nil)
  # @return whatever download_post_file returns
  sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
  def perform(args)
    file = T.cast(args[:post_file], Domain::PostFile)
    owning_post = file.post

    # Tag every log line emitted during the download with the file and its post.
    file_tag = make_arg_tag(file)
    post_tag = make_arg_tag(owning_post)
    logger.tagged(file_tag, post_tag) { download_post_file(file) }
  end
end
|
||||
@@ -73,16 +73,10 @@ module HasColorLogger
|
||||
case arg
|
||||
when Domain::User
|
||||
name ||= "user"
|
||||
prefix, attr = arg.class.param_prefix_and_attribute
|
||||
tags << make_tag("#{name}.kind", prefix)
|
||||
tags << make_tag("#{name}.id", arg.id)
|
||||
tags << make_tag("#{name}.#{attr}", arg.send(attr))
|
||||
tags << make_tag("#{name}.param", arg.to_param)
|
||||
when Domain::Post
|
||||
name ||= "post"
|
||||
prefix, attr = arg.class.param_prefix_and_attribute
|
||||
tags << make_tag("#{name}.kind", prefix)
|
||||
tags << make_tag("#{name}.id", arg.id)
|
||||
tags << make_tag("#{name}.#{attr}", arg.send(attr))
|
||||
tags << make_tag("#{name}.param", arg.to_param)
|
||||
when Domain::PostFile
|
||||
name ||= "file"
|
||||
tags << make_tag("#{name}.id", arg.id)
|
||||
|
||||
@@ -1,17 +1,44 @@
|
||||
# typed: strict
|
||||
# frozen_string_literal: true
|
||||
FFMPEG.logger = Logger.new(nil)
|
||||
|
||||
class LoadedMedia::Webm < LoadedMedia
|
||||
include HasColorLogger
|
||||
|
||||
::FFMPEG.logger = Logger.new(nil)
|
||||
FFMPEG_BIN = T.let(`which ffmpeg`.strip, String)
|
||||
FFPROBE_BIN = T.let(`which ffprobe`.strip, String)
|
||||
|
||||
sig { params(media_path: String).void }
|
||||
def initialize(media_path)
|
||||
@media_path = media_path
|
||||
@media = T.let(FFMPEG::Media.new(media_path), FFMPEG::Media)
|
||||
duration = @media.duration
|
||||
frame_rate = @media.frame_rate
|
||||
@num_frames = T.let((frame_rate * duration).to_i, Integer)
|
||||
@duration = T.let(duration, Float)
|
||||
# frame_rate = @media.frame_rate
|
||||
# @num_frames = T.let((frame_rate * duration).to_i, Integer)
|
||||
|
||||
output, error, status =
|
||||
Open3.capture3(
|
||||
FFPROBE_BIN,
|
||||
"-v",
|
||||
"error",
|
||||
"-count_frames",
|
||||
"-select_streams",
|
||||
"v:0",
|
||||
"-show_entries",
|
||||
"stream=nb_read_frames",
|
||||
"-of",
|
||||
"default=nokey=1:noprint_wrappers=1",
|
||||
media_path,
|
||||
)
|
||||
|
||||
if status.success?
|
||||
@num_frames = T.let(output.strip.to_i, Integer)
|
||||
else
|
||||
$stderr.print error
|
||||
raise "Failed to get frame count from ffprobe: exit code #{status.exitstatus}"
|
||||
end
|
||||
|
||||
@duration = T.let(duration, Float)
|
||||
raise("no frames found in webm") if @num_frames.zero?
|
||||
end
|
||||
|
||||
@@ -26,19 +53,67 @@ class LoadedMedia::Webm < LoadedMedia
|
||||
.void
|
||||
end
|
||||
def write_frame_thumbnail(frame, path, options)
|
||||
frame_time = (frame.to_f * @duration) / @num_frames
|
||||
frame_time = frame_time.clamp(0.0..(@duration * 0.99))
|
||||
# Clamp the frame number to valid range for calculation
|
||||
clamped_frame = frame.clamp(0, @num_frames - 1)
|
||||
|
||||
# Calculate frame time using the clamped frame number
|
||||
frame_time = (clamped_frame.to_f * @duration) / @num_frames
|
||||
frame_time = frame_time.clamp(0.0..(@duration * 0.98))
|
||||
|
||||
logger.info(
|
||||
format_tags(make_tag("frame", frame), make_tag("frame_time", frame_time)),
|
||||
format_tags(
|
||||
make_tag("frame", frame),
|
||||
make_tag("frame_time", frame_time),
|
||||
make_tag("duration", @duration),
|
||||
make_tag("num_frames", @num_frames),
|
||||
),
|
||||
)
|
||||
|
||||
# Use the original frame number in the filename
|
||||
tmp_path =
|
||||
File.join(BlobFile::TMP_DIR, "webm-#{frame}-#{SecureRandom.uuid}.png")
|
||||
FileUtils.mkdir_p(File.dirname(tmp_path))
|
||||
|
||||
@media.screenshot(tmp_path, { seek_time: frame_time })
|
||||
raise("screenshot @ #{frame} failed") unless File.exist?(tmp_path)
|
||||
# @media.screenshot(tmp_path, { seek_time: frame_time })
|
||||
|
||||
# Determine if we should seek from start or end based on where we are in the file
|
||||
past_halfway = frame_time / @duration > 0.5
|
||||
|
||||
cmd = [FFMPEG_BIN, "-y", "-xerror", "-abort_on", "empty_output"] # Overwrite output files
|
||||
|
||||
if past_halfway
|
||||
# For frames in the second half of the file, seek from the end
|
||||
# Convert to a negative offset from the end
|
||||
end_offset = frame_time - @duration
|
||||
cmd.concat(["-sseof", end_offset.round(2).to_s])
|
||||
else
|
||||
# For frames in the first half, seek from the beginning
|
||||
cmd.concat(["-ss", frame_time.round(2).to_s])
|
||||
end
|
||||
|
||||
# Add input file and frame extraction options
|
||||
cmd.concat(
|
||||
[
|
||||
"-i",
|
||||
@media_path, # Input file
|
||||
"-vframes",
|
||||
"1", # Extract one frame
|
||||
"-f",
|
||||
"image2", # Force format to image2
|
||||
"-update",
|
||||
"1", # Update existing file
|
||||
tmp_path,
|
||||
],
|
||||
)
|
||||
|
||||
_output, error, status = Open3.capture3(*cmd)
|
||||
unless status.exitstatus == 0
|
||||
$stderr.print error
|
||||
raise "Failed to extract frame with ffmpeg: #{error}"
|
||||
end
|
||||
|
||||
# Report the clamped frame number in the error message
|
||||
raise("screenshot @ #{clamped_frame} failed") unless File.exist?(tmp_path)
|
||||
image = Vips::Image.new_from_file(tmp_path)
|
||||
write_image_thumbnail(image, path, options)
|
||||
ensure
|
||||
|
||||
@@ -32,6 +32,14 @@ class Scraper::ClientFactory
|
||||
end
|
||||
end
|
||||
|
||||
def self.get_generic_http_client
|
||||
if Rails.env.test? || Rails.env.development?
|
||||
@http_client_mock || raise("no http client mock set")
|
||||
else
|
||||
_http_client_impl(:generic, Scraper::GenericHttpClientConfig)
|
||||
end
|
||||
end
|
||||
|
||||
def self.get_fa_http_client
|
||||
if Rails.env.test?
|
||||
@http_client_mock || raise("no http client mock set")
|
||||
|
||||
24
app/lib/scraper/generic_http_client_config.rb
Normal file
24
app/lib/scraper/generic_http_client_config.rb
Normal file
@@ -0,0 +1,24 @@
|
||||
# typed: strict
|
||||
# HTTP client configuration for fetching from arbitrary hosts: no cookies,
# a single wildcard rate-limit rule, every domain allowed.
class Scraper::GenericHttpClientConfig < Scraper::HttpClientConfig
  extend T::Sig

  # Wildcard allow-list: the only entry is "*".
  sig { override.returns(T::Array[String]) }
  def allowed_domains
    ["*"]
  end

  # No cookies are sent by this client.
  sig { override.returns(T.nilable(T::Array[T.untyped])) }
  def cookies
    nil
  end

  # One rule applied to the "*" pattern with value 0.5.
  # NOTE(review): the unit of 0.5 is defined by Scraper::HttpClientConfig's
  # consumers and is not visible here - confirm before tuning.
  sig { override.returns(T::Array[[String, Numeric]]) }
  def ratelimit
    wildcard_rule = ["*", 0.5]
    [wildcard_rule]
  end

  # Upper bound on redirects, per the method name.
  sig { override.returns(Integer) }
  def redirect_limit
    4
  end
end
|
||||
@@ -75,7 +75,7 @@ class Domain::PostFile < ReduxApplicationRecord
|
||||
@blob_file_model ||=
|
||||
((sha256 = self.blob_sha256) ? BlobFile.migrate_sha256!(sha256) : nil)
|
||||
@blob_file_model
|
||||
end
|
||||
end || log_entry&.response
|
||||
end
|
||||
|
||||
sig { params(le: T.nilable(HttpLogEntry)).returns(T.nilable(HttpLogEntry)) }
|
||||
|
||||
@@ -13,6 +13,7 @@ class Domain::PostFile::BitFingerprint < ReduxApplicationRecord
|
||||
|
||||
# in bytes
|
||||
HASH_SIZE_BYTES = 32
|
||||
DETAIL_HASH_SIZE_BYTES = 128
|
||||
|
||||
# Find similar images based on the fingerprint
|
||||
sig { params(fingerprint: String).returns(ActiveRecord::Relation) }
|
||||
@@ -58,8 +59,13 @@ class Domain::PostFile::BitFingerprint < ReduxApplicationRecord
|
||||
next
|
||||
end
|
||||
fingerprint_value = from_file_path(thumbnail_path)
|
||||
fingerprint_detail_value = detail_from_file_path(thumbnail_path)
|
||||
fingerprint =
|
||||
post_file.bit_fingerprints.build(thumbnail:, fingerprint_value:)
|
||||
post_file.bit_fingerprints.build(
|
||||
thumbnail:,
|
||||
fingerprint_value:,
|
||||
fingerprint_detail_value:,
|
||||
)
|
||||
logger.info(
|
||||
format_tags(
|
||||
"computed fingerprint #{fingerprint_value.to_i(2).to_s(16).upcase}",
|
||||
@@ -117,9 +123,9 @@ class Domain::PostFile::BitFingerprint < ReduxApplicationRecord
|
||||
Float(result)
|
||||
end
|
||||
|
||||
# Create a PostFileFingerprint instance from a file path
|
||||
# Create a fingerprint from a file path
|
||||
# @param file_path [String] Path to the image file
|
||||
# @return [Domain::PostFileFingerprint] A non-persisted fingerprint model
|
||||
# @return [String] The fingerprint as a binary ("0"/"1") string, left-padded with zeros to HASH_SIZE_BYTES * 8 characters
|
||||
sig { params(file_path: String).returns(String) }
|
||||
def self.from_file_path(file_path)
|
||||
unless File.exist?(file_path)
|
||||
@@ -129,4 +135,17 @@ class Domain::PostFile::BitFingerprint < ReduxApplicationRecord
|
||||
fingerprint = DHashVips::IDHash.fingerprint(file_path)
|
||||
fingerprint.to_s(2).rjust(HASH_SIZE_BYTES * 8, "0")
|
||||
end
|
||||
|
||||
# Compute detail fingerprint from a file path
|
||||
# @param file_path [String] Path to the image file
|
||||
# @return [String] The detail fingerprint as a binary ("0"/"1") string, left-padded with zeros to DETAIL_HASH_SIZE_BYTES * 8 characters
|
||||
sig { params(file_path: String).returns(String) }
|
||||
def self.detail_from_file_path(file_path)
|
||||
unless File.exist?(file_path)
|
||||
raise ArgumentError, "File does not exist: #{file_path}"
|
||||
end
|
||||
|
||||
fingerprint = DHashVips::IDHash.fingerprint(file_path, 4)
|
||||
fingerprint.to_s(2).rjust(DETAIL_HASH_SIZE_BYTES * 8, "0")
|
||||
end
|
||||
end
|
||||
|
||||
@@ -108,7 +108,7 @@ class Domain::PostFile::Thumbnail < ReduxApplicationRecord
|
||||
logger.info(format_tags(make_tag("num_frames", num_frames)))
|
||||
return [] if num_frames.zero?
|
||||
|
||||
existing_thumb_types = post_file.thumbnails.to_a.map(&:thumb_type)
|
||||
existing_thumb_types = post_file.thumbnails.to_a.map(&:thumb_type).uniq
|
||||
logger.info(
|
||||
format_tags(make_tag("existing_thumb_types", existing_thumb_types)),
|
||||
)
|
||||
@@ -116,6 +116,7 @@ class Domain::PostFile::Thumbnail < ReduxApplicationRecord
|
||||
FileUtils.mkdir_p(BlobFile::TMP_DIR)
|
||||
thumbnails = []
|
||||
THUMB_TYPE_TO_OPTIONS.each do |thumb_type, options|
|
||||
thumb_type = thumb_type.to_s
|
||||
logger.tagged(make_tag("thumb_type", thumb_type)) do
|
||||
next if existing_thumb_types.include?(thumb_type)
|
||||
logger.info(format_tags("creating thumbnail"))
|
||||
@@ -142,7 +143,11 @@ class Domain::PostFile::Thumbnail < ReduxApplicationRecord
|
||||
media.write_frame_thumbnail(frame, thumb_file_path, options)
|
||||
end
|
||||
|
||||
thumbnail.save!
|
||||
begin
|
||||
thumbnail.save!
|
||||
rescue StandardError
|
||||
binding.pry
|
||||
end
|
||||
thumbnails << thumbnail
|
||||
end
|
||||
end
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
<% @post_file_fingerprints.each do |post_file_fingerprint| %>
|
||||
<% post_file = post_file_fingerprint.post_file %>
|
||||
<% post = post_file.post %>
|
||||
<% similarity_percentage = calculate_similarity_percentage(post_file_fingerprint.fingerprint_value, @uploaded_hash_value) %>
|
||||
<% similarity_percentage = calculate_similarity_percentage(post_file_fingerprint.fingerprint_detail_value, @uploaded_detail_hash_value) %>
|
||||
<div class="flex flex-col h-fit rounded-md border border-gray-300 bg-white shadow hover:shadow-md transition-shadow duration-300 overflow-hidden">
|
||||
<div class="flex justify-between items-center border-b border-gray-200 p-2 bg-gray-50 gap-2">
|
||||
<div class="flex items-center">
|
||||
|
||||
@@ -13,6 +13,7 @@ class CreatePostFileThumbnailsFingerprints < ActiveRecord::Migration[7.2]
|
||||
t.references :post_file, null: false, index: false
|
||||
t.references :thumbnail, null: false, index: false
|
||||
t.bit :fingerprint_value, limit: 256
|
||||
t.bit :fingerprint_detail_value, limit: 1024
|
||||
t.timestamps
|
||||
|
||||
t.index %i[post_file_id thumbnail_id], unique: true
|
||||
|
||||
10
db/migrate/20250310001005_move_blob_files_to_mirai.rb
Normal file
10
db/migrate/20250310001005_move_blob_files_to_mirai.rb
Normal file
@@ -0,0 +1,10 @@
|
||||
# Moves the 64 blob_files_00 .. blob_files_63 tables onto the `mirai`
# tablespace, one ALTER TABLE statement per table.
class MoveBlobFilesToMirai < ActiveRecord::Migration[7.2]
  # Run outside a wrapping DDL transaction, so each ALTER takes effect on its own.
  disable_ddl_transaction!

  # Raw SQL cannot be reversed automatically, so this migration defines
  # explicit up/down instead of `change`.
  def up
    64.times do |i|
      table_name = format("blob_files_%02d", i)
      execute "alter table #{table_name} set tablespace mirai"
    end
  end

  # The tablespace the tables lived in before is not recorded here, so a
  # rollback has to be performed manually.
  def down
    raise ActiveRecord::IrreversibleMigration,
          "cannot infer the previous tablespace of the blob_files_NN tables"
  end
end
|
||||
@@ -0,0 +1,10 @@
|
||||
# Moves the sha256 index of each of the 64 blob_files_NN tables
# (index_blob_files_00_on_sha256 .. index_blob_files_63_on_sha256) onto the
# `mirai` tablespace, one ALTER INDEX statement per index.
class MoveBlobFilesIndexesToMirai < ActiveRecord::Migration[7.2]
  # Run outside a wrapping DDL transaction, so each ALTER takes effect on its own.
  disable_ddl_transaction!

  # Raw SQL cannot be reversed automatically, so this migration defines
  # explicit up/down instead of `change`.
  def up
    64.times do |i|
      index_name = format("index_blob_files_%02d_on_sha256", i)
      execute "alter index #{index_name} set tablespace mirai"
    end
  end

  # The tablespace the indexes lived in before is not recorded here, so a
  # rollback has to be performed manually.
  def down
    raise ActiveRecord::IrreversibleMigration,
          "cannot infer the previous tablespace of the blob_files_NN sha256 indexes"
  end
end
|
||||
267
db/structure.sql
267
db/structure.sql
File diff suppressed because it is too large
Load Diff
57
sorbet/rbi/dsl/domain/post_file/bit_fingerprint.rbi
generated
57
sorbet/rbi/dsl/domain/post_file/bit_fingerprint.rbi
generated
@@ -722,6 +722,51 @@ class Domain::PostFile::BitFingerprint
|
||||
sig { void }
|
||||
def created_at_will_change!; end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def fingerprint_detail_value; end
|
||||
|
||||
sig { params(value: T.nilable(::String)).returns(T.nilable(::String)) }
|
||||
def fingerprint_detail_value=(value); end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def fingerprint_detail_value?; end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def fingerprint_detail_value_before_last_save; end
|
||||
|
||||
sig { returns(T.untyped) }
|
||||
def fingerprint_detail_value_before_type_cast; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def fingerprint_detail_value_came_from_user?; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
|
||||
def fingerprint_detail_value_change; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
|
||||
def fingerprint_detail_value_change_to_be_saved; end
|
||||
|
||||
sig { params(from: T.nilable(::String), to: T.nilable(::String)).returns(T::Boolean) }
|
||||
def fingerprint_detail_value_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def fingerprint_detail_value_in_database; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
|
||||
def fingerprint_detail_value_previous_change; end
|
||||
|
||||
sig { params(from: T.nilable(::String), to: T.nilable(::String)).returns(T::Boolean) }
|
||||
def fingerprint_detail_value_previously_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def fingerprint_detail_value_previously_was; end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def fingerprint_detail_value_was; end
|
||||
|
||||
sig { void }
|
||||
def fingerprint_detail_value_will_change!; end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def fingerprint_value; end
|
||||
|
||||
@@ -905,6 +950,9 @@ class Domain::PostFile::BitFingerprint
|
||||
sig { void }
|
||||
def restore_created_at!; end
|
||||
|
||||
sig { void }
|
||||
def restore_fingerprint_detail_value!; end
|
||||
|
||||
sig { void }
|
||||
def restore_fingerprint_value!; end
|
||||
|
||||
@@ -929,6 +977,12 @@ class Domain::PostFile::BitFingerprint
|
||||
sig { returns(T::Boolean) }
|
||||
def saved_change_to_created_at?; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
|
||||
def saved_change_to_fingerprint_detail_value; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def saved_change_to_fingerprint_detail_value?; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
|
||||
def saved_change_to_fingerprint_value; end
|
||||
|
||||
@@ -1068,6 +1122,9 @@ class Domain::PostFile::BitFingerprint
|
||||
sig { returns(T::Boolean) }
|
||||
def will_save_change_to_created_at?; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def will_save_change_to_fingerprint_detail_value?; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def will_save_change_to_fingerprint_value?; end
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ class Domain::PostFileThumbnailJob
|
||||
end
|
||||
def perform_later(args, &block); end
|
||||
|
||||
sig { params(args: T::Hash[::Symbol, T.untyped]).void }
|
||||
sig { params(args: T::Hash[::Symbol, T.untyped]).returns(T.untyped) }
|
||||
def perform_now(args); end
|
||||
end
|
||||
end
|
||||
|
||||
27
sorbet/rbi/dsl/job/post_file_job.rbi
generated
Normal file
27
sorbet/rbi/dsl/job/post_file_job.rbi
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
# typed: true
|
||||
|
||||
# DO NOT EDIT MANUALLY
|
||||
# This is an autogenerated file for dynamic methods in `Job::PostFileJob`.
|
||||
# Please instead update this file by running `bin/tapioca dsl Job::PostFileJob`.
|
||||
|
||||
|
||||
class Job::PostFileJob
|
||||
sig { returns(ColorLogger) }
|
||||
def logger; end
|
||||
|
||||
class << self
|
||||
sig { returns(ColorLogger) }
|
||||
def logger; end
|
||||
|
||||
sig do
|
||||
params(
|
||||
args: T::Hash[::Symbol, T.untyped],
|
||||
block: T.nilable(T.proc.params(job: Job::PostFileJob).void)
|
||||
).returns(T.any(Job::PostFileJob, FalseClass))
|
||||
end
|
||||
def perform_later(args, &block); end
|
||||
|
||||
sig { params(args: T::Hash[::Symbol, T.untyped]).returns(T.untyped) }
|
||||
def perform_now(args); end
|
||||
end
|
||||
end
|
||||
@@ -34,6 +34,7 @@ RSpec.describe Domain::PostsController, type: :controller do
|
||||
|
||||
context "with an image URL" do
|
||||
let(:mock_hash_value) { "1010101010101010" }
|
||||
let(:mock_detail_hash_value) { "0101010101010101" }
|
||||
let(:mock_fingerprints) { Domain::PostFile::BitFingerprint.none }
|
||||
let(:temp_file_path) { "/tmp/test_image.jpg" }
|
||||
|
||||
@@ -51,9 +52,15 @@ RSpec.describe Domain::PostsController, type: :controller do
|
||||
:from_file_path,
|
||||
).with(temp_file_path).and_return(mock_hash_value)
|
||||
|
||||
# Add expectation for detail fingerprint
|
||||
expect(Domain::PostFile::BitFingerprint).to receive(
|
||||
:order_by_fingerprint_distance,
|
||||
).with(mock_hash_value).and_return(mock_fingerprints)
|
||||
:detail_from_file_path,
|
||||
).with(temp_file_path).and_return(mock_detail_hash_value)
|
||||
|
||||
# Mock the similar fingerprints search
|
||||
expect(controller).to receive(:find_similar_fingerprints).with(
|
||||
mock_hash_value,
|
||||
).and_return(mock_fingerprints)
|
||||
|
||||
post :visual_results,
|
||||
params: {
|
||||
@@ -62,6 +69,16 @@ RSpec.describe Domain::PostsController, type: :controller do
|
||||
|
||||
# Just verify the template was rendered
|
||||
expect(response).to render_template(:visual_results)
|
||||
|
||||
# Verify that the correct instance variables are set
|
||||
expect(assigns(:uploaded_image_data_uri)).to eq(
|
||||
"data:image/jpeg;base64,FAKE",
|
||||
)
|
||||
expect(assigns(:uploaded_hash_value)).to eq(mock_hash_value)
|
||||
expect(assigns(:uploaded_detail_hash_value)).to eq(
|
||||
mock_detail_hash_value,
|
||||
)
|
||||
expect(assigns(:post_file_fingerprints)).to eq(mock_fingerprints.to_a)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -49,7 +49,15 @@ describe Domain::Inkbunny::Job::StaticFileJob do
|
||||
uri: FileJobSpec::AN_IMAGE_URL,
|
||||
status_code: 200,
|
||||
content_type: "image/png",
|
||||
contents: SpecUtil.read_fixture_file(FileJobSpec::AN_IMAGE_PATH),
|
||||
contents:
|
||||
File.binread(
|
||||
Rails
|
||||
.root
|
||||
.join(
|
||||
"test/fixtures/files/images/thumb-036aaab6-low-quality.jpeg",
|
||||
)
|
||||
.to_s,
|
||||
),
|
||||
caused_by_entry:,
|
||||
},
|
||||
],
|
||||
@@ -115,7 +123,7 @@ describe Domain::Inkbunny::Job::StaticFileJob do
|
||||
file.reload
|
||||
expect(file.log_entry).to eq(log_entries[1])
|
||||
expect(file.state).to eq("ok")
|
||||
expect(file.retry_count).to eq(0)
|
||||
expect(file.retry_count).to eq(1)
|
||||
expect(file.last_status_code).to eq(200)
|
||||
end
|
||||
|
||||
|
||||
@@ -116,16 +116,43 @@ RSpec.describe LoadedMedia do
|
||||
expect(File.size(output_path)).to be > 0
|
||||
end
|
||||
|
||||
it "creates different frames", quiet: false do
|
||||
it "creates different frames" do
|
||||
media = LoadedMedia.from_file("video/webm", webm_fixture_path)
|
||||
|
||||
output_path_1 = make_output_path(0)
|
||||
output_path_2 = make_output_path(453)
|
||||
media.write_frame_thumbnail(0, output_path_1, thumbnail_options)
|
||||
media.write_frame_thumbnail(453, output_path_2, thumbnail_options)
|
||||
paths =
|
||||
[0, 453, media.num_frames - 1].map do |frame|
|
||||
output_path = make_output_path(frame)
|
||||
media.write_frame_thumbnail(frame, output_path, thumbnail_options)
|
||||
output_path
|
||||
end
|
||||
|
||||
expect(output_path_1).not_to eq(output_path_2)
|
||||
expect(FileUtils.compare_file(output_path_1, output_path_2)).to be false
|
||||
expect(paths.uniq.length).to eq(3)
|
||||
expect(FileUtils.compare_file(paths[0], paths[1])).to be false
|
||||
expect(FileUtils.compare_file(paths[0], paths[2])).to be false
|
||||
expect(FileUtils.compare_file(paths[1], paths[2])).to be false
|
||||
end
|
||||
|
||||
it "works with files that have a strange last frame", quiet: false do
|
||||
media =
|
||||
LoadedMedia.from_file(
|
||||
"video/webm",
|
||||
Rails
|
||||
.root
|
||||
.join("test/fixtures/files/images/last-frame-weird-ts.webm")
|
||||
.to_s,
|
||||
)
|
||||
|
||||
paths =
|
||||
[0, 50, media.num_frames - 1].map do |frame|
|
||||
output_path = make_output_path(frame)
|
||||
media.write_frame_thumbnail(frame, output_path, thumbnail_options)
|
||||
output_path
|
||||
end
|
||||
|
||||
expect(paths.uniq.length).to eq(3)
|
||||
expect(FileUtils.compare_file(paths[0], paths[1])).to be false
|
||||
expect(FileUtils.compare_file(paths[0], paths[2])).to be false
|
||||
expect(FileUtils.compare_file(paths[1], paths[2])).to be false
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
BIN
test/fixtures/files/images/last-frame-weird-ts.webm
vendored
Normal file
BIN
test/fixtures/files/images/last-frame-weird-ts.webm
vendored
Normal file
Binary file not shown.
BIN
test/fixtures/files/images/test.webm
vendored
Normal file
BIN
test/fixtures/files/images/test.webm
vendored
Normal file
Binary file not shown.
Reference in New Issue
Block a user