Files
redux-scraper/app/helpers/domain/visual_search_helper.rb
2025-08-14 19:29:28 +00:00

191 lines
6.1 KiB
Ruby

# typed: strict
module Domain
module VisualSearchHelper
extend T::Sig
# Calculate the similarity percentage between two fingerprint hash values
# @param hash_value [String] The hash value to compare
# @param reference_hash_value [String] The reference hash value to compare against
# @return [Float] The similarity percentage between 0 and 100
sig do
params(hash_value: String, reference_hash_value: String).returns(Float)
end
def calculate_similarity_percentage(hash_value, reference_hash_value)
  # Both arguments are parsed as base-2 digit strings so that DHashVips can
  # compute the distance between the two hash integers.
  distance =
    DHashVips::IDHash.distance(
      hash_value.to_i(2),
      reference_hash_value.to_i(2),
    )
  # NOTE(review): 256 is treated as the largest possible distance, presumably
  # the bit length of the fingerprint - confirm against how it is generated.
  max_distance = 256.0
  similarity = (max_distance - distance) / max_distance * 100
  # Clamp with Float bounds: with Integer bounds an out-of-range value would
  # come back as the Integer 0 or 100 instead of the declared Float.
  similarity.round(1).clamp(0.0, 100.0)
end
# Determine the background color class based on similarity percentage
# @param similarity_percentage [Float] The similarity percentage between 0 and 100
# @return [String] The Tailwind CSS background color class
sig { params(similarity_percentage: Float).returns(String) }
def match_badge_bg_color(similarity_percentage)
  # The argument is a Float, so the tiers must be contiguous: each tier's
  # lower bound is inclusive and its upper bound is the next tier's lower
  # bound (exclusive). Integer-style ranges such as 70..89 would leave
  # fractional values like 89.5 unmatched.
  case similarity_percentage
  when 90..100
    "bg-green-600"
  when 70...90
    "bg-blue-600"
  when 50...70
    "bg-amber-500"
  else
    # Anything below 50, or outside 0..100 entirely.
    "bg-slate-700"
  end
end
# Determine the text color class based on similarity percentage
# @param similarity_percentage [Float] The similarity percentage between 0 and 100
# @return [String] The Tailwind CSS text color class
sig { params(similarity_percentage: Float).returns(String) }
def match_text_color(similarity_percentage)
  # The argument is a Float, so the tiers must be contiguous: each tier's
  # lower bound is inclusive and its upper bound is the next tier's lower
  # bound (exclusive). Integer-style ranges such as 70..89 would leave
  # fractional values like 89.5 unmatched.
  case similarity_percentage
  when 90..100
    "text-green-700"
  when 70...90
    "text-blue-700"
  when 50...70
    "text-amber-700"
  else
    # Anything below 50, or outside 0..100 entirely.
    "text-slate-700"
  end
end
# Get the CSS classes for the match percentage badge
# @param similarity_percentage [Float] The similarity percentage between 0 and 100
# @return [String] The complete CSS classes for the match percentage badge
sig { params(similarity_percentage: Float).returns(String) }
def match_badge_classes(similarity_percentage)
  # The tier-dependent background class comes first, followed by the fixed
  # badge styling shared by every tier.
  background = match_badge_bg_color(similarity_percentage)
  "#{background} text-white text-xs rounded-full px-3 py-1 shadow-md"
end
# One search hit: a stored fingerprint record together with its similarity
# to the query fingerprint that found it.
class SimilarFingerprintResult < T::Struct
# NOTE(review): presumably mixed in for value-based comparison - confirm.
include T::Struct::ActsAsComparable
# The matching fingerprint record.
const :fingerprint, Domain::PostFile::BitFingerprint
# Similarity to the query, as produced by calculate_similarity_percentage.
const :similarity_percentage, Float
end
# A query fingerprint pair: the value used for the database similarity
# ordering plus the detail value used to score each candidate.
class FingerprintAndDetail < T::Struct
# NOTE(review): presumably mixed in for value-based comparison - confirm.
include T::Struct::ActsAsComparable
# Compared against fingerprint_value in the similarity query.
const :fingerprint, String
# Passed to calculate_similarity_percentage, which parses it as base-2 digits.
const :detail_fingerprint, String
end
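# Find stored fingerprints similar to the given query fingerprints
# @param fingerprints [Array<FingerprintAndDetail>] The query fingerprints to search with
# @param limit [Integer] The maximum number of results kept per query fingerprint
# @param oversearch [Integer] The multiplier applied to limit when fetching candidates
# @param includes [Object] Associations passed through to the ActiveRecord `includes` call
# @return [Array<SimilarFingerprintResult>] The best match per post file, most similar first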
sig do
params(
fingerprints: T::Array[FingerprintAndDetail],
limit: Integer,
oversearch: Integer,
includes: T.untyped,
).returns(T::Array[SimilarFingerprintResult])
end
def find_similar_fingerprints(
  fingerprints,
  limit: 32,
  oversearch: 2,
  includes: {}
)
  # Runs one distance-ordered query per supplied fingerprint, scores every
  # candidate against that query's detail fingerprint, and finally keeps the
  # single best-scoring result per post file, most similar first.
  ActiveRecord::Base.connection.execute("SET ivfflat.probes = 20")
  candidate_count = limit * oversearch

  scored =
    fingerprints.flat_map do |query|
      quoted_value =
        ActiveRecord::Base.connection.quote_string(query.fingerprint)
      distance_order = Arel.sql("(fingerprint_value <~> '#{quoted_value}')")

      # Fetch limit * oversearch rows: the database ordering uses the plain
      # fingerprint, while the final ranking below uses the detail value.
      candidates =
        Domain::PostFile::BitFingerprint
          .order(distance_order)
          .limit(candidate_count)
          .includes(includes)
          .to_a
          .uniq(&:post_file_id)

      ranked =
        candidates.map do |candidate|
          SimilarFingerprintResult.new(
            fingerprint: candidate,
            similarity_percentage:
              calculate_similarity_percentage(
                query.detail_fingerprint,
                T.must(candidate.fingerprint_detail_value),
              ),
          )
        end

      ranked
        .sort { |left, right| right.similarity_percentage <=> left.similarity_percentage }
        .take(limit)
    end

  # The same post file can be hit by several query fingerprints; keep only
  # its highest-scoring result.
  best_per_post_file =
    scored
      .group_by { |result| T.must(result.fingerprint.post_file_id) }
      .values
      .map { |group| T.must(group.max_by(&:similarity_percentage)) }

  best_per_post_file.sort_by(&:similarity_percentage).reverse
end
# Result of fingerprinting one frame thumbnail: where the thumbnail was
# written and the two fingerprint values computed from it.
class GenerateFingerprintsResult < T::Struct
extend T::Sig
# NOTE(review): presumably mixed in for value-based comparison - confirm.
include T::Struct::ActsAsComparable
# Path of the thumbnail file the fingerprints were computed from.
const :thumb_path, String
# Fingerprint value for the thumbnail.
const :fingerprint, String
# Detail fingerprint value for the same thumbnail.
const :detail_fingerprint, String
# Build the fingerprint pair for this result, leaving out the thumbnail path
# @return [FingerprintAndDetail] The fingerprint and detail fingerprint of this result
sig { returns(FingerprintAndDetail) }
def to_fingerprint_and_detail
FingerprintAndDetail.new(
fingerprint: fingerprint,
detail_fingerprint: detail_fingerprint,
)
end
end
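# Generate fingerprints from thumbnails of sampled frames of a media file
# @param image_path [String] The path of the media file to load
# @param content_type [String] The content type passed to LoadedMedia.from_file
# @param tmp_dir [String] The directory the frame thumbnails are written into
# @return [Array<GenerateFingerprintsResult>, nil] One result per sampled frame, or nil when LoadedMedia.from_file returns nothing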
sig do
params(image_path: String, content_type: String, tmp_dir: String).returns(
T.nilable(T::Array[GenerateFingerprintsResult]),
)
end
def generate_fingerprints(image_path, content_type, tmp_dir)
  # Load the media first; without a loaded media object there is nothing to
  # fingerprint, so the caller gets nil.
  media = LoadedMedia.from_file(content_type, image_path)
  return nil unless media

  thumbnail_options = LoadedMedia::ThumbnailOptions.new(
    width: 128,
    height: 128,
    quality: 95,
    size: :force,
    interlace: false,
    for_frames: [0.0, 0.1, 0.5, 0.9, 1.0],
  )

  # Turn each fraction into an absolute frame index. Several fractions can
  # land on the same index when there are few frames, hence the uniq.
  frame_indices =
    thumbnail_options.for_frames.map do |fraction|
      (fraction * (media.num_frames - 1)).to_i
    end
  frame_indices = frame_indices.uniq.sort

  # Write one thumbnail per sampled frame and fingerprint that thumbnail file.
  frame_indices.map do |frame_index|
    thumb_path = File.join(tmp_dir, "frame-#{frame_index}.jpg")
    media.write_frame_thumbnail(frame_index, thumb_path, thumbnail_options)

    fingerprint_class = Domain::PostFile::BitFingerprint
    GenerateFingerprintsResult.new(
      thumb_path: thumb_path,
      fingerprint: fingerprint_class.from_file_path(thumb_path),
      detail_fingerprint: fingerprint_class.detail_from_file_path(thumb_path),
    )
  end
end
end
end