more fingerprint

This commit is contained in:
Dylan Knutson
2025-03-05 19:40:33 +00:00
parent 1ef786fa8e
commit 99809041a5
23 changed files with 658 additions and 96 deletions

View File

@@ -37,3 +37,4 @@
- [ ] Use links found in descriptions to indicate re-scanning a post? (e.g. for comic next/prev links)
- [ ] fix for IDs that have a dot in them - e.g. https://refurrer.com/users/fa@jakke.
- [ ] Rich inline links to e621 e.g. https://refurrer.com/posts/fa@60070060
- [ ] Find FaPosts that have favs recorded but no scan / file, and enqueue a scan

View File

@@ -230,20 +230,17 @@ class Domain::PostsController < DomainController
# Generate a fingerprint from the image path
sig { params(image_path: String).returns(String) }
def generate_fingerprint(image_path)
::DHashVips::IDHash.fingerprint(image_path).to_s(2).rjust(256, "0")
# Use the new from_file_path method to create a fingerprint
fingerprint = Domain::PostFileFingerprint.from_file_path(image_path)
# The hash_value is guaranteed to be present by the from_file_path implementation
T.must(fingerprint.hash_value)
end
# Find similar images based on the fingerprint
sig { params(hash_value: String).returns(ActiveRecord::Relation) }
def find_similar_fingerprints(hash_value)
Domain::PostFileFingerprint
.includes(post_file: :post)
.order(
Arel.sql(
"(hash_value <~> '#{ActiveRecord::Base.connection.quote_string(hash_value)}') ASC",
),
)
.limit(10)
# Use the model's similar_to_fingerprint method directly
Domain::PostFileFingerprint.similar_to_fingerprint(hash_value).limit(10)
end
sig { override.returns(DomainController::DomainParamConfig) }

View File

@@ -12,7 +12,8 @@ class Domain::PostFile < ReduxApplicationRecord
has_one :fingerprint,
class_name: "::Domain::PostFileFingerprint",
foreign_key: :post_file_id,
foreign_key: :blob_sha256,
primary_key: :blob_sha256,
dependent: :destroy,
inverse_of: :post_file
@@ -40,6 +41,21 @@ class Domain::PostFile < ReduxApplicationRecord
self.type ||= self.class.name if new_record?
end
# Best-effort fingerprinting after each save: when this record has no
# fingerprint yet and its blob's content type matches one of
# Domain::PostFileFingerprint::VALID_CONTENT_TYPES, build a fingerprint via
# Domain::PostFileFingerprint.from_post_file and persist it. Any StandardError
# raised along the way is logged and not re-raised.
after_save do
  next unless self.fingerprint.nil?

  stored_blob = self.blob
  next unless stored_blob

  mime_type = stored_blob.content_type
  next unless mime_type

  fingerprintable =
    Domain::PostFileFingerprint::VALID_CONTENT_TYPES.any? do |pattern|
      mime_type.match?(pattern)
    end
  next unless fingerprintable

  # from_post_file may return nil, hence the safe navigation before save!
  Domain::PostFileFingerprint.from_post_file(self)&.save!
rescue => e
  logger.error("could not save fingerprint for post_file #{self.id}: #{e}")
end
sig { returns(T.nilable(BlobFile)) }
def blob
super ||

View File

@@ -4,12 +4,16 @@ class Domain::PostFileFingerprint < ReduxApplicationRecord
self.table_name = "domain_post_file_fingerprints"
belongs_to :post_file,
foreign_key: :post_file_id,
foreign_key: :blob_sha256,
primary_key: :blob_sha256,
class_name: "::Domain::PostFile",
inverse_of: :fingerprint
validates :hash_value, presence: true
# in bytes
HASH_SIZE = 32
VALID_CONTENT_TYPES =
T.let(
[
@@ -33,7 +37,17 @@ class Domain::PostFileFingerprint < ReduxApplicationRecord
end
fingerprint = DHashVips::IDHash.fingerprint(path)
self.hash_value = fingerprint.to_s(2).rjust(256, "0")
self.hash_value = fingerprint.to_s(2).rjust(HASH_SIZE * 8, "0")
end
# Order fingerprints by the result of the `<~>` operator between the stored
# hash_value and the given bit string, ascending, eager-loading each
# fingerprint's post_file and that file's post. No limit is applied here;
# callers chain their own.
# @param fingerprint [String] bit string to compare against hash_value
# @return [ActiveRecord::Relation] ordered, unlimited relation
sig { params(fingerprint: String).returns(ActiveRecord::Relation) }
def self.similar_to_fingerprint(fingerprint)
  # The value is interpolated into raw SQL, so escape it first.
  escaped_bits = ActiveRecord::Base.connection.quote_string(fingerprint)
  distance_order = Arel.sql("(hash_value <~> '#{escaped_bits}') ASC")

  includes(post_file: :post).order(distance_order)
end
# Calculate the Hamming distance between this fingerprint and another fingerprint
@@ -59,11 +73,7 @@ class Domain::PostFileFingerprint < ReduxApplicationRecord
params(hash_value1: String, hash_value2: String).returns(T.nilable(Integer))
end
def self.hamming_distance(hash_value1, hash_value2)
hash_value1
.split("")
.zip(hash_value2.split(""))
.map { |a, b| a.to_i ^ b.to_i }
.sum
hash_value1.chars.zip(hash_value2.chars).count { |c1, c2| c1 != c2 }
end
# Calculate the similarity percentage between this fingerprint and another fingerprint
@@ -79,10 +89,67 @@ class Domain::PostFileFingerprint < ReduxApplicationRecord
return nil unless distance
# Maximum possible distance for a 256-bit hash
max_distance = 256
max_distance = HASH_SIZE * 8
# Calculate similarity percentage based on distance
result = ((max_distance - distance) / max_distance.to_f * 100).round(1)
# Ensure the return type is Float
Float(result)
end
# Build an unsaved fingerprint for a post file's blob and link it to that
# post file.
# @param post_file [Domain::PostFile] post file whose blob should be fingerprinted
# @return [Domain::PostFileFingerprint, nil] nil when the post file has no
#   blob, the blob has no file path or content type, or the content type
#   matches none of VALID_CONTENT_TYPES
# @raise [ArgumentError] propagated from from_file_path when the blob's file
#   is missing on disk
sig do
  params(post_file: Domain::PostFile).returns(
    T.nilable(Domain::PostFileFingerprint),
  )
end
def self.from_post_file(post_file)
  source_blob = post_file.blob
  path_on_disk = source_blob&.absolute_file_path
  mime_type = source_blob&.content_type
  return nil unless path_on_disk && mime_type
  return nil if VALID_CONTENT_TYPES.none? { |pattern| mime_type.match?(pattern) }

  from_file_path(path_on_disk).tap { |record| record.post_file = post_file }
end
# Build an unsaved fingerprint for the image stored at the given path.
# @param file_path [String] Path to the image file
# @return [Domain::PostFileFingerprint] A non-persisted fingerprint model
# @raise [ArgumentError] when nothing exists at file_path
sig { params(file_path: String).returns(Domain::PostFileFingerprint) }
def self.from_file_path(file_path)
  raise ArgumentError, "File does not exist: #{file_path}" unless File.exist?(file_path)

  from_dhash_fingerprint(DHashVips::IDHash.fingerprint(file_path))
end
# Build an unsaved fingerprint from an in-memory image, without going through
# a file path.
# @param vips_image [Vips::Image] Vips image object
# @return [Domain::PostFileFingerprint] A non-persisted fingerprint model
sig { params(vips_image: T.untyped).returns(Domain::PostFileFingerprint) }
def self.from_vips_image(vips_image)
  # The image object is handed to DHashVips as-is.
  from_dhash_fingerprint(DHashVips::IDHash.fingerprint(vips_image))
end
# Create a PostFileFingerprint instance from a DHashVips fingerprint.
# Internal helper shared by from_file_path and from_vips_image.
# @param fingerprint [Object] DHashVips fingerprint object; only `to_s(2)` is
#   called on it
# @return [Domain::PostFileFingerprint] A non-persisted fingerprint model
sig { params(fingerprint: T.untyped).returns(Domain::PostFileFingerprint) }
def self.from_dhash_fingerprint(fingerprint)
  # Render the fingerprint in base 2 and left-pad with zeros to the full width:
  # HASH_SIZE = 32 (bytes) * 8 = 256 bits
  bit_string = fingerprint.to_s(2).rjust(HASH_SIZE * 8, "0")
  new.tap { |record| record.hash_value = bit_string }
end
# A bare `private` only affects instance methods defined after it; it has no
# effect on `def self.` methods, so the helper is hidden explicitly instead.
private_class_method :from_dhash_fingerprint
end

View File

@@ -1,15 +1,9 @@
class CreateDomainPostFileThumbnails < ActiveRecord::Migration[7.0]
def change
create_table :domain_post_file_fingerprints do |t|
t.references :post_file,
null: false,
foreign_key: {
to_table: :domain_post_files,
},
index: true
t.binary :blob_sha256, null: false, index: true
t.bit :hash_value, limit: 256
t.timestamps
t.index :hash_value, using: :hnsw, opclass: :bit_hamming_ops
end
end

View File

@@ -2702,7 +2702,7 @@ ALTER SEQUENCE public.domain_inkbunny_users_id_seq OWNED BY public.domain_inkbun
CREATE TABLE public.domain_post_file_fingerprints (
id bigint NOT NULL,
post_file_id bigint NOT NULL,
blob_sha256 bytea NOT NULL,
hash_value bit(256),
created_at timestamp(6) without time zone NOT NULL,
updated_at timestamp(6) without time zone NOT NULL
@@ -7018,6 +7018,13 @@ CREATE UNIQUE INDEX index_domain_inkbunny_users_on_ib_user_id ON public.domain_i
CREATE INDEX index_domain_inkbunny_users_on_shallow_update_log_entry_id ON public.domain_inkbunny_users USING btree (shallow_update_log_entry_id);
--
-- Name: index_domain_post_file_fingerprints_on_blob_sha256; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_domain_post_file_fingerprints_on_blob_sha256 ON public.domain_post_file_fingerprints USING btree (blob_sha256);
--
-- Name: index_domain_post_file_fingerprints_on_hash_value; Type: INDEX; Schema: public; Owner: -
--
@@ -7025,13 +7032,6 @@ CREATE INDEX index_domain_inkbunny_users_on_shallow_update_log_entry_id ON publi
CREATE INDEX index_domain_post_file_fingerprints_on_hash_value ON public.domain_post_file_fingerprints USING hnsw (hash_value public.bit_hamming_ops);
--
-- Name: index_domain_post_file_fingerprints_on_post_file_id; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_domain_post_file_fingerprints_on_post_file_id ON public.domain_post_file_fingerprints USING btree (post_file_id);
SET default_tablespace = mirai;
--
@@ -8527,14 +8527,6 @@ ALTER TABLE ONLY public.domain_fa_user_avatars
ADD CONSTRAINT fk_rails_2a03f31297 FOREIGN KEY (log_entry_id) REFERENCES public.http_log_entries(id);
--
-- Name: domain_post_file_fingerprints fk_rails_2f27fdde74; Type: FK CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.domain_post_file_fingerprints
ADD CONSTRAINT fk_rails_2f27fdde74 FOREIGN KEY (post_file_id) REFERENCES public.domain_post_files(id);
--
-- Name: domain_inkbunny_files fk_rails_31a33e433e; Type: FK CONSTRAINT; Schema: public; Owner: -
--

View File

@@ -44,6 +44,7 @@ class ApplicationController
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper

View File

@@ -41,6 +41,7 @@ class DeviseController
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper

View File

@@ -640,6 +640,51 @@ class Domain::PostFileFingerprint
end
module GeneratedAttributeMethods
sig { returns(T.nilable(::String)) }
def blob_sha256; end
sig { params(value: T.nilable(::String)).returns(T.nilable(::String)) }
def blob_sha256=(value); end
sig { returns(T::Boolean) }
def blob_sha256?; end
sig { returns(T.nilable(::String)) }
def blob_sha256_before_last_save; end
sig { returns(T.untyped) }
def blob_sha256_before_type_cast; end
sig { returns(T::Boolean) }
def blob_sha256_came_from_user?; end
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
def blob_sha256_change; end
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
def blob_sha256_change_to_be_saved; end
sig { params(from: T.nilable(::String), to: T.nilable(::String)).returns(T::Boolean) }
def blob_sha256_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
sig { returns(T.nilable(::String)) }
def blob_sha256_in_database; end
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
def blob_sha256_previous_change; end
sig { params(from: T.nilable(::String), to: T.nilable(::String)).returns(T::Boolean) }
def blob_sha256_previously_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
sig { returns(T.nilable(::String)) }
def blob_sha256_previously_was; end
sig { returns(T.nilable(::String)) }
def blob_sha256_was; end
sig { void }
def blob_sha256_will_change!; end
sig { returns(T.nilable(::ActiveSupport::TimeWithZone)) }
def created_at; end
@@ -830,50 +875,8 @@ class Domain::PostFileFingerprint
sig { void }
def id_will_change!; end
sig { returns(T.nilable(::Integer)) }
def post_file_id; end
sig { params(value: T.nilable(::Integer)).returns(T.nilable(::Integer)) }
def post_file_id=(value); end
sig { returns(T::Boolean) }
def post_file_id?; end
sig { returns(T.nilable(::Integer)) }
def post_file_id_before_last_save; end
sig { returns(T.untyped) }
def post_file_id_before_type_cast; end
sig { returns(T::Boolean) }
def post_file_id_came_from_user?; end
sig { returns(T.nilable([T.nilable(::Integer), T.nilable(::Integer)])) }
def post_file_id_change; end
sig { returns(T.nilable([T.nilable(::Integer), T.nilable(::Integer)])) }
def post_file_id_change_to_be_saved; end
sig { params(from: T.nilable(::Integer), to: T.nilable(::Integer)).returns(T::Boolean) }
def post_file_id_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
sig { returns(T.nilable(::Integer)) }
def post_file_id_in_database; end
sig { returns(T.nilable([T.nilable(::Integer), T.nilable(::Integer)])) }
def post_file_id_previous_change; end
sig { params(from: T.nilable(::Integer), to: T.nilable(::Integer)).returns(T::Boolean) }
def post_file_id_previously_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
sig { returns(T.nilable(::Integer)) }
def post_file_id_previously_was; end
sig { returns(T.nilable(::Integer)) }
def post_file_id_was; end
sig { void }
def post_file_id_will_change!; end
def restore_blob_sha256!; end
sig { void }
def restore_created_at!; end
@@ -887,12 +890,15 @@ class Domain::PostFileFingerprint
sig { void }
def restore_id_value!; end
sig { void }
def restore_post_file_id!; end
sig { void }
def restore_updated_at!; end
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
def saved_change_to_blob_sha256; end
sig { returns(T::Boolean) }
def saved_change_to_blob_sha256?; end
sig { returns(T.nilable([T.nilable(::ActiveSupport::TimeWithZone), T.nilable(::ActiveSupport::TimeWithZone)])) }
def saved_change_to_created_at; end
@@ -917,12 +923,6 @@ class Domain::PostFileFingerprint
sig { returns(T::Boolean) }
def saved_change_to_id_value?; end
sig { returns(T.nilable([T.nilable(::Integer), T.nilable(::Integer)])) }
def saved_change_to_post_file_id; end
sig { returns(T::Boolean) }
def saved_change_to_post_file_id?; end
sig { returns(T.nilable([T.nilable(::ActiveSupport::TimeWithZone), T.nilable(::ActiveSupport::TimeWithZone)])) }
def saved_change_to_updated_at; end
@@ -984,6 +984,9 @@ class Domain::PostFileFingerprint
sig { void }
def updated_at_will_change!; end
sig { returns(T::Boolean) }
def will_save_change_to_blob_sha256?; end
sig { returns(T::Boolean) }
def will_save_change_to_created_at?; end
@@ -996,9 +999,6 @@ class Domain::PostFileFingerprint
sig { returns(T::Boolean) }
def will_save_change_to_id_value?; end
sig { returns(T::Boolean) }
def will_save_change_to_post_file_id?; end
sig { returns(T::Boolean) }
def will_save_change_to_updated_at?; end
end

View File

@@ -236,4 +236,10 @@ module GeneratedPathHelpersModule
sig { params(args: T.untyped).returns(String) }
def user_session_path(*args); end
sig { params(args: T.untyped).returns(String) }
def visual_results_domain_posts_path(*args); end
sig { params(args: T.untyped).returns(String) }
def visual_search_domain_posts_path(*args); end
end

View File

@@ -236,4 +236,10 @@ module GeneratedUrlHelpersModule
sig { params(args: T.untyped).returns(String) }
def user_session_url(*args); end
sig { params(args: T.untyped).returns(String) }
def visual_results_domain_posts_url(*args); end
sig { params(args: T.untyped).returns(String) }
def visual_search_domain_posts_url(*args); end
end

View File

@@ -44,6 +44,7 @@ class Rails::ApplicationController
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper

View File

@@ -44,6 +44,7 @@ class Rails::Conductor::BaseController
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper

View File

@@ -44,6 +44,7 @@ class Rails::HealthController
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper

View File

@@ -0,0 +1,73 @@
# typed: false
require "rails_helper"
RSpec.describe Domain::PostsController, type: :controller do
# Create a real user with admin role
let(:user) { create(:user, :admin) }
before do
# Sign in the user
sign_in user
# Mock authorization to allow all actions
allow(controller).to receive(:authorize).and_return(true)
end
# ============================================================
# Test the controller's actual actions and their behavior
# ============================================================
describe "GET #visual_search" do
it "returns a successful response and renders the visual_search template" do
get :visual_search
expect(response).to be_successful
expect(response).to render_template(:visual_search)
end
end
describe "POST #visual_results" do
context "with no image provided" do
it "renders the visual_search template" do
post :visual_results
expect(response).to render_template(:visual_search)
end
end
context "with an image URL" do
let(:mock_hash_value) { "1010101010101010" }
let(:mock_fingerprints) { Domain::PostFileFingerprint.none }
let(:temp_file_path) { "/tmp/test_image.jpg" }
it "uses PostFileFingerprint model methods for fingerprinting and finding similar images" do
# We need to mock the image downloading and processing since we can't do that in tests
allow(controller).to receive(:process_image_input).and_return(
[temp_file_path, "image/jpeg"],
)
allow(controller).to receive(:create_thumbnail).and_return(
"data:image/jpeg;base64,FAKE",
)
# Set up expectations for our model methods - this is what we're really testing
expect(Domain::PostFileFingerprint).to receive(:from_file_path).with(
temp_file_path,
).and_return(
instance_double(
Domain::PostFileFingerprint,
hash_value: mock_hash_value,
),
)
expect(Domain::PostFileFingerprint).to receive(
:similar_to_fingerprint,
).with(mock_hash_value).and_return(mock_fingerprints)
post :visual_results,
params: {
image_url: "https://example.com/image.jpg",
}
# Just verify the template was rendered
expect(response).to render_template(:visual_results)
end
end
end
end

View File

@@ -50,4 +50,39 @@ FactoryBot.define do
end
end
end
trait :image_file do
file_name { "image.jpg" }
url_str { "https://example.com/image.jpg" }
file_order { 1 }
md5_initial { "d41d8cd98f00b204e9800998ecf8427e" }
md5_full { "d41d8cd98f00b204e9800998ecf8427e" }
before(:create) do
self.log_entry =
create(
:blob_file,
content_type: "image/jpeg",
contents:
File.read(
Rails.root.join(
"test/fixtures/files/images/thumb-036aaab6-low-quality.jpeg",
),
),
)
end
end
end
FactoryBot.define do
factory :domain_post_file_inkbunny_post_file,
class: "Domain::PostFile::InkbunnyPostFile",
parent: :domain_post_file do
association :post, factory: :domain_post_fa_post
sequence(:ib_id) { |n| 12_345 + n }
sequence(:file_name) { |n| "image#{n}.jpg" }
sequence(:url_str) { |n| "https://example.com/image#{n}.jpg" }
file_order { 1 }
md5_initial { "d41d8cd98f00b204e9800998ecf8427e" }
md5_full { "d41d8cd98f00b204e9800998ecf8427e" }
end
end

View File

@@ -0,0 +1,261 @@
# typed: false
require "rails_helper"
RSpec.describe Domain::PostFileFingerprint, type: :model do
describe ".similar_to_fingerprint" do
let(:image_paths) do
{
# Map original images to their low-quality versions
"images/thumb-036aaab6-content-container.jpeg" =>
"images/thumb-036aaab6-low-quality.jpeg",
"images/thumb-ac63d9d7-content-container.jpeg" =>
"images/thumb-ac63d9d7-low-quality.jpeg",
"images/thumb-c8feb8a92-content-container.jpeg" =>
"images/thumb-c8feb8a92-low-quality.jpeg",
}
end
before(:each) do
  # Build one post / blob / post_file / fingerprint per original image.
  # @fingerprints keeps the same order as image_paths.keys so examples can
  # pair each low-quality image with its original by index.
  @fingerprints =
    image_paths.keys.map do |image_path|
      post = create(:domain_post_fa_post)

      # Read the original image in binary mode
      image_content =
        File.read(
          Rails.root.join("test/fixtures/files", image_path),
          mode: "rb",
        )

      # Create a blob file with the image content
      blob =
        create(
          :blob_file,
          content_bytes: image_content,
          content_type: "image/jpeg",
          sha256: Digest::SHA256.digest(image_content),
        )

      # Create a post file that points at the blob by its sha256
      post_file =
        create(
          :domain_post_file,
          post: post,
          state: "ok",
          blob_sha256: blob.sha256,
        )

      # No hash_value is passed here; the fingerprint should be automatically
      # calculated in the model's before_validation callback
      Domain::PostFileFingerprint.create!(post_file: post_file)
    end
end
it "returns similar images in order of similarity" do
  # For each low-quality image, generate a fingerprint and check that
  # similar_to_fingerprint ranks the matching original at (or tied for) the top.
  image_paths.values.each_with_index do |low_quality_path, index|
    low_quality_image_path =
      Rails.root.join("test/fixtures/files", low_quality_path)

    # Generate a fingerprint for the low-quality image
    fingerprint = DHashVips::IDHash.fingerprint(low_quality_image_path.to_s)
    hash_value =
      fingerprint.to_s(2).rjust(
        Domain::PostFileFingerprint::HASH_SIZE * 8,
        "0",
      )

    # Find similar fingerprints
    similar_fingerprints =
      Domain::PostFileFingerprint.similar_to_fingerprint(hash_value)

    # The original image's fingerprint should be in the top results.
    # The PostgreSQL operator <~> (hamming distance) may produce ties
    # when two fingerprints have the exact same distance, resulting in
    # arbitrary ordering among equal-distance results
    expected_fingerprint = @fingerprints[index]
    found_index = similar_fingerprints.find_index(expected_fingerprint)

    # Distance from the query hash to the top-ranked result
    first_result_distance =
      Domain::PostFileFingerprint.hamming_distance(
        hash_value,
        similar_fingerprints.first.hash_value,
      )

    # Distance from the query hash to the fingerprint we expect to match
    expected_distance =
      Domain::PostFileFingerprint.hamming_distance(
        hash_value,
        expected_fingerprint.hash_value,
      )

    # Verify our expected fingerprint is in the results and has the same or
    # very close distance to the first result
    expect(similar_fingerprints).to include(expected_fingerprint)
    expect(found_index).to be <= 1,
    "Expected fingerprint #{expected_fingerprint.id} should be among the first two results"
    expect(expected_distance).to be_within(1).of(first_result_distance),
    "Expected distance (#{expected_distance}) should be within 1 of the first result's distance (#{first_result_distance})"

    # Calculate similarity for verification
    similarity =
      similar_fingerprints.first.similarity_percentage_to(
        @fingerprints[index],
      )
    expect(similarity).to be > 70 # Expect at least 70% similarity

    # Unsaved fingerprint wrapping the low-quality hash. It does not depend on
    # which original it is compared against, so build it once per image.
    low_quality_fingerprint = Domain::PostFileFingerprint.new
    low_quality_fingerprint.hash_value = hash_value

    # Check that this low-quality image has low similarity with OTHER original images
    @fingerprints.each_with_index do |other_fingerprint, other_index|
      next if other_index == index

      other_similarity =
        low_quality_fingerprint.similarity_percentage_to(other_fingerprint)

      # Should have lower similarity to non-matching original images
      expect(other_similarity).to be < 70,
      "Low-quality version of image #{index} should not be similar to original image #{other_index}"
    end
  end
end
it "calculates correct hamming distance between fingerprints" do
# Test the hamming_distance_to method
reference_fingerprint = @fingerprints.first
# A fingerprint should have zero distance to itself
expect(
reference_fingerprint.hamming_distance_to(reference_fingerprint),
).to eq(0)
# Different fingerprints should have non-zero distance
expect(
reference_fingerprint.hamming_distance_to(@fingerprints[1]),
).to be > 0
# Test the hamming_distance class method
expect(
Domain::PostFileFingerprint.hamming_distance(
reference_fingerprint.hash_value,
@fingerprints[1].hash_value,
),
).to be > 0
end
it "calculates correct similarity percentage" do
# Test the similarity_percentage_to method
reference_fingerprint = @fingerprints.first
# A fingerprint should be 100% similar to itself
expect(
reference_fingerprint.similarity_percentage_to(reference_fingerprint),
).to eq(100)
# Different fingerprints should have less than 100% similarity
expect(
reference_fingerprint.similarity_percentage_to(@fingerprints[1]),
).to be < 100
end
end
describe ".from_file_path" do
it "creates a fingerprint from a file path" do
# Use one of the test images
file_path =
Rails
.root
.join(
"test/fixtures/files/images/thumb-036aaab6-content-container.jpeg",
)
.to_s
# Create a fingerprint from the file path
fingerprint = Domain::PostFileFingerprint.from_file_path(file_path)
# Check that the fingerprint was created properly
expect(fingerprint).to be_a(Domain::PostFileFingerprint)
expect(fingerprint.hash_value).to be_present
expect(fingerprint.hash_value.length).to eq(
Domain::PostFileFingerprint::HASH_SIZE * 8,
)
expect(fingerprint.persisted?).to be(false)
end
it "raises an error for non-existent files" do
expect {
Domain::PostFileFingerprint.from_file_path("/non/existent/file.jpg")
}.to raise_error(ArgumentError, /File does not exist/)
end
end
describe ".from_vips_image" do
it "creates a fingerprint from a Vips::Image" do
# Load a test image as a Vips::Image
file_path =
Rails
.root
.join(
"test/fixtures/files/images/thumb-036aaab6-content-container.jpeg",
)
.to_s
vips_image = Vips::Image.new_from_file(file_path)
# Create a fingerprint from the Vips::Image
fingerprint = Domain::PostFileFingerprint.from_vips_image(vips_image)
# Check that the fingerprint was created properly
expect(fingerprint).to be_a(Domain::PostFileFingerprint)
expect(fingerprint.hash_value).to be_present
expect(fingerprint.hash_value.length).to eq(
Domain::PostFileFingerprint::HASH_SIZE * 8,
)
expect(fingerprint.persisted?).to be(false)
end
it "generates fingerprints with high similarity to from_file_path for the same image" do
# This test verifies that fingerprints from vips_image and file_path are highly similar
# for the same image, even if not exactly identical due to format considerations
file_path =
Rails
.root
.join(
"test/fixtures/files/images/thumb-ac63d9d7-content-container.jpeg",
)
.to_s
# Create fingerprints using both methods
vips_image = Vips::Image.new_from_file(file_path)
from_vips_fingerprint =
Domain::PostFileFingerprint.from_vips_image(vips_image)
from_file_fingerprint =
Domain::PostFileFingerprint.from_file_path(file_path)
# The fingerprints might not be 100% identical due to how Vips handles direct images
# vs how it handles file loading, but they should have high similarity
similarity =
100 -
(
Domain::PostFileFingerprint.hamming_distance(
from_vips_fingerprint.hash_value,
from_file_fingerprint.hash_value,
).to_f / (Domain::PostFileFingerprint::HASH_SIZE * 8) * 100
)
# The similarity should be very high (above 90%)
expect(similarity).to be > 90
end
end
end

View File

@@ -0,0 +1,109 @@
# typed: false
require "rails_helper"
RSpec.describe Domain::PostFile, type: :model do
describe "after_save callback" do
it "creates and saves a fingerprint when saving a post file with a valid image blob" do
# Create a post
post = create(:domain_post_fa_post)
# Setup an image file
image_path =
Rails.root.join(
"test/fixtures/files/images/thumb-036aaab6-content-container.jpeg",
)
image_content = File.read(image_path, mode: "rb")
# Create blob with image content
blob =
create(
:blob_file,
content_bytes: image_content,
content_type: "image/jpeg",
sha256: Digest::SHA256.digest(image_content),
)
# Count fingerprints before creating the post file
fingerprint_count_before = Domain::PostFileFingerprint.count
# Create and save the post file with the blob reference
post_file =
create(
:domain_post_file,
post: post,
state: "ok",
blob_sha256: blob.sha256,
)
# Verify a fingerprint was automatically created and saved
expect(Domain::PostFileFingerprint.count).to eq(
fingerprint_count_before + 1,
)
expect(Domain::PostFileFingerprint.last.post_file).to eq(post_file)
# Verify the fingerprint has actual content
fingerprint = Domain::PostFileFingerprint.last
expect(fingerprint.hash_value).to be_present
expect(fingerprint.hash_value.length).to eq(
Domain::PostFileFingerprint::HASH_SIZE * 8,
)
end
it "does not create a fingerprint for non-image files" do
# Create a post
post = create(:domain_post_fa_post)
# Setup a text file
text_content = "This is a text file, not an image"
# Create blob with text content
blob =
create(
:blob_file,
content_bytes: text_content,
content_type: "text/plain",
sha256: Digest::SHA256.digest(text_content),
)
# Count fingerprints before creating the post file
fingerprint_count_before = Domain::PostFileFingerprint.count
# Create and save the post file with the blob reference
post_file =
create(
:domain_post_file,
post: post,
state: "ok",
blob_sha256: blob.sha256,
)
# Verify no new fingerprint was created
expect(Domain::PostFileFingerprint.count).to eq(fingerprint_count_before)
expect(
Domain::PostFileFingerprint.where(post_file: post_file).count,
).to eq(0)
end
it "does not create a fingerprint for files with missing blobs" do
# Create a post
post = create(:domain_post_fa_post)
# Count fingerprints before creating the post file
fingerprint_count_before = Domain::PostFileFingerprint.count
# Create post file without a blob
post_file =
create(
:domain_post_file,
post: post,
state: "pending", # No blob associated
)
# Verify no new fingerprint was created
expect(Domain::PostFileFingerprint.count).to eq(fingerprint_count_before)
expect(
Domain::PostFileFingerprint.where(post_file: post_file).count,
).to eq(0)
end
end
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 200 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 219 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB