more fingerprint
This commit is contained in:
1
TODO.md
1
TODO.md
@@ -37,3 +37,4 @@
|
||||
- [ ] Use links found in descriptions to indicate re-scanning a post? (e.g. for comic next/prev links)
|
||||
- [ ] fix for IDs that have a dot in them - e.g. https://refurrer.com/users/fa@jakke.
|
||||
- [ ] Rich inline links to e621 e.g. https://refurrer.com/posts/fa@60070060
|
||||
- [ ] Find FaPost that have favs recorded but no scan / file, enqueue scan
|
||||
|
||||
@@ -230,20 +230,17 @@ class Domain::PostsController < DomainController
|
||||
# Generate a fingerprint from the image path
|
||||
sig { params(image_path: String).returns(String) }
|
||||
def generate_fingerprint(image_path)
|
||||
::DHashVips::IDHash.fingerprint(image_path).to_s(2).rjust(256, "0")
|
||||
# Use the new from_file_path method to create a fingerprint
|
||||
fingerprint = Domain::PostFileFingerprint.from_file_path(image_path)
|
||||
# The hash_value is guaranteed to be present by the from_file_path implementation
|
||||
T.must(fingerprint.hash_value)
|
||||
end
|
||||
|
||||
# Find similar images based on the fingerprint
|
||||
sig { params(hash_value: String).returns(ActiveRecord::Relation) }
|
||||
def find_similar_fingerprints(hash_value)
|
||||
Domain::PostFileFingerprint
|
||||
.includes(post_file: :post)
|
||||
.order(
|
||||
Arel.sql(
|
||||
"(hash_value <~> '#{ActiveRecord::Base.connection.quote_string(hash_value)}') ASC",
|
||||
),
|
||||
)
|
||||
.limit(10)
|
||||
# Use the model's similar_to_fingerprint method directly
|
||||
Domain::PostFileFingerprint.similar_to_fingerprint(hash_value).limit(10)
|
||||
end
|
||||
|
||||
sig { override.returns(DomainController::DomainParamConfig) }
|
||||
|
||||
@@ -12,7 +12,8 @@ class Domain::PostFile < ReduxApplicationRecord
|
||||
|
||||
has_one :fingerprint,
|
||||
class_name: "::Domain::PostFileFingerprint",
|
||||
foreign_key: :post_file_id,
|
||||
foreign_key: :blob_sha256,
|
||||
primary_key: :blob_sha256,
|
||||
dependent: :destroy,
|
||||
inverse_of: :post_file
|
||||
|
||||
@@ -40,6 +41,21 @@ class Domain::PostFile < ReduxApplicationRecord
|
||||
self.type ||= self.class.name if new_record?
|
||||
end
|
||||
|
||||
after_save do
|
||||
if self.fingerprint.nil? && (blob = self.blob) &&
|
||||
(content_type = blob.content_type) &&
|
||||
(
|
||||
Domain::PostFileFingerprint::VALID_CONTENT_TYPES.any? do |type|
|
||||
content_type.match?(type)
|
||||
end
|
||||
)
|
||||
fingerprint = Domain::PostFileFingerprint.from_post_file(self)
|
||||
fingerprint&.save!
|
||||
end
|
||||
rescue => e
|
||||
logger.error("could not save fingerprint for post_file #{self.id}: #{e}")
|
||||
end
|
||||
|
||||
sig { returns(T.nilable(BlobFile)) }
|
||||
def blob
|
||||
super ||
|
||||
|
||||
@@ -4,12 +4,16 @@ class Domain::PostFileFingerprint < ReduxApplicationRecord
|
||||
self.table_name = "domain_post_file_fingerprints"
|
||||
|
||||
belongs_to :post_file,
|
||||
foreign_key: :post_file_id,
|
||||
foreign_key: :blob_sha256,
|
||||
primary_key: :blob_sha256,
|
||||
class_name: "::Domain::PostFile",
|
||||
inverse_of: :fingerprint
|
||||
|
||||
validates :hash_value, presence: true
|
||||
|
||||
# Size of the fingerprint hash, in bytes (the stored bit string is HASH_SIZE * 8 bits long)
|
||||
HASH_SIZE = 32
|
||||
|
||||
VALID_CONTENT_TYPES =
|
||||
T.let(
|
||||
[
|
||||
@@ -33,7 +37,17 @@ class Domain::PostFileFingerprint < ReduxApplicationRecord
|
||||
end
|
||||
|
||||
fingerprint = DHashVips::IDHash.fingerprint(path)
|
||||
self.hash_value = fingerprint.to_s(2).rjust(256, "0")
|
||||
self.hash_value = fingerprint.to_s(2).rjust(HASH_SIZE * 8, "0")
|
||||
end
|
||||
|
||||
# Find similar images based on the fingerprint
|
||||
sig { params(fingerprint: String).returns(ActiveRecord::Relation) }
|
||||
def self.similar_to_fingerprint(fingerprint)
|
||||
includes(post_file: :post).order(
|
||||
Arel.sql(
|
||||
"(hash_value <~> '#{ActiveRecord::Base.connection.quote_string(fingerprint)}') ASC",
|
||||
),
|
||||
)
|
||||
end
|
||||
|
||||
# Calculate the Hamming distance between this fingerprint and another fingerprint
|
||||
@@ -59,11 +73,7 @@ class Domain::PostFileFingerprint < ReduxApplicationRecord
|
||||
params(hash_value1: String, hash_value2: String).returns(T.nilable(Integer))
|
||||
end
|
||||
def self.hamming_distance(hash_value1, hash_value2)
|
||||
hash_value1
|
||||
.split("")
|
||||
.zip(hash_value2.split(""))
|
||||
.map { |a, b| a.to_i ^ b.to_i }
|
||||
.sum
|
||||
hash_value1.chars.zip(hash_value2.chars).count { |c1, c2| c1 != c2 }
|
||||
end
|
||||
|
||||
# Calculate the similarity percentage between this fingerprint and another fingerprint
|
||||
@@ -79,10 +89,67 @@ class Domain::PostFileFingerprint < ReduxApplicationRecord
|
||||
return nil unless distance
|
||||
|
||||
# Maximum possible distance for a 256-bit hash
|
||||
max_distance = 256
|
||||
max_distance = HASH_SIZE * 8
|
||||
# Calculate similarity percentage based on distance
|
||||
result = ((max_distance - distance) / max_distance.to_f * 100).round(1)
|
||||
# Ensure the return type is Float
|
||||
Float(result)
|
||||
end
|
||||
|
||||
sig do
|
||||
params(post_file: Domain::PostFile).returns(
|
||||
T.nilable(Domain::PostFileFingerprint),
|
||||
)
|
||||
end
|
||||
def self.from_post_file(post_file)
|
||||
blob_file_path = post_file.blob&.absolute_file_path
|
||||
content_type = post_file.blob&.content_type
|
||||
return nil unless blob_file_path
|
||||
return nil unless content_type
|
||||
unless VALID_CONTENT_TYPES.any? { |type| content_type.match?(type) }
|
||||
return nil
|
||||
end
|
||||
model = from_file_path(blob_file_path)
|
||||
model.post_file = post_file
|
||||
model
|
||||
end
|
||||
|
||||
# Create a PostFileFingerprint instance from a file path
|
||||
# @param file_path [String] Path to the image file
|
||||
# @return [Domain::PostFileFingerprint] A non-persisted fingerprint model
|
||||
sig { params(file_path: String).returns(Domain::PostFileFingerprint) }
|
||||
def self.from_file_path(file_path)
|
||||
unless File.exist?(file_path)
|
||||
raise ArgumentError, "File does not exist: #{file_path}"
|
||||
end
|
||||
|
||||
fingerprint = DHashVips::IDHash.fingerprint(file_path)
|
||||
from_dhash_fingerprint(fingerprint)
|
||||
end
|
||||
|
||||
# Create a PostFileFingerprint instance from a Vips::Image
|
||||
# @param vips_image [Vips::Image] Vips image object
|
||||
# @return [Domain::PostFileFingerprint] A non-persisted fingerprint model
|
||||
sig { params(vips_image: T.untyped).returns(Domain::PostFileFingerprint) }
|
||||
def self.from_vips_image(vips_image)
|
||||
# Generate fingerprint directly from the Vips::Image object
|
||||
fingerprint = DHashVips::IDHash.fingerprint(vips_image)
|
||||
from_dhash_fingerprint(fingerprint)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Create a PostFileFingerprint instance from a DHashVips fingerprint
|
||||
# @param fingerprint [Integer] Numeric fingerprint as returned by DHashVips::IDHash.fingerprint
|
||||
# @return [Domain::PostFileFingerprint] A non-persisted fingerprint model
|
||||
sig { params(fingerprint: T.untyped).returns(Domain::PostFileFingerprint) }
|
||||
def self.from_dhash_fingerprint(fingerprint)
|
||||
# Convert the numeric fingerprint to a binary string and pad to the correct length
|
||||
# HASH_SIZE is expressed in bytes (32), so the padded string is HASH_SIZE * 8 = 256 bits long
|
||||
hash_value = fingerprint.to_s(2).rjust(HASH_SIZE * 8, "0")
|
||||
|
||||
new_fingerprint = new
|
||||
new_fingerprint.hash_value = hash_value
|
||||
new_fingerprint
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,15 +1,9 @@
|
||||
class CreateDomainPostFileThumbnails < ActiveRecord::Migration[7.0]
|
||||
def change
|
||||
create_table :domain_post_file_fingerprints do |t|
|
||||
t.references :post_file,
|
||||
null: false,
|
||||
foreign_key: {
|
||||
to_table: :domain_post_files,
|
||||
},
|
||||
index: true
|
||||
t.binary :blob_sha256, null: false, index: true
|
||||
t.bit :hash_value, limit: 256
|
||||
t.timestamps
|
||||
|
||||
t.index :hash_value, using: :hnsw, opclass: :bit_hamming_ops
|
||||
end
|
||||
end
|
||||
|
||||
@@ -2702,7 +2702,7 @@ ALTER SEQUENCE public.domain_inkbunny_users_id_seq OWNED BY public.domain_inkbun
|
||||
|
||||
CREATE TABLE public.domain_post_file_fingerprints (
|
||||
id bigint NOT NULL,
|
||||
post_file_id bigint NOT NULL,
|
||||
blob_sha256 bytea NOT NULL,
|
||||
hash_value bit(256),
|
||||
created_at timestamp(6) without time zone NOT NULL,
|
||||
updated_at timestamp(6) without time zone NOT NULL
|
||||
@@ -7018,6 +7018,13 @@ CREATE UNIQUE INDEX index_domain_inkbunny_users_on_ib_user_id ON public.domain_i
|
||||
CREATE INDEX index_domain_inkbunny_users_on_shallow_update_log_entry_id ON public.domain_inkbunny_users USING btree (shallow_update_log_entry_id);
|
||||
|
||||
|
||||
--
|
||||
-- Name: index_domain_post_file_fingerprints_on_blob_sha256; Type: INDEX; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
CREATE INDEX index_domain_post_file_fingerprints_on_blob_sha256 ON public.domain_post_file_fingerprints USING btree (blob_sha256);
|
||||
|
||||
|
||||
--
|
||||
-- Name: index_domain_post_file_fingerprints_on_hash_value; Type: INDEX; Schema: public; Owner: -
|
||||
--
|
||||
@@ -7025,13 +7032,6 @@ CREATE INDEX index_domain_inkbunny_users_on_shallow_update_log_entry_id ON publi
|
||||
CREATE INDEX index_domain_post_file_fingerprints_on_hash_value ON public.domain_post_file_fingerprints USING hnsw (hash_value public.bit_hamming_ops);
|
||||
|
||||
|
||||
--
|
||||
-- Name: index_domain_post_file_fingerprints_on_post_file_id; Type: INDEX; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
CREATE INDEX index_domain_post_file_fingerprints_on_post_file_id ON public.domain_post_file_fingerprints USING btree (post_file_id);
|
||||
|
||||
|
||||
SET default_tablespace = mirai;
|
||||
|
||||
--
|
||||
@@ -8527,14 +8527,6 @@ ALTER TABLE ONLY public.domain_fa_user_avatars
|
||||
ADD CONSTRAINT fk_rails_2a03f31297 FOREIGN KEY (log_entry_id) REFERENCES public.http_log_entries(id);
|
||||
|
||||
|
||||
--
|
||||
-- Name: domain_post_file_fingerprints fk_rails_2f27fdde74; Type: FK CONSTRAINT; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
ALTER TABLE ONLY public.domain_post_file_fingerprints
|
||||
ADD CONSTRAINT fk_rails_2f27fdde74 FOREIGN KEY (post_file_id) REFERENCES public.domain_post_files(id);
|
||||
|
||||
|
||||
--
|
||||
-- Name: domain_inkbunny_files fk_rails_31a33e433e; Type: FK CONSTRAINT; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
1
sorbet/rbi/dsl/application_controller.rbi
generated
1
sorbet/rbi/dsl/application_controller.rbi
generated
@@ -44,6 +44,7 @@ class ApplicationController
|
||||
include ::Domain::ModelHelper
|
||||
include ::Domain::PaginationHelper
|
||||
include ::Domain::PostGroupsHelper
|
||||
include ::Domain::VisualSearchHelper
|
||||
include ::DomainSourceHelper
|
||||
include ::GoodJobHelper
|
||||
include ::IpAddressHelper
|
||||
|
||||
1
sorbet/rbi/dsl/devise_controller.rbi
generated
1
sorbet/rbi/dsl/devise_controller.rbi
generated
@@ -41,6 +41,7 @@ class DeviseController
|
||||
include ::Domain::ModelHelper
|
||||
include ::Domain::PaginationHelper
|
||||
include ::Domain::PostGroupsHelper
|
||||
include ::Domain::VisualSearchHelper
|
||||
include ::DomainSourceHelper
|
||||
include ::GoodJobHelper
|
||||
include ::IpAddressHelper
|
||||
|
||||
110
sorbet/rbi/dsl/domain/post_file_fingerprint.rbi
generated
110
sorbet/rbi/dsl/domain/post_file_fingerprint.rbi
generated
@@ -640,6 +640,51 @@ class Domain::PostFileFingerprint
|
||||
end
|
||||
|
||||
module GeneratedAttributeMethods
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def blob_sha256; end
|
||||
|
||||
sig { params(value: T.nilable(::String)).returns(T.nilable(::String)) }
|
||||
def blob_sha256=(value); end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def blob_sha256?; end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def blob_sha256_before_last_save; end
|
||||
|
||||
sig { returns(T.untyped) }
|
||||
def blob_sha256_before_type_cast; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def blob_sha256_came_from_user?; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
|
||||
def blob_sha256_change; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
|
||||
def blob_sha256_change_to_be_saved; end
|
||||
|
||||
sig { params(from: T.nilable(::String), to: T.nilable(::String)).returns(T::Boolean) }
|
||||
def blob_sha256_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def blob_sha256_in_database; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
|
||||
def blob_sha256_previous_change; end
|
||||
|
||||
sig { params(from: T.nilable(::String), to: T.nilable(::String)).returns(T::Boolean) }
|
||||
def blob_sha256_previously_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def blob_sha256_previously_was; end
|
||||
|
||||
sig { returns(T.nilable(::String)) }
|
||||
def blob_sha256_was; end
|
||||
|
||||
sig { void }
|
||||
def blob_sha256_will_change!; end
|
||||
|
||||
sig { returns(T.nilable(::ActiveSupport::TimeWithZone)) }
|
||||
def created_at; end
|
||||
|
||||
@@ -830,50 +875,8 @@ class Domain::PostFileFingerprint
|
||||
sig { void }
|
||||
def id_will_change!; end
|
||||
|
||||
sig { returns(T.nilable(::Integer)) }
|
||||
def post_file_id; end
|
||||
|
||||
sig { params(value: T.nilable(::Integer)).returns(T.nilable(::Integer)) }
|
||||
def post_file_id=(value); end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def post_file_id?; end
|
||||
|
||||
sig { returns(T.nilable(::Integer)) }
|
||||
def post_file_id_before_last_save; end
|
||||
|
||||
sig { returns(T.untyped) }
|
||||
def post_file_id_before_type_cast; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def post_file_id_came_from_user?; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::Integer), T.nilable(::Integer)])) }
|
||||
def post_file_id_change; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::Integer), T.nilable(::Integer)])) }
|
||||
def post_file_id_change_to_be_saved; end
|
||||
|
||||
sig { params(from: T.nilable(::Integer), to: T.nilable(::Integer)).returns(T::Boolean) }
|
||||
def post_file_id_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
|
||||
|
||||
sig { returns(T.nilable(::Integer)) }
|
||||
def post_file_id_in_database; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::Integer), T.nilable(::Integer)])) }
|
||||
def post_file_id_previous_change; end
|
||||
|
||||
sig { params(from: T.nilable(::Integer), to: T.nilable(::Integer)).returns(T::Boolean) }
|
||||
def post_file_id_previously_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end
|
||||
|
||||
sig { returns(T.nilable(::Integer)) }
|
||||
def post_file_id_previously_was; end
|
||||
|
||||
sig { returns(T.nilable(::Integer)) }
|
||||
def post_file_id_was; end
|
||||
|
||||
sig { void }
|
||||
def post_file_id_will_change!; end
|
||||
def restore_blob_sha256!; end
|
||||
|
||||
sig { void }
|
||||
def restore_created_at!; end
|
||||
@@ -887,12 +890,15 @@ class Domain::PostFileFingerprint
|
||||
sig { void }
|
||||
def restore_id_value!; end
|
||||
|
||||
sig { void }
|
||||
def restore_post_file_id!; end
|
||||
|
||||
sig { void }
|
||||
def restore_updated_at!; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::String), T.nilable(::String)])) }
|
||||
def saved_change_to_blob_sha256; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def saved_change_to_blob_sha256?; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::ActiveSupport::TimeWithZone), T.nilable(::ActiveSupport::TimeWithZone)])) }
|
||||
def saved_change_to_created_at; end
|
||||
|
||||
@@ -917,12 +923,6 @@ class Domain::PostFileFingerprint
|
||||
sig { returns(T::Boolean) }
|
||||
def saved_change_to_id_value?; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::Integer), T.nilable(::Integer)])) }
|
||||
def saved_change_to_post_file_id; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def saved_change_to_post_file_id?; end
|
||||
|
||||
sig { returns(T.nilable([T.nilable(::ActiveSupport::TimeWithZone), T.nilable(::ActiveSupport::TimeWithZone)])) }
|
||||
def saved_change_to_updated_at; end
|
||||
|
||||
@@ -984,6 +984,9 @@ class Domain::PostFileFingerprint
|
||||
sig { void }
|
||||
def updated_at_will_change!; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def will_save_change_to_blob_sha256?; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def will_save_change_to_created_at?; end
|
||||
|
||||
@@ -996,9 +999,6 @@ class Domain::PostFileFingerprint
|
||||
sig { returns(T::Boolean) }
|
||||
def will_save_change_to_id_value?; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def will_save_change_to_post_file_id?; end
|
||||
|
||||
sig { returns(T::Boolean) }
|
||||
def will_save_change_to_updated_at?; end
|
||||
end
|
||||
|
||||
6
sorbet/rbi/dsl/generated_path_helpers_module.rbi
generated
6
sorbet/rbi/dsl/generated_path_helpers_module.rbi
generated
@@ -236,4 +236,10 @@ module GeneratedPathHelpersModule
|
||||
|
||||
sig { params(args: T.untyped).returns(String) }
|
||||
def user_session_path(*args); end
|
||||
|
||||
sig { params(args: T.untyped).returns(String) }
|
||||
def visual_results_domain_posts_path(*args); end
|
||||
|
||||
sig { params(args: T.untyped).returns(String) }
|
||||
def visual_search_domain_posts_path(*args); end
|
||||
end
|
||||
|
||||
6
sorbet/rbi/dsl/generated_url_helpers_module.rbi
generated
6
sorbet/rbi/dsl/generated_url_helpers_module.rbi
generated
@@ -236,4 +236,10 @@ module GeneratedUrlHelpersModule
|
||||
|
||||
sig { params(args: T.untyped).returns(String) }
|
||||
def user_session_url(*args); end
|
||||
|
||||
sig { params(args: T.untyped).returns(String) }
|
||||
def visual_results_domain_posts_url(*args); end
|
||||
|
||||
sig { params(args: T.untyped).returns(String) }
|
||||
def visual_search_domain_posts_url(*args); end
|
||||
end
|
||||
|
||||
1
sorbet/rbi/dsl/rails/application_controller.rbi
generated
1
sorbet/rbi/dsl/rails/application_controller.rbi
generated
@@ -44,6 +44,7 @@ class Rails::ApplicationController
|
||||
include ::Domain::ModelHelper
|
||||
include ::Domain::PaginationHelper
|
||||
include ::Domain::PostGroupsHelper
|
||||
include ::Domain::VisualSearchHelper
|
||||
include ::DomainSourceHelper
|
||||
include ::GoodJobHelper
|
||||
include ::IpAddressHelper
|
||||
|
||||
@@ -44,6 +44,7 @@ class Rails::Conductor::BaseController
|
||||
include ::Domain::ModelHelper
|
||||
include ::Domain::PaginationHelper
|
||||
include ::Domain::PostGroupsHelper
|
||||
include ::Domain::VisualSearchHelper
|
||||
include ::DomainSourceHelper
|
||||
include ::GoodJobHelper
|
||||
include ::IpAddressHelper
|
||||
|
||||
1
sorbet/rbi/dsl/rails/health_controller.rbi
generated
1
sorbet/rbi/dsl/rails/health_controller.rbi
generated
@@ -44,6 +44,7 @@ class Rails::HealthController
|
||||
include ::Domain::ModelHelper
|
||||
include ::Domain::PaginationHelper
|
||||
include ::Domain::PostGroupsHelper
|
||||
include ::Domain::VisualSearchHelper
|
||||
include ::DomainSourceHelper
|
||||
include ::GoodJobHelper
|
||||
include ::IpAddressHelper
|
||||
|
||||
73
spec/controllers/domain/posts_controller_spec.rb
Normal file
73
spec/controllers/domain/posts_controller_spec.rb
Normal file
@@ -0,0 +1,73 @@
|
||||
# typed: false
|
||||
require "rails_helper"
|
||||
|
||||
RSpec.describe Domain::PostsController, type: :controller do
|
||||
# Create a real user with admin role
|
||||
let(:user) { create(:user, :admin) }
|
||||
|
||||
before do
|
||||
# Sign in the user
|
||||
sign_in user
|
||||
# Mock authorization to allow all actions
|
||||
allow(controller).to receive(:authorize).and_return(true)
|
||||
end
|
||||
|
||||
# ============================================================
|
||||
# Test the controller's actual actions and their behavior
|
||||
# ============================================================
|
||||
|
||||
describe "GET #visual_search" do
|
||||
it "returns a successful response and renders the visual_search template" do
|
||||
get :visual_search
|
||||
expect(response).to be_successful
|
||||
expect(response).to render_template(:visual_search)
|
||||
end
|
||||
end
|
||||
|
||||
describe "POST #visual_results" do
|
||||
context "with no image provided" do
|
||||
it "renders the visual_search template" do
|
||||
post :visual_results
|
||||
expect(response).to render_template(:visual_search)
|
||||
end
|
||||
end
|
||||
|
||||
context "with an image URL" do
|
||||
let(:mock_hash_value) { "1010101010101010" }
|
||||
let(:mock_fingerprints) { Domain::PostFileFingerprint.none }
|
||||
let(:temp_file_path) { "/tmp/test_image.jpg" }
|
||||
|
||||
it "uses PostFileFingerprint model methods for fingerprinting and finding similar images" do
|
||||
# We need to mock the image downloading and processing since we can't do that in tests
|
||||
allow(controller).to receive(:process_image_input).and_return(
|
||||
[temp_file_path, "image/jpeg"],
|
||||
)
|
||||
allow(controller).to receive(:create_thumbnail).and_return(
|
||||
"",
|
||||
)
|
||||
|
||||
# Set up expectations for our model methods - this is what we're really testing
|
||||
expect(Domain::PostFileFingerprint).to receive(:from_file_path).with(
|
||||
temp_file_path,
|
||||
).and_return(
|
||||
instance_double(
|
||||
Domain::PostFileFingerprint,
|
||||
hash_value: mock_hash_value,
|
||||
),
|
||||
)
|
||||
|
||||
expect(Domain::PostFileFingerprint).to receive(
|
||||
:similar_to_fingerprint,
|
||||
).with(mock_hash_value).and_return(mock_fingerprints)
|
||||
|
||||
post :visual_results,
|
||||
params: {
|
||||
image_url: "https://example.com/image.jpg",
|
||||
}
|
||||
|
||||
# Just verify the template was rendered
|
||||
expect(response).to render_template(:visual_results)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -50,4 +50,39 @@ FactoryBot.define do
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
trait :image_file do
|
||||
file_name { "image.jpg" }
|
||||
url_str { "https://example.com/image.jpg" }
|
||||
file_order { 1 }
|
||||
md5_initial { "d41d8cd98f00b204e9800998ecf8427e" }
|
||||
md5_full { "d41d8cd98f00b204e9800998ecf8427e" }
|
||||
before(:create) do
|
||||
self.log_entry =
|
||||
create(
|
||||
:blob_file,
|
||||
content_type: "image/jpeg",
|
||||
contents:
|
||||
File.read(
|
||||
Rails.root.join(
|
||||
"test/fixtures/files/images/thumb-036aaab6-low-quality.jpeg",
|
||||
),
|
||||
),
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
FactoryBot.define do
|
||||
factory :domain_post_file_inkbunny_post_file,
|
||||
class: "Domain::PostFile::InkbunnyPostFile",
|
||||
parent: :domain_post_file do
|
||||
association :post, factory: :domain_post_fa_post
|
||||
sequence(:ib_id) { |n| 12_345 + n }
|
||||
sequence(:file_name) { |n| "image#{n}.jpg" }
|
||||
sequence(:url_str) { |n| "https://example.com/image#{n}.jpg" }
|
||||
file_order { 1 }
|
||||
md5_initial { "d41d8cd98f00b204e9800998ecf8427e" }
|
||||
md5_full { "d41d8cd98f00b204e9800998ecf8427e" }
|
||||
end
|
||||
end
|
||||
|
||||
261
spec/models/domain/post_file_fingerprint_spec.rb
Normal file
261
spec/models/domain/post_file_fingerprint_spec.rb
Normal file
@@ -0,0 +1,261 @@
|
||||
# typed: false
|
||||
require "rails_helper"
|
||||
|
||||
RSpec.describe Domain::PostFileFingerprint, type: :model do
|
||||
describe ".similar_to_fingerprint" do
|
||||
let(:image_paths) do
|
||||
{
|
||||
# Map original images to their low-quality versions
|
||||
"images/thumb-036aaab6-content-container.jpeg" =>
|
||||
"images/thumb-036aaab6-low-quality.jpeg",
|
||||
"images/thumb-ac63d9d7-content-container.jpeg" =>
|
||||
"images/thumb-ac63d9d7-low-quality.jpeg",
|
||||
"images/thumb-c8feb8a92-content-container.jpeg" =>
|
||||
"images/thumb-c8feb8a92-low-quality.jpeg",
|
||||
}
|
||||
end
|
||||
|
||||
before(:each) do
|
||||
# Create posts and post_files with the original images
|
||||
@fingerprints = []
|
||||
|
||||
image_paths.keys.each_with_index do |image_path, index|
|
||||
# Create post and attach the original image
|
||||
post = create(:domain_post_fa_post)
|
||||
|
||||
# Read the image file content
|
||||
image_content =
|
||||
File.read(
|
||||
Rails.root.join("test/fixtures/files", image_path),
|
||||
mode: "rb",
|
||||
)
|
||||
|
||||
# Create a blob file with the image content
|
||||
blob =
|
||||
create(
|
||||
:blob_file,
|
||||
content_bytes: image_content,
|
||||
content_type: "image/jpeg",
|
||||
sha256: Digest::SHA256.digest(image_content),
|
||||
)
|
||||
|
||||
# Create a post file with the blob
|
||||
post_file =
|
||||
create(
|
||||
:domain_post_file,
|
||||
post: post,
|
||||
state: "ok",
|
||||
blob_sha256: blob.sha256,
|
||||
)
|
||||
|
||||
# Create a fingerprint for the post file
|
||||
# The fingerprint should be automatically calculated in the before_validation callback
|
||||
fingerprint = Domain::PostFileFingerprint.create!(post_file: post_file)
|
||||
@fingerprints << fingerprint
|
||||
end
|
||||
end
|
||||
|
||||
it "returns similar images in order of similarity" do
|
||||
# For each low-quality image, generate a fingerprint and test that similar_to_fingerprint
|
||||
# returns the correct original image fingerprint first
|
||||
|
||||
image_paths.each_with_index do |(original_path, low_quality_path), index|
|
||||
# Read the low-quality image
|
||||
low_quality_image_path =
|
||||
Rails.root.join("test/fixtures/files", low_quality_path)
|
||||
|
||||
# Generate a fingerprint for the low-quality image
|
||||
fingerprint = DHashVips::IDHash.fingerprint(low_quality_image_path.to_s)
|
||||
hash_value =
|
||||
fingerprint.to_s(2).rjust(
|
||||
Domain::PostFileFingerprint::HASH_SIZE * 8,
|
||||
"0",
|
||||
)
|
||||
|
||||
# Find similar fingerprints
|
||||
similar_fingerprints =
|
||||
Domain::PostFileFingerprint.similar_to_fingerprint(hash_value)
|
||||
|
||||
# The original image's fingerprint should be in the top results
|
||||
# The pgvector operator <~> (Hamming distance) may produce ties
|
||||
# when two fingerprints have the exact same distance, resulting in
|
||||
# arbitrary ordering among equal-distance results
|
||||
expected_fingerprint = @fingerprints[index]
|
||||
found_index = similar_fingerprints.find_index(expected_fingerprint)
|
||||
|
||||
# Get the distance for the first result to compare with our expected result
|
||||
first_result_distance =
|
||||
Domain::PostFileFingerprint.hamming_distance(
|
||||
hash_value,
|
||||
similar_fingerprints.first.hash_value,
|
||||
)
|
||||
|
||||
# Get the distance for our expected fingerprint
|
||||
expected_distance =
|
||||
Domain::PostFileFingerprint.hamming_distance(
|
||||
hash_value,
|
||||
expected_fingerprint.hash_value,
|
||||
)
|
||||
|
||||
# Verify our expected fingerprint is in the results and has the same or
|
||||
# very close distance to the first result
|
||||
expect(similar_fingerprints).to include(expected_fingerprint)
|
||||
expect(found_index).to be <= 1,
|
||||
"Expected fingerprint #{expected_fingerprint.id} should be among the first two results"
|
||||
expect(expected_distance).to be_within(1).of(first_result_distance),
|
||||
"Expected distance (#{expected_distance}) should match the first result's distance (#{first_result_distance})"
|
||||
|
||||
# Calculate similarity for verification
|
||||
similarity =
|
||||
similar_fingerprints.first.similarity_percentage_to(
|
||||
@fingerprints[index],
|
||||
)
|
||||
expect(similarity).to be > 70 # Expect at least 70% similarity
|
||||
|
||||
# Check that this low-quality image has low similarity with OTHER original images
|
||||
other_fingerprint_indices = (0...@fingerprints.size).to_a - [index]
|
||||
other_fingerprint_indices.each do |other_index|
|
||||
# Generate a fingerprint from the low-quality image
|
||||
low_quality_fingerprint = Domain::PostFileFingerprint.new
|
||||
low_quality_fingerprint.hash_value = hash_value
|
||||
|
||||
# Compare with an original image that it should NOT match
|
||||
other_similarity =
|
||||
low_quality_fingerprint.similarity_percentage_to(
|
||||
@fingerprints[other_index],
|
||||
)
|
||||
|
||||
# Should have lower similarity to non-matching original images
|
||||
expect(other_similarity).to be < 70,
|
||||
"Low-quality version of image #{index} should not be similar to original image #{other_index}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
it "calculates correct hamming distance between fingerprints" do
|
||||
# Test the hamming_distance_to method
|
||||
reference_fingerprint = @fingerprints.first
|
||||
|
||||
# A fingerprint should have zero distance to itself
|
||||
expect(
|
||||
reference_fingerprint.hamming_distance_to(reference_fingerprint),
|
||||
).to eq(0)
|
||||
|
||||
# Different fingerprints should have non-zero distance
|
||||
expect(
|
||||
reference_fingerprint.hamming_distance_to(@fingerprints[1]),
|
||||
).to be > 0
|
||||
|
||||
# Test the hamming_distance class method
|
||||
expect(
|
||||
Domain::PostFileFingerprint.hamming_distance(
|
||||
reference_fingerprint.hash_value,
|
||||
@fingerprints[1].hash_value,
|
||||
),
|
||||
).to be > 0
|
||||
end
|
||||
|
||||
it "calculates correct similarity percentage" do
|
||||
# Test the similarity_percentage_to method
|
||||
reference_fingerprint = @fingerprints.first
|
||||
|
||||
# A fingerprint should be 100% similar to itself
|
||||
expect(
|
||||
reference_fingerprint.similarity_percentage_to(reference_fingerprint),
|
||||
).to eq(100)
|
||||
|
||||
# Different fingerprints should have less than 100% similarity
|
||||
expect(
|
||||
reference_fingerprint.similarity_percentage_to(@fingerprints[1]),
|
||||
).to be < 100
|
||||
end
|
||||
end
|
||||
|
||||
describe ".from_file_path" do
|
||||
it "creates a fingerprint from a file path" do
|
||||
# Use one of the test images
|
||||
file_path =
|
||||
Rails
|
||||
.root
|
||||
.join(
|
||||
"test/fixtures/files/images/thumb-036aaab6-content-container.jpeg",
|
||||
)
|
||||
.to_s
|
||||
|
||||
# Create a fingerprint from the file path
|
||||
fingerprint = Domain::PostFileFingerprint.from_file_path(file_path)
|
||||
|
||||
# Check that the fingerprint was created properly
|
||||
expect(fingerprint).to be_a(Domain::PostFileFingerprint)
|
||||
expect(fingerprint.hash_value).to be_present
|
||||
expect(fingerprint.hash_value.length).to eq(
|
||||
Domain::PostFileFingerprint::HASH_SIZE * 8,
|
||||
)
|
||||
expect(fingerprint.persisted?).to be(false)
|
||||
end
|
||||
|
||||
it "raises an error for non-existent files" do
|
||||
expect {
|
||||
Domain::PostFileFingerprint.from_file_path("/non/existent/file.jpg")
|
||||
}.to raise_error(ArgumentError, /File does not exist/)
|
||||
end
|
||||
end
|
||||
|
||||
describe ".from_vips_image" do
|
||||
it "creates a fingerprint from a Vips::Image" do
|
||||
# Load a test image as a Vips::Image
|
||||
file_path =
|
||||
Rails
|
||||
.root
|
||||
.join(
|
||||
"test/fixtures/files/images/thumb-036aaab6-content-container.jpeg",
|
||||
)
|
||||
.to_s
|
||||
vips_image = Vips::Image.new_from_file(file_path)
|
||||
|
||||
# Create a fingerprint from the Vips::Image
|
||||
fingerprint = Domain::PostFileFingerprint.from_vips_image(vips_image)
|
||||
|
||||
# Check that the fingerprint was created properly
|
||||
expect(fingerprint).to be_a(Domain::PostFileFingerprint)
|
||||
expect(fingerprint.hash_value).to be_present
|
||||
expect(fingerprint.hash_value.length).to eq(
|
||||
Domain::PostFileFingerprint::HASH_SIZE * 8,
|
||||
)
|
||||
expect(fingerprint.persisted?).to be(false)
|
||||
end
|
||||
|
||||
it "generates fingerprints with high similarity to from_file_path for the same image" do
|
||||
# This test verifies that fingerprints from vips_image and file_path are highly similar
|
||||
# for the same image, even if not exactly identical due to format considerations
|
||||
file_path =
|
||||
Rails
|
||||
.root
|
||||
.join(
|
||||
"test/fixtures/files/images/thumb-ac63d9d7-content-container.jpeg",
|
||||
)
|
||||
.to_s
|
||||
|
||||
# Create fingerprints using both methods
|
||||
vips_image = Vips::Image.new_from_file(file_path)
|
||||
from_vips_fingerprint =
|
||||
Domain::PostFileFingerprint.from_vips_image(vips_image)
|
||||
from_file_fingerprint =
|
||||
Domain::PostFileFingerprint.from_file_path(file_path)
|
||||
|
||||
# The fingerprints might not be 100% identical due to how Vips handles direct images
|
||||
# vs how it handles file loading, but they should have high similarity
|
||||
similarity =
|
||||
100 -
|
||||
(
|
||||
Domain::PostFileFingerprint.hamming_distance(
|
||||
from_vips_fingerprint.hash_value,
|
||||
from_file_fingerprint.hash_value,
|
||||
).to_f / (Domain::PostFileFingerprint::HASH_SIZE * 8) * 100
|
||||
)
|
||||
|
||||
# The similarity should be very high (above 90%)
|
||||
expect(similarity).to be > 90
|
||||
end
|
||||
end
|
||||
end
|
||||
109
spec/models/domain/post_file_spec.rb
Normal file
109
spec/models/domain/post_file_spec.rb
Normal file
@@ -0,0 +1,109 @@
|
||||
# typed: false
|
||||
require "rails_helper"
|
||||
|
||||
RSpec.describe Domain::PostFile, type: :model do
  # Exercises what happens to Domain::PostFileFingerprint rows when a
  # Domain::PostFile is created through its factory.
  describe "after_save callback" do
    it "creates and saves a fingerprint when saving a post file with a valid image blob" do
      owner_post = create(:domain_post_fa_post)

      # Read a JPEG fixture as raw bytes and store it as an image blob keyed
      # by the SHA-256 digest of those bytes.
      jpeg_bytes =
        File.binread(
          Rails.root.join(
            "test/fixtures/files/images/thumb-036aaab6-content-container.jpeg",
          ),
        )
      image_blob =
        create(
          :blob_file,
          content_bytes: jpeg_bytes,
          content_type: "image/jpeg",
          sha256: Digest::SHA256.digest(jpeg_bytes),
        )

      # Creating the post file must add exactly one fingerprint row.
      post_file = nil
      expect {
        post_file =
          create(
            :domain_post_file,
            post: owner_post,
            state: "ok",
            blob_sha256: image_blob.sha256,
          )
      }.to change(Domain::PostFileFingerprint, :count).by(1)

      # The newest fingerprint belongs to the new post file and carries a
      # hash of the expected length.
      newest = Domain::PostFileFingerprint.last
      expect(newest.post_file).to eq(post_file)
      expect(newest.hash_value).to be_present
      expect(newest.hash_value.length).to eq(
        Domain::PostFileFingerprint::HASH_SIZE * 8,
      )
    end

    it "does not create a fingerprint for non-image files" do
      owner_post = create(:domain_post_fa_post)

      # A plain-text blob stands in for any non-image content.
      text_bytes = "This is a text file, not an image"
      text_blob =
        create(
          :blob_file,
          content_bytes: text_bytes,
          content_type: "text/plain",
          sha256: Digest::SHA256.digest(text_bytes),
        )

      # Creating the post file must leave the fingerprint table untouched.
      post_file = nil
      expect {
        post_file =
          create(
            :domain_post_file,
            post: owner_post,
            state: "ok",
            blob_sha256: text_blob.sha256,
          )
      }.not_to change(Domain::PostFileFingerprint, :count)

      expect(
        Domain::PostFileFingerprint.where(post_file: post_file).count,
      ).to eq(0)
    end

    it "does not create a fingerprint for files with missing blobs" do
      owner_post = create(:domain_post_fa_post)

      # A pending post file is created without passing any blob reference.
      post_file = nil
      expect {
        post_file =
          create(:domain_post_file, post: owner_post, state: "pending")
      }.not_to change(Domain::PostFileFingerprint, :count)

      expect(
        Domain::PostFileFingerprint.where(post_file: post_file).count,
      ).to eq(0)
    end
  end
end
|
||||
BIN
test/fixtures/files/images/thumb-036aaab6-low-quality.jpeg
vendored
Normal file
BIN
test/fixtures/files/images/thumb-036aaab6-low-quality.jpeg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 16 KiB |
BIN
test/fixtures/files/images/thumb-ac63d9d7-content-container.jpeg
vendored
Normal file
BIN
test/fixtures/files/images/thumb-ac63d9d7-content-container.jpeg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 200 KiB |
BIN
test/fixtures/files/images/thumb-ac63d9d7-low-quality.jpeg
vendored
Normal file
BIN
test/fixtures/files/images/thumb-ac63d9d7-low-quality.jpeg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 11 KiB |
BIN
test/fixtures/files/images/thumb-c8feb8a92-content-container.jpeg
vendored
Normal file
BIN
test/fixtures/files/images/thumb-c8feb8a92-content-container.jpeg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 219 KiB |
BIN
test/fixtures/files/images/thumb-c8feb8a92-low-quality.jpeg
vendored
Normal file
BIN
test/fixtures/files/images/thumb-c8feb8a92-low-quality.jpeg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 12 KiB |
Reference in New Issue
Block a user