Fix the visual similarity query; show visually similar posts on the post show page

This commit is contained in:
Dylan Knutson
2025-06-20 04:05:29 +00:00
parent a2091e1555
commit c527a05705
14 changed files with 167 additions and 45 deletions

View File

@@ -150,19 +150,17 @@ class Domain::PostsController < DomainController
@uploaded_hash_value = generate_fingerprint(image_path)
@uploaded_detail_hash_value = generate_detail_fingerprint(image_path)
@post_file_fingerprints =
find_similar_fingerprints(@uploaded_hash_value).to_a
@post_file_fingerprints.sort! do |a, b|
helpers.calculate_similarity_percentage(
b.fingerprint_detail_value,
@uploaded_detail_hash_value,
) <=>
helpers.calculate_similarity_percentage(
a.fingerprint_detail_value,
@uploaded_detail_hash_value,
)
end
helpers.find_similar_fingerprints(
fingerprint_value: @uploaded_hash_value,
fingerprint_detail_value: @uploaded_detail_hash_value,
).to_a
@post_file_fingerprints = @post_file_fingerprints.take(10)
@posts = @post_file_fingerprints.map(&:post_file).compact.map(&:post)
@posts =
@post_file_fingerprints
.map(&:fingerprint)
.map(&:post_file)
.compact
.map(&:post)
ensure
# Clean up any temporary files
if @temp_file
@@ -253,26 +251,6 @@ class Domain::PostsController < DomainController
Domain::PostFile::BitFingerprint.detail_from_file_path(image_path)
end
# Find the stored fingerprints closest to the given fingerprint.
#
# @param fingerprint_value [String] fingerprint to compare against; it is
#   escaped with `quote_string` and inlined into the SQL below
# @return [ActiveRecord::Relation] at most 32 BitFingerprint rows, one per
#   post_file_id, ordered by ascending `distance`
sig { params(fingerprint_value: String).returns(ActiveRecord::Relation) }
def find_similar_fingerprints(fingerprint_value)
# The query is assembled by hand. The inner select computes the `<~>`
# distance (pgvector's Hamming distance operator for bit values) of every
# row to the given fingerprint. `distinct on (post_file_id)` together with
# `order by post_file_id, distance asc` keeps only the closest row for each
# post file.
subquery = <<~SQL
(
select distinct on (post_file_id) *, (fingerprint_value <~> '#{ActiveRecord::Base.connection.quote_string(fingerprint_value)}') as distance
from #{Domain::PostFile::BitFingerprint.table_name}
order by post_file_id, distance asc
) subquery
SQL
# The outer query re-sorts the per-file winners by distance and keeps the
# 32 closest. The computed `distance` column is part of the selected rows.
Domain::PostFile::BitFingerprint
.select("*")
.from(subquery)
.order("distance ASC")
.limit(32)
end
sig { override.returns(DomainController::DomainParamConfig) }
def self.param_config
DomainController::DomainParamConfig.new(

View File

@@ -320,7 +320,12 @@ module Domain::DescriptionsHelper
)
end
def props_for_post_hover_preview(post, link_text, visual_style)
cache_key = [post, policy(post), "popover_inline_link_domain_post"]
cache_key = [
post,
policy(post),
link_text,
"popover_inline_link_domain_post",
]
Rails
.cache
.fetch(cache_key) do

View File

@@ -1,10 +1,15 @@
# typed: true
# typed: strict
module Domain
module VisualSearchHelper
extend T::Sig
# Calculate the similarity percentage between two fingerprint hash values
# @param hash_value [String] The hash value to compare
# @param reference_hash_value [String] The reference hash value to compare against
# @return [Float] The similarity percentage between 0 and 100
sig do
params(hash_value: String, reference_hash_value: String).returns(Float)
end
def calculate_similarity_percentage(hash_value, reference_hash_value)
# Calculate hamming distance between the two hash values
distance =
@@ -24,6 +29,7 @@ module Domain
# Determine the background color class based on similarity percentage
# @param similarity_percentage [Float] The similarity percentage between 0 and 100
# @return [String] The Tailwind CSS background color class
sig { params(similarity_percentage: Float).returns(String) }
def match_badge_bg_color(similarity_percentage)
case similarity_percentage
when 90..100
@@ -40,6 +46,7 @@ module Domain
# Determine the text color class based on similarity percentage
# @param similarity_percentage [Float] The similarity percentage between 0 and 100
# @return [String] The Tailwind CSS text color class
sig { params(similarity_percentage: Float).returns(String) }
def match_text_color(similarity_percentage)
case similarity_percentage
when 90..100
@@ -56,8 +63,52 @@ module Domain
# Get the CSS classes for the match percentage badge
# @param similarity_percentage [Float] The similarity percentage between 0 and 100
# @return [String] The complete CSS classes for the match percentage badge
sig { params(similarity_percentage: Float).returns(String) }
def match_badge_classes(similarity_percentage)
"#{match_badge_bg_color(similarity_percentage)} text-white font-semibold text-xs rounded-full px-3 py-1 shadow-md"
"#{match_badge_bg_color(similarity_percentage)} text-white text-xs rounded-full px-3 py-1 shadow-md"
end
# Pairs a stored fingerprint record with its similarity score against a
# reference fingerprint.
class SimilarFingerprintResult < T::Struct
# The stored fingerprint record that was matched.
const :fingerprint, Domain::PostFile::BitFingerprint
# Similarity to the reference fingerprint as a percentage (higher is closer).
const :similarity_percentage, Float
end
# Find stored fingerprints that look similar to a reference image.
#
# Candidates are fetched nearest-first by the `<~>` distance on
# `fingerprint_value`, reduced to one fingerprint per post file, and then
# re-ranked by the similarity of their detail fingerprints.
#
# @param fingerprint_value [String] fingerprint used for the database
#   nearest-neighbour search
# @param fingerprint_detail_value [String] detail fingerprint used to score
#   each candidate
# @param limit [Integer] maximum number of results returned
# @param oversearch [Integer] multiplier applied to `limit` when fetching
#   candidates, so that de-duplication and re-ranking have extra rows to
#   choose from
# @return [Array<SimilarFingerprintResult>] results ordered from most to
#   least similar, at most `limit` entries
sig do
  params(
    fingerprint_value: String,
    fingerprint_detail_value: String,
    limit: Integer,
    oversearch: Integer,
  ).returns(T::Array[SimilarFingerprintResult])
end
def find_similar_fingerprints(
  fingerprint_value:,
  fingerprint_detail_value:,
  limit: 32,
  oversearch: 2
)
  model = Domain::PostFile::BitFingerprint
  connection = model.connection
  distance_sql =
    "(fingerprint_value <~> '#{connection.quote_string(fingerprint_value)}')"

  candidates =
    model.transaction do
      # SET LOCAL only lasts until the end of the transaction. A plain SET
      # would stay on the pooled connection and silently change the probe
      # count for every later query that happens to reuse it.
      connection.execute("SET LOCAL ivfflat.probes = 10")
      model.order(Arel.sql(distance_sql)).limit(limit * oversearch).to_a
    end

  candidates
    # Rows arrive nearest-first, so `uniq` keeps the closest fingerprint of
    # each post file.
    .uniq(&:post_file_id)
    .filter_map do |other_fingerprint|
      other_detail_value = other_fingerprint.fingerprint_detail_value
      # A candidate without a detail fingerprint cannot be scored; skip it
      # instead of raising and failing the whole page.
      next if other_detail_value.nil?

      SimilarFingerprintResult.new(
        fingerprint: other_fingerprint,
        similarity_percentage:
          calculate_similarity_percentage(
            fingerprint_detail_value,
            other_detail_value,
          ),
      )
    end
    .sort_by { |result| -result.similarity_percentage }
    .take(limit)
end
end
end

View File

@@ -2,6 +2,7 @@
<%# sky-link (default, normal blue link) %>
<%# description-section-link (smaller and has a border, for use in description section) %>
<% visual_style = local_assigns[:visual_style] || "sky-link" %>
<% link_text = local_assigns[:link_text] || post.title_for_view %>
<%=
react_component(
"PostHoverPreviewWrapper",

View File

@@ -10,7 +10,7 @@
<% post = neighbor.post %>
<% creator = post.class.has_creators? ? post.creator : nil %>
<div class="text-md flex items-center px-4 py-1 <%= border_classes %>">
<%= render "domain/has_description_html/inline_link_domain_post", post: post, link_text: post.title_for_view, visual_style: "sky-link" %>
<%= render "domain/has_description_html/inline_link_domain_post", post: post, visual_style: "sky-link" %>
</div>
<% if creator %>
<div class="text-md items-center px-4 py-1 <%= border_classes %>">
@@ -23,7 +23,7 @@
<% end %>
<% end %>
<% else %>
<div class="col-span-2 p-4 text-center text-slate-500">No similar posts found</div>
<div class="col-span-full p-4 text-center text-slate-500">No similar posts found</div>
<% end %>
</div>
</section>

View File

@@ -0,0 +1,52 @@
<%# Lists posts whose file fingerprint is visually similar to this post's primary file. %>
<%# Expects a `post` local. Only matches with a similarity of at least 70% are shown. %>
<section class="sky-section">
  <div class="section-header">Visually Similar Posts</div>
  <div class="grid grid-cols-[auto,auto,1fr,auto] bg-slate-100">
    <% fprint = post.primary_file_for_view&.bit_fingerprints&.first %>
    <% fprint_value = fprint&.fingerprint_value %>
    <% fprint_detail_value = fprint&.fingerprint_detail_value %>
    <%# Drop this post's own fingerprint, weak matches, and matches whose file or post is missing. %>
    <% fprints =
         if fprint_value && fprint_detail_value
           find_similar_fingerprints(
             fingerprint_value: fprint_value,
             fingerprint_detail_value: fprint_detail_value,
             limit: 5,
             oversearch: 5,
           )
             .reject { |f| f.fingerprint.id == fprint.id }
             .reject { |f| f.similarity_percentage < 70 }
             .select { |f| f.fingerprint.post_file&.post }
         else
           []
         end
    %>
    <% if fprint.nil? %>
      <div class="col-span-full p-4 text-center text-slate-500">File not processed</div>
    <% elsif fprints.any? %>
      <% num_neighbors = fprints.size %>
      <% fprints.each_with_index do |similar_fingerprint, index| %>
        <% border_classes = index < num_neighbors - 1 ? "border-b border-slate-300" : "" %>
        <%# Use a separate local so the partial's own `post` is not overwritten. %>
        <% similar_post = similar_fingerprint.fingerprint.post_file.post %>
        <% creator = similar_post.class.has_creators? ? similar_post.creator : nil %>
        <div class="text-md items-center pl-4 pr-2 py-1 flex justify-end <%= border_classes %>">
          <div class="w-full text-center font-medium <%= match_badge_classes(similar_fingerprint.similarity_percentage) %>">
            <%= similar_fingerprint.similarity_percentage %>%
          </div>
        </div>
        <div class="flex items-center <%= border_classes %>">
          <% source_url = similar_post.external_url_for_view&.to_s %>
          <% source_url = source_url && Addressable::URI.parse(source_url).host %>
          <% icon_path = source_url && icon_path_for_domain(source_url) %>
          <%= image_tag icon_path, class: "w-6 h-6 mr-2" if icon_path %>
        </div>
        <div class="text-md flex items-center pr-2 py-1 <%= border_classes %>">
          <%= render "domain/has_description_html/inline_link_domain_post", post: similar_post, visual_style: "sky-link" %>
        </div>
        <% if creator %>
          <div class="text-md items-center px-4 py-1 <%= border_classes %>">
            <%= render "domain/has_description_html/inline_link_domain_user", user: creator, visual_style: "sky-link", icon_size: "large" %>
          </div>
        <% else %>
          <div class="text-md truncate px-4 py-1 <%= border_classes %>">
            <%= similar_post.primary_creator_name_fallback_for_view %>
          </div>
        <% end %>
      <% end %>
    <% else %>
      <div class="col-span-full p-4 text-center text-slate-500">No visually similar posts found</div>
    <% end %>
  </div>
</section>

View File

@@ -5,5 +5,6 @@
<%= render_for_model(@post, "section_description", as: :post) %>
<%= render_for_model(@post, "section_tags", as: :post) %>
<%= render_for_model(@post, "section_sources", as: :post) %>
<%= render_for_model(@post, "section_visualy_similar_posts", as: :post) %>
<%= render_for_model(@post, "section_similar_posts", as: :post) %>
</div>

View File

@@ -32,9 +32,9 @@
<div class="mx-2">
<div class="flex flex-wrap gap-3 justify-center">
<% @post_file_fingerprints.each do |post_file_fingerprint| %>
<% post_file = post_file_fingerprint.post_file %>
<% post_file = post_file_fingerprint.fingerprint.post_file %>
<% post = post_file.post %>
<% similarity_percentage = calculate_similarity_percentage(post_file_fingerprint.fingerprint_detail_value, @uploaded_detail_hash_value) %>
<% similarity_percentage = post_file_fingerprint.similarity_percentage %>
<div class="flex flex-col h-fit rounded-md border border-gray-300 bg-white shadow hover:shadow-md transition-shadow duration-300 overflow-hidden">
<div class="flex justify-between items-center border-b border-gray-200 p-2 bg-gray-50 gap-2">
<div class="flex items-center">
@@ -69,7 +69,11 @@
</span>
<span class="font-medium">
<% if post.created_at %>
<span class="text-gray-500"><i class="far fa-clock mr-1"></i></span> <%= time_ago_in_words(post.created_at) %> ago
<span class="text-gray-500"><i class="far fa-clock mr-1"></i></span> <%=
post.posted_at.present? ?
time_ago_in_words(post.posted_at) :
time_ago_in_words(post.created_at)
%> ago
<% end %>
</span>
</div>

View File

@@ -14,7 +14,6 @@
partial: "domain/has_description_html/inline_link_domain_post",
locals: {
post: post,
link_text: post.title,
visual_style: "sky-link"
}
) %>

View File

@@ -11,7 +11,7 @@
<% fav_posts.each do |post| %>
<div class="flex flex-col px-4 py-2">
<span class="flex gap-2">
<%= render "domain/has_description_html/inline_link_domain_post", post: post, link_text: post.title, visual_style: "sky-link" %>
<%= render "domain/has_description_html/inline_link_domain_post", post: post, visual_style: "sky-link" %>
<span class="whitespace-nowrap flex-grow text-right text-slate-500">
<% if posted_at = post.posted_at %>
<%= time_ago_in_words(posted_at) %> ago

View File

@@ -1,5 +1,7 @@
class CreatePostFileThumbnailsFingerprints < ActiveRecord::Migration[7.2]
def change
up_only { execute "SET DEFAULT_TABLESPACE = mirai;" }
create_table :domain_post_file_thumbnails do |t|
t.references :post_file, null: false, index: false
t.integer :thumb_type, null: false
@@ -17,7 +19,6 @@ class CreatePostFileThumbnailsFingerprints < ActiveRecord::Migration[7.2]
t.timestamps
t.index %i[post_file_id thumbnail_id], unique: true
t.index :fingerprint_value, using: :hnsw, opclass: :bit_hamming_ops
end
# create_table :domain_post_file_vector_fingerprints do |t|

View File

@@ -0,0 +1,19 @@
# Adds an ivfflat index on `fingerprint_value` (bit_hamming_ops) of
# domain_post_file_bit_fingerprints, stored in the `mirai` tablespace.
class NewFingerprintValueIndex < ActiveRecord::Migration[7.2]
  # CREATE / DROP INDEX CONCURRENTLY cannot run inside a transaction block,
  # so the migration must not be wrapped in one.
  disable_ddl_transaction!

  def up
    # CONCURRENTLY builds the index without blocking writes to the table.
    # NOTE(review): the schema dump records this index WITH (lists='20000'),
    # while this migration uses 5000 — confirm which value is intended.
    execute <<~SQL
      CREATE INDEX CONCURRENTLY ivfflat_index_on_fingerprint_value
        ON public.domain_post_file_bit_fingerprints USING ivfflat
        (fingerprint_value bit_hamming_ops)
        WITH (lists = 5000)
        TABLESPACE mirai;
    SQL
  end

  def down
    # IF EXISTS lets the rollback succeed when the index is already gone.
    execute <<~SQL
      DROP INDEX CONCURRENTLY IF EXISTS ivfflat_index_on_fingerprint_value;
    SQL
  end
end

View File

@@ -7627,6 +7627,15 @@ CREATE UNIQUE INDEX index_users_on_email ON public.users USING btree (email);
CREATE UNIQUE INDEX index_users_on_reset_password_token ON public.users USING btree (reset_password_token);
SET default_tablespace = mirai;
--
-- Name: ivfflat_index_on_fingerprint_value; Type: INDEX; Schema: public; Owner: -; Tablespace: mirai
--
CREATE INDEX ivfflat_index_on_fingerprint_value ON public.domain_post_file_bit_fingerprints USING ivfflat (fingerprint_value public.bit_hamming_ops) WITH (lists='20000');
--
-- Name: index_blob_entries_p_00_on_sha256; Type: INDEX ATTACH; Schema: public; Owner: -
--
@@ -8914,6 +8923,7 @@ ALTER TABLE ONLY public.domain_twitter_tweets
SET search_path TO "$user", public;
INSERT INTO "schema_migrations" (version) VALUES
('20250619233027'),
('20250321050628'),
('20250310001341'),
('20250310001005'),

View File

@@ -58,8 +58,9 @@ RSpec.describe Domain::PostsController, type: :controller do
).with(temp_file_path).and_return(mock_detail_hash_value)
# Mock the similar fingerprints search
expect(controller).to receive(:find_similar_fingerprints).with(
mock_hash_value,
expect(controller.helpers).to receive(:find_similar_fingerprints).with(
fingerprint_value: mock_hash_value,
fingerprint_detail_value: mock_detail_hash_value,
).and_return(mock_fingerprints)
post :visual_results,