Fix visual similarity query; add a "Visually Similar Posts" section to the post show page
This commit is contained in:
@@ -150,19 +150,17 @@ class Domain::PostsController < DomainController
|
||||
@uploaded_hash_value = generate_fingerprint(image_path)
|
||||
@uploaded_detail_hash_value = generate_detail_fingerprint(image_path)
|
||||
@post_file_fingerprints =
|
||||
find_similar_fingerprints(@uploaded_hash_value).to_a
|
||||
@post_file_fingerprints.sort! do |a, b|
|
||||
helpers.calculate_similarity_percentage(
|
||||
b.fingerprint_detail_value,
|
||||
@uploaded_detail_hash_value,
|
||||
) <=>
|
||||
helpers.calculate_similarity_percentage(
|
||||
a.fingerprint_detail_value,
|
||||
@uploaded_detail_hash_value,
|
||||
)
|
||||
end
|
||||
helpers.find_similar_fingerprints(
|
||||
fingerprint_value: @uploaded_hash_value,
|
||||
fingerprint_detail_value: @uploaded_detail_hash_value,
|
||||
).to_a
|
||||
@post_file_fingerprints = @post_file_fingerprints.take(10)
|
||||
@posts = @post_file_fingerprints.map(&:post_file).compact.map(&:post)
|
||||
@posts =
|
||||
@post_file_fingerprints
|
||||
.map(&:fingerprint)
|
||||
.map(&:post_file)
|
||||
.compact
|
||||
.map(&:post)
|
||||
ensure
|
||||
# Clean up any temporary files
|
||||
if @temp_file
|
||||
@@ -253,26 +251,6 @@ class Domain::PostsController < DomainController
|
||||
Domain::PostFile::BitFingerprint.detail_from_file_path(image_path)
|
||||
end
|
||||
|
||||
# Find stored fingerprints similar to the given fingerprint.
#
# Builds a raw SQL subquery that keeps one row per post_file_id (the row with
# the smallest `<~>` distance to fingerprint_value, via DISTINCT ON), then
# returns those rows ordered by distance ascending, capped at 32.
#
# @param fingerprint_value [String] the fingerprint to compare against
# @return [ActiveRecord::Relation] BitFingerprint rows with an extra `distance` column
|
||||
sig { params(fingerprint_value: String).returns(ActiveRecord::Relation) }
|
||||
def find_similar_fingerprints(fingerprint_value)
|
||||
# The query is hand-written SQL; fingerprint_value is escaped with
# quote_string before it is interpolated into the statement.
|
||||
|
||||
subquery = <<~SQL
|
||||
(
|
||||
select distinct on (post_file_id) *, (fingerprint_value <~> '#{ActiveRecord::Base.connection.quote_string(fingerprint_value)}') as distance
|
||||
from #{Domain::PostFile::BitFingerprint.table_name}
|
||||
order by post_file_id, distance asc
|
||||
) subquery
|
||||
SQL
|
||||
|
||||
# Select from the aliased subquery so the outer ORDER BY can use `distance`.
Domain::PostFile::BitFingerprint
|
||||
.select("*")
|
||||
.from(subquery)
|
||||
.order("distance ASC")
|
||||
.limit(32)
|
||||
end
|
||||
|
||||
sig { override.returns(DomainController::DomainParamConfig) }
|
||||
def self.param_config
|
||||
DomainController::DomainParamConfig.new(
|
||||
|
||||
@@ -320,7 +320,12 @@ module Domain::DescriptionsHelper
|
||||
)
|
||||
end
|
||||
def props_for_post_hover_preview(post, link_text, visual_style)
|
||||
cache_key = [post, policy(post), "popover_inline_link_domain_post"]
|
||||
cache_key = [
|
||||
post,
|
||||
policy(post),
|
||||
link_text,
|
||||
"popover_inline_link_domain_post",
|
||||
]
|
||||
Rails
|
||||
.cache
|
||||
.fetch(cache_key) do
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
# typed: true
|
||||
# typed: strict
|
||||
module Domain
|
||||
module VisualSearchHelper
|
||||
extend T::Sig
|
||||
|
||||
# Calculate the similarity percentage between two fingerprint hash values
|
||||
# @param hash_value [String] The hash value to compare
|
||||
# @param reference_hash_value [String] The reference hash value to compare against
|
||||
# @return [Float] The similarity percentage between 0 and 100
|
||||
sig do
|
||||
params(hash_value: String, reference_hash_value: String).returns(Float)
|
||||
end
|
||||
def calculate_similarity_percentage(hash_value, reference_hash_value)
|
||||
# Calculate hamming distance between the two hash values
|
||||
distance =
|
||||
@@ -24,6 +29,7 @@ module Domain
|
||||
# Determine the background color class based on similarity percentage
|
||||
# @param similarity_percentage [Float] The similarity percentage between 0 and 100
|
||||
# @return [String] The Tailwind CSS background color class
|
||||
sig { params(similarity_percentage: Float).returns(String) }
|
||||
def match_badge_bg_color(similarity_percentage)
|
||||
case similarity_percentage
|
||||
when 90..100
|
||||
@@ -40,6 +46,7 @@ module Domain
|
||||
# Determine the text color class based on similarity percentage
|
||||
# @param similarity_percentage [Float] The similarity percentage between 0 and 100
|
||||
# @return [String] The Tailwind CSS text color class
|
||||
sig { params(similarity_percentage: Float).returns(String) }
|
||||
def match_text_color(similarity_percentage)
|
||||
case similarity_percentage
|
||||
when 90..100
|
||||
@@ -56,8 +63,52 @@ module Domain
|
||||
# Get the CSS classes for the match percentage badge
|
||||
# @param similarity_percentage [Float] The similarity percentage between 0 and 100
|
||||
# @return [String] The complete CSS classes for the match percentage badge
|
||||
sig { params(similarity_percentage: Float).returns(String) }
|
||||
def match_badge_classes(similarity_percentage)
|
||||
"#{match_badge_bg_color(similarity_percentage)} text-white font-semibold text-xs rounded-full px-3 py-1 shadow-md"
|
||||
"#{match_badge_bg_color(similarity_percentage)} text-white text-xs rounded-full px-3 py-1 shadow-md"
|
||||
end
|
||||
|
||||
# Pairs a stored fingerprint with its similarity to the fingerprint that was
# searched for; this is the element type returned by find_similar_fingerprints.
class SimilarFingerprintResult < T::Struct
|
||||
# The stored fingerprint record that matched.
const :fingerprint, Domain::PostFile::BitFingerprint
|
||||
# Similarity score as computed by calculate_similarity_percentage
# (documented in this helper as a percentage between 0 and 100).
const :similarity_percentage, Float
|
||||
end
|
||||
|
||||
# Find the stored fingerprints that are most visually similar to the given one.
#
# The search runs in two stages:
#   1. Fetch the `limit * oversearch` nearest rows by `<~>` distance on the
#      coarse `fingerprint_value` (this is the index-assisted part).
#   2. Keep one row per post file, score each against `fingerprint_detail_value`
#      with calculate_similarity_percentage, and return the best `limit` results,
#      highest similarity first.
#
# @param fingerprint_value [String] coarse fingerprint used for the nearest-neighbour query
# @param fingerprint_detail_value [String] detail fingerprint used to re-rank candidates
# @param limit [Integer] maximum number of results returned
# @param oversearch [Integer] multiplier applied to `limit` when fetching candidates,
#   so that de-duplication and re-ranking still leave enough results
# @return [Array<SimilarFingerprintResult>] results sorted by descending similarity
sig do
  params(
    fingerprint_value: String,
    fingerprint_detail_value: String,
    limit: Integer,
    oversearch: Integer,
  ).returns(T::Array[SimilarFingerprintResult])
end
def find_similar_fingerprints(
  fingerprint_value:,
  fingerprint_detail_value:,
  limit: 32,
  oversearch: 2
)
  quoted_value = ActiveRecord::Base.connection.quote_string(fingerprint_value)
  distance_sql = Arel.sql("(fingerprint_value <~> '#{quoted_value}')")

  # SET LOCAL only lasts until the end of the enclosing transaction, so the
  # probes setting does not leak onto the pooled connection and affect
  # unrelated queries that later reuse it (a plain SET is session-wide).
  candidates =
    ActiveRecord::Base.transaction do
      ActiveRecord::Base.connection.execute("SET LOCAL ivfflat.probes = 10")
      Domain::PostFile::BitFingerprint
        .order(distance_sql)
        .limit(limit * oversearch)
        .to_a
    end

  candidates
    .uniq(&:post_file_id)
    .filter_map do |other_fingerprint|
      other_detail_value = other_fingerprint.fingerprint_detail_value
      # A candidate without a detail fingerprint cannot be scored; skip it
      # rather than failing the whole search.
      next if other_detail_value.nil?

      SimilarFingerprintResult.new(
        fingerprint: other_fingerprint,
        similarity_percentage:
          calculate_similarity_percentage(
            fingerprint_detail_value,
            other_detail_value,
          ),
      )
    end
    .sort_by { |result| -result.similarity_percentage }
    .take(limit)
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
<%# sky-link (default, normal blue link) %>
|
||||
<%# description-section-link (smaller and has a border, for use in description section) %>
|
||||
<% visual_style = local_assigns[:visual_style] || "sky-link" %>
|
||||
<% link_text = local_assigns[:link_text] || post.title_for_view %>
|
||||
<%=
|
||||
react_component(
|
||||
"PostHoverPreviewWrapper",
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
<% post = neighbor.post %>
|
||||
<% creator = post.class.has_creators? ? post.creator : nil %>
|
||||
<div class="text-md flex items-center px-4 py-1 <%= border_classes %>">
|
||||
<%= render "domain/has_description_html/inline_link_domain_post", post: post, link_text: post.title_for_view, visual_style: "sky-link" %>
|
||||
<%= render "domain/has_description_html/inline_link_domain_post", post: post, visual_style: "sky-link" %>
|
||||
</div>
|
||||
<% if creator %>
|
||||
<div class="text-md items-center px-4 py-1 <%= border_classes %>">
|
||||
@@ -23,7 +23,7 @@
|
||||
<% end %>
|
||||
<% end %>
|
||||
<% else %>
|
||||
<div class="col-span-2 p-4 text-center text-slate-500">No similar posts found</div>
|
||||
<div class="col-span-full p-4 text-center text-slate-500">No similar posts found</div>
|
||||
<% end %>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
<section class="sky-section">
|
||||
<div class="section-header">Visually Similar Posts</div>
|
||||
<div class="grid grid-cols-[auto,auto,1fr,auto] bg-slate-100">
|
||||
<% fprint = post.primary_file_for_view&.bit_fingerprints&.first %>
|
||||
<% fprint_value = fprint&.fingerprint_value %>
|
||||
<% fprint_detail_value = fprint&.fingerprint_detail_value %>
|
||||
<% fprints = fprint && fprint_value && fprint_detail_value && find_similar_fingerprints(
|
||||
fingerprint_value: fprint_value,
|
||||
fingerprint_detail_value: fprint_detail_value,
|
||||
limit: 5,
|
||||
oversearch: 5,
|
||||
)
|
||||
.reject { |f| f.fingerprint.id == fprint.id }
|
||||
.reject { |f| f.similarity_percentage < 70 } || []
|
||||
%>
|
||||
<% if fprint.nil? %>
|
||||
<div class="col-span-full p-4 text-center text-slate-500">File not processed</div>
|
||||
<% elsif fprints.any? %>
|
||||
<% num_neighbors = fprints.size %>
|
||||
<% fprints.each_with_index do |similar_fingerprint, index| %>
|
||||
<% border_classes = index < num_neighbors - 1 ? "border-b border-slate-300" : "" %>
|
||||
<% post = similar_fingerprint.fingerprint.post_file.post %>
|
||||
<% creator = post.class.has_creators? ? post.creator : nil %>
|
||||
<div class="text-md items-center pl-4 pr-2 py-1 flex justify-end <%= border_classes %>">
|
||||
<div class="w-full text-center font-medium <%= match_badge_classes(similar_fingerprint.similarity_percentage) %>">
|
||||
<%= similar_fingerprint.similarity_percentage %>%
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex items-center <%= border_classes %>">
|
||||
<% source_url = post.external_url_for_view&.to_s %>
|
||||
<% source_url = source_url && Addressable::URI.parse(source_url).host %>
|
||||
<% icon_path = source_url && icon_path_for_domain(source_url) %>
|
||||
<%= image_tag icon_path, class: "w-6 h-6 mr-2" if icon_path %>
|
||||
</div>
|
||||
<div class="text-md flex items-center pr-2 py-1 <%= border_classes %>">
|
||||
<%= render "domain/has_description_html/inline_link_domain_post", post: post, visual_style: "sky-link" %>
|
||||
</div>
|
||||
<% if creator %>
|
||||
<div class="text-md items-center px-4 py-1 <%= border_classes %>">
|
||||
<%= render "domain/has_description_html/inline_link_domain_user", user: creator, visual_style: "sky-link", icon_size: "large" %>
|
||||
</div>
|
||||
<% else %>
|
||||
<div class="text-md truncate px-4 py-1 <%= border_classes %>">
|
||||
<%= post.primary_creator_name_fallback_for_view %>
|
||||
</div>
|
||||
<% end %>
|
||||
<% end %>
|
||||
<% else %>
|
||||
<div class="col-span-full p-4 text-center text-slate-500">No visually similar posts found</div>
|
||||
<% end %>
|
||||
</div>
|
||||
</section>
|
||||
@@ -5,5 +5,6 @@
|
||||
<%= render_for_model(@post, "section_description", as: :post) %>
|
||||
<%= render_for_model(@post, "section_tags", as: :post) %>
|
||||
<%= render_for_model(@post, "section_sources", as: :post) %>
|
||||
<%= render_for_model(@post, "section_visualy_similar_posts", as: :post) %>
|
||||
<%= render_for_model(@post, "section_similar_posts", as: :post) %>
|
||||
</div>
|
||||
|
||||
@@ -32,9 +32,9 @@
|
||||
<div class="mx-2">
|
||||
<div class="flex flex-wrap gap-3 justify-center">
|
||||
<% @post_file_fingerprints.each do |post_file_fingerprint| %>
|
||||
<% post_file = post_file_fingerprint.post_file %>
|
||||
<% post_file = post_file_fingerprint.fingerprint.post_file %>
|
||||
<% post = post_file.post %>
|
||||
<% similarity_percentage = calculate_similarity_percentage(post_file_fingerprint.fingerprint_detail_value, @uploaded_detail_hash_value) %>
|
||||
<% similarity_percentage = post_file_fingerprint.similarity_percentage %>
|
||||
<div class="flex flex-col h-fit rounded-md border border-gray-300 bg-white shadow hover:shadow-md transition-shadow duration-300 overflow-hidden">
|
||||
<div class="flex justify-between items-center border-b border-gray-200 p-2 bg-gray-50 gap-2">
|
||||
<div class="flex items-center">
|
||||
@@ -69,7 +69,11 @@
|
||||
</span>
|
||||
<span class="font-medium">
|
||||
<% if post.created_at %>
|
||||
<span class="text-gray-500"><i class="far fa-clock mr-1"></i></span> <%= time_ago_in_words(post.created_at) %> ago
|
||||
<span class="text-gray-500"><i class="far fa-clock mr-1"></i></span> <%=
|
||||
post.posted_at.present? ?
|
||||
time_ago_in_words(post.posted_at) :
|
||||
time_ago_in_words(post.created_at)
|
||||
%> ago
|
||||
<% end %>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
partial: "domain/has_description_html/inline_link_domain_post",
|
||||
locals: {
|
||||
post: post,
|
||||
link_text: post.title,
|
||||
visual_style: "sky-link"
|
||||
}
|
||||
) %>
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
<% fav_posts.each do |post| %>
|
||||
<div class="flex flex-col px-4 py-2">
|
||||
<span class="flex gap-2">
|
||||
<%= render "domain/has_description_html/inline_link_domain_post", post: post, link_text: post.title, visual_style: "sky-link" %>
|
||||
<%= render "domain/has_description_html/inline_link_domain_post", post: post, visual_style: "sky-link" %>
|
||||
<span class="whitespace-nowrap flex-grow text-right text-slate-500">
|
||||
<% if posted_at = post.posted_at %>
|
||||
<%= time_ago_in_words(posted_at) %> ago
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
class CreatePostFileThumbnailsFingerprints < ActiveRecord::Migration[7.2]
|
||||
def change
|
||||
up_only { execute "SET DEFAULT_TABLESPACE = mirai;" }
|
||||
|
||||
create_table :domain_post_file_thumbnails do |t|
|
||||
t.references :post_file, null: false, index: false
|
||||
t.integer :thumb_type, null: false
|
||||
@@ -17,7 +19,6 @@ class CreatePostFileThumbnailsFingerprints < ActiveRecord::Migration[7.2]
|
||||
t.timestamps
|
||||
|
||||
t.index %i[post_file_id thumbnail_id], unique: true
|
||||
t.index :fingerprint_value, using: :hnsw, opclass: :bit_hamming_ops
|
||||
end
|
||||
|
||||
# create_table :domain_post_file_vector_fingerprints do |t|
|
||||
|
||||
19
db/migrate/20250619233027_new_fingerprint_value_index.rb
Normal file
19
db/migrate/20250619233027_new_fingerprint_value_index.rb
Normal file
@@ -0,0 +1,19 @@
|
||||
# Adds an ivfflat index (bit_hamming_ops) on
# domain_post_file_bit_fingerprints.fingerprint_value, stored in the `mirai`
# tablespace. Written as raw SQL in explicit up/down methods.
#
# NOTE(review): this migration creates the index WITH (lists = 5000), but the
# schema dump included in the same change records lists='20000' for
# ivfflat_index_on_fingerprint_value — confirm which value is intended.
class NewFingerprintValueIndex < ActiveRecord::Migration[7.2]
|
||||
# Run this migration outside the wrapping DDL transaction.
disable_ddl_transaction!
|
||||
|
||||
# Create the ivfflat index.
def up
|
||||
execute <<-SQL
|
||||
CREATE INDEX ivfflat_index_on_fingerprint_value
|
||||
ON public.domain_post_file_bit_fingerprints USING ivfflat
|
||||
(fingerprint_value bit_hamming_ops)
|
||||
WITH (lists = 5000)
|
||||
TABLESPACE mirai;
|
||||
SQL
|
||||
end
|
||||
|
||||
# Drop the index created by #up.
def down
|
||||
execute <<-SQL
|
||||
DROP INDEX ivfflat_index_on_fingerprint_value;
|
||||
SQL
|
||||
end
|
||||
end
|
||||
@@ -7627,6 +7627,15 @@ CREATE UNIQUE INDEX index_users_on_email ON public.users USING btree (email);
|
||||
CREATE UNIQUE INDEX index_users_on_reset_password_token ON public.users USING btree (reset_password_token);
|
||||
|
||||
|
||||
SET default_tablespace = mirai;
|
||||
|
||||
--
|
||||
-- Name: ivfflat_index_on_fingerprint_value; Type: INDEX; Schema: public; Owner: -; Tablespace: mirai
|
||||
--
|
||||
|
||||
CREATE INDEX ivfflat_index_on_fingerprint_value ON public.domain_post_file_bit_fingerprints USING ivfflat (fingerprint_value public.bit_hamming_ops) WITH (lists='20000');
|
||||
|
||||
|
||||
--
|
||||
-- Name: index_blob_entries_p_00_on_sha256; Type: INDEX ATTACH; Schema: public; Owner: -
|
||||
--
|
||||
@@ -8914,6 +8923,7 @@ ALTER TABLE ONLY public.domain_twitter_tweets
|
||||
SET search_path TO "$user", public;
|
||||
|
||||
INSERT INTO "schema_migrations" (version) VALUES
|
||||
('20250619233027'),
|
||||
('20250321050628'),
|
||||
('20250310001341'),
|
||||
('20250310001005'),
|
||||
|
||||
@@ -58,8 +58,9 @@ RSpec.describe Domain::PostsController, type: :controller do
|
||||
).with(temp_file_path).and_return(mock_detail_hash_value)
|
||||
|
||||
# Mock the similar fingerprints search
|
||||
expect(controller).to receive(:find_similar_fingerprints).with(
|
||||
mock_hash_value,
|
||||
expect(controller.helpers).to receive(:find_similar_fingerprints).with(
|
||||
fingerprint_value: mock_hash_value,
|
||||
fingerprint_detail_value: mock_detail_hash_value,
|
||||
).and_return(mock_fingerprints)
|
||||
|
||||
post :visual_results,
|
||||
|
||||
Reference in New Issue
Block a user