[WIP] perceptual hashing

This commit is contained in:
Dylan Knutson
2025-03-02 04:03:56 +00:00
parent 6b395d63d4
commit f969ceb371
34 changed files with 150 additions and 1 deletions

View File

@@ -49,7 +49,6 @@ RUN \
libvips42 \
libyaml-dev \
patch \
rustc \
uuid-dev \
zlib1g-dev \
watchman \

View File

@@ -0,0 +1 @@
# Load the Cargo environment file for fish, if it exists.
# The guard keeps shell startup quiet when the file is absent (for example
# before the Rust toolchain has been installed); an unguarded `source` of a
# missing file reports an error.
# NOTE(review): presumably this file is written by rustup — confirm.
if test -f "$HOME/.cargo/env.fish"
    source "$HOME/.cargo/env.fish"
end

View File

@@ -18,3 +18,5 @@ install_extension KoichiSasada.vscode-rdbg
install_extension qwtel.sqlite-viewer
install_extension esbenp.prettier-vscode
install_extension ms-azuretools.vscode-docker
# Rust editor support.
install_extension 1YiB.rust-bundle
install_extension rust-lang.rust-analyzer

View File

@@ -0,0 +1,28 @@
# typed: strict
# A perceptual hash stored for one thumbnail.
#
# Each row records the algorithm that produced the hash (+algorithm+) and the
# resulting value (+hash_value+), and belongs to a single
# Domain::PostFileThumbnail. A thumbnail can hold at most one hash per
# algorithm.
class Domain::PerceptualHash < ReduxApplicationRecord
  include AttrJsonRecordAliases
  self.table_name = "domain_perceptual_hashes"

  # Supported perceptual hash algorithms, keyed by symbolic name.
  # Frozen so this shared constant cannot be mutated at runtime.
  # NOTE(review): no validation below restricts +algorithm+ to these values;
  # confirm whether an inclusion check is intended.
  ALGORITHMS =
    T.let(
      {
        phash: "phash", # Perceptual hash
        dhash: "dhash", # Difference hash
        ahash: "ahash", # Average hash
        whash: "whash", # Wavelet hash
        colorhash: "colorhash", # Color histogram hash
      }.freeze,
      T::Hash[Symbol, String],
    )

  belongs_to :thumbnail,
             foreign_key: :thumbnail_id,
             class_name: "::Domain::PostFileThumbnail"

  validates :algorithm, presence: true
  validates :hash_value, presence: true
  # At most one hash per algorithm for a given thumbnail.
  validates :algorithm, uniqueness: { scope: %i[thumbnail_id] }
end

View File

@@ -10,6 +10,11 @@ class Domain::PostFile < ReduxApplicationRecord
optional: true,
foreign_key: :blob_sha256
# Thumbnails belonging to this file. Rows are Domain::PostFileThumbnail
# records linked through post_file_id, and they are destroyed together with
# this record (dependent: :destroy).
has_many :thumbnails,
class_name: "::Domain::PostFileThumbnail",
foreign_key: :post_file_id,
dependent: :destroy
attr_json :state, :string
attr_json :url_str, :string
attr_json :error_message, :string

View File

@@ -0,0 +1,47 @@
# typed: strict
# A thumbnail rendition of a Domain::PostFile.
#
# Each post file can have at most one thumbnail per +thumbnail_type+, and a
# thumbnail owns the perceptual hashes stored for it (they are destroyed
# along with the thumbnail).
class Domain::PostFileThumbnail < ReduxApplicationRecord
  include AttrJsonRecordAliases
  self.table_name = "domain_post_file_thumbnails"

  # Thumbnail types for different uses, mapped to their pixel dimensions.
  # Must be defined before the +validates+ call below: the class body runs
  # top to bottom, so referencing the constant earlier raises NameError when
  # the class is loaded.
  THUMBNAIL_TYPES =
    T.let(
      {
        small: {
          width: 128,
          height: 128,
        },
        medium: {
          width: 256,
          height: 256,
        },
        large: {
          width: 512,
          height: 512,
        },
        phash: {
          width: 64,
          height: 64,
        }, # Special size for perceptual hashing
      }.freeze,
      T::Hash[Symbol, T::Hash[Symbol, Integer]],
    )

  belongs_to :post_file,
             foreign_key: :post_file_id,
             class_name: "::Domain::PostFile"

  has_many :perceptual_hashes,
           class_name: "::Domain::PerceptualHash",
           foreign_key: :thumbnail_id,
           dependent: :destroy

  # +thumbnail_type+ is a string column, so the attribute reads back as a
  # String. Compare against the stringified keys; the Symbol keys themselves
  # would never match and every record would be invalid.
  validates :thumbnail_type,
            presence: true,
            uniqueness: {
              scope: :post_file_id,
            },
            inclusion: {
              in: THUMBNAIL_TYPES.keys.map(&:to_s),
            }
end

View File

@@ -0,0 +1,14 @@
# Creates the table backing Domain::PostFileThumbnail: one row per
# (post file, thumbnail type) pair.
class CreateDomainPostFileThumbnails < ActiveRecord::Migration[7.0]
  def change
    create_table :domain_post_file_thumbnails do |t|
      t.references :post_file,
                   null: false,
                   foreign_key: {
                     to_table: :domain_post_files,
                   },
                   index: true
      t.string :thumbnail_type, null: false, index: true
      t.timestamps

      # Enforce "one thumbnail per type per post file" in the database.
      # A model-level uniqueness validation alone can be bypassed by
      # concurrent inserts. The name is explicit because the generated
      # default is longer than PostgreSQL's 63-character identifier limit.
      t.index %i[post_file_id thumbnail_type],
              unique: true,
              name: "idx_post_file_thumbnails_on_file_and_type"
    end
  end
end

View File

@@ -0,0 +1,17 @@
# Creates the table backing Domain::PerceptualHash: one hash row per
# (thumbnail, algorithm) pair.
class CreateDomainPerceptualHashes < ActiveRecord::Migration[7.0]
  def change
    create_table :domain_perceptual_hashes do |t|
      t.references :thumbnail,
                   null: false,
                   foreign_key: {
                     to_table: :domain_post_file_thumbnails,
                   },
                   index: true
      t.string :algorithm, null: false, index: true
      # NOTE(review): `t.vector` is not a core Rails column type; presumably
      # it comes from a pgvector integration and needs the extension
      # enabled — confirm. No dimension is declared here.
      t.vector :hash_value
      t.timestamps

      # Enforce "one hash per algorithm per thumbnail" in the database.
      # A model-level uniqueness validation alone can be bypassed by
      # concurrent inserts.
      t.index %i[thumbnail_id algorithm],
              unique: true,
              name: "idx_perceptual_hashes_on_thumbnail_and_algorithm"
    end
    # TODO: figure out right index type for hash_value
  end
end

7
media-thumbnailer/Cargo.lock generated Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "media-thumbnailer"
version = "0.1.0"

View File

@@ -0,0 +1,6 @@
[package]
name = "media-thumbnailer"
version = "0.1.0"
edition = "2024"
[dependencies]

View File

@@ -0,0 +1,3 @@
/// Program entry point: writes a fixed greeting line to standard output.
fn main() {
    let greeting = "Hello, world!";
    println!("{greeting}");
}

View File

@@ -0,0 +1 @@
{"rustc_fingerprint":2276600985026381000,"outputs":{"13331785392996375709":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/vscode/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"unknown\"\nunix\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.85.0 (4d91de4e4 2025-02-17)\nbinary: rustc\ncommit-hash: 4d91de4e48198da2e33413efdcd9cd2cc0c46688\ncommit-date: 2025-02-17\nhost: x86_64-unknown-linux-gnu\nrelease: 1.85.0\nLLVM version: 19.1.7\n","stderr":""},"2063776225603076451":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/vscode/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"unknown\"\nunix\n","stderr":""}},"successes":{}}

View File

@@ -0,0 +1,3 @@
Signature: 8a477f597d28d172789f06886806bc55
# This file is a cache directory tag created by cargo.
# For information about cache directory tags see https://bford.info/cachedir/

View File

@@ -0,0 +1 @@
{"rustc":8277423686421874925,"features":"[]","declared_features":"[]","target":9596570253498806976,"profile":17672942494452627365,"path":4942398508502643691,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/media-thumbnailer-5574003c59783d36/dep-bin-media-thumbnailer","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -0,0 +1 @@
This file has an mtime of when this was started.

View File

@@ -0,0 +1 @@
This file has an mtime of when this was started.

View File

@@ -0,0 +1 @@
{"rustc":8277423686421874925,"features":"[]","declared_features":"[]","target":9596570253498806976,"profile":3316208278650011218,"path":4942398508502643691,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/media-thumbnailer-706bac28c87bc41d/dep-test-bin-media-thumbnailer","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -0,0 +1,5 @@
/workspaces/redux-scraper/media-thumbnailer/target/debug/deps/libmedia_thumbnailer-5574003c59783d36.rmeta: src/main.rs
/workspaces/redux-scraper/media-thumbnailer/target/debug/deps/media_thumbnailer-5574003c59783d36.d: src/main.rs
src/main.rs:

View File

@@ -0,0 +1,5 @@
/workspaces/redux-scraper/media-thumbnailer/target/debug/deps/libmedia_thumbnailer-706bac28c87bc41d.rmeta: src/main.rs
/workspaces/redux-scraper/media-thumbnailer/target/debug/deps/media_thumbnailer-706bac28c87bc41d.d: src/main.rs
src/main.rs: