better similarity helpers

This commit is contained in:
Dylan Knutson
2025-02-27 22:55:48 +00:00
parent d45b6047b6
commit b452e04af4
26 changed files with 268 additions and 236 deletions

View File

@@ -18,13 +18,6 @@ class ApplicationController < ActionController::Base
protected
# Issues two `SET` statements on the ReduxApplicationRecord connection,
# setting `ivfflat.max_probes` and `ivfflat.probes` to 10.
# Returns whatever the second `execute` call returns.
def set_ivfflat_probes!
  conn = ReduxApplicationRecord.connection
  conn.execute("SET ivfflat.max_probes = 10")
  conn.execute("SET ivfflat.probes = 10")
end
protected
# Returns `PrometheusExporter::Client.default`, giving callers a single
# accessor for the exporter client instead of referencing the constant directly.
def prometheus_client
PrometheusExporter::Client.default
end

View File

@@ -132,96 +132,8 @@ class Domain::Fa::ApiController < ApplicationController
}
end
# Renders, as JSON, the users most similar to the FA user named by
# `params[:url_name]`.
#
# Success response: `{ all: [...], not_followed: [...] | nil }`.
# `not_followed` is only populated when `params[:exclude_url_name]` is given;
# it is the same lookup with `exclude_followed_by:` set to that user.
#
# Error responses (each rendered exactly once, after which the action returns):
# - 404 `user_not_found`
# - 500 `recs_not_computed`
# - 500 `exclude_user_not_found`
# - 500 `exclude_user_not_scanned`
def similar_users
  url_name = params[:url_name]
  exclude_url_name = params[:exclude_url_name]

  user = Domain::User::FaUser.find_by(url_name: url_name)
  if user.nil?
    render status: 404,
           json: {
             error: "user '#{url_name}' not found",
             error_type: "user_not_found",
           }
    return
  end

  all_similar_users = helpers.similar_users_by_followed(user, limit: 10)
  if all_similar_users.nil?
    render status: 500,
           json: {
             error:
               "user '#{url_name}' has not had recommendations computed yet",
             error_type: "recs_not_computed",
           }
    return
  end
  all_similar_users = users_list_to_similar_list(all_similar_users)

  not_followed_similar_users = nil
  if exclude_url_name
    exclude_followed_by_user =
      Domain::User::FaUser.find_by(url_name: exclude_url_name)

    if exclude_followed_by_user.nil?
      # TODO - enqueue a manual UserFollowsJob for this user and have client
      # re-try the request later
      render status: 500,
             json: {
               error: "user '#{exclude_url_name}' not found",
               error_type: "exclude_user_not_found",
             }
      # Must return here: falling through would hit the success render below
      # and trigger a double-render error.
      return
    end

    if exclude_followed_by_user.scanned_follows_at.nil?
      render status: 500,
             json: {
               error:
                 "user '#{exclude_url_name}' followers list hasn't been scanned",
               error_type: "exclude_user_not_scanned",
             }
      return
    end

    not_followed_similar_users =
      users_list_to_similar_list(
        helpers.similar_users_by_followed(
          user,
          limit: 10,
          exclude_followed_by: exclude_followed_by_user,
        ),
      )
  end

  render json: {
           all: all_similar_users,
           not_followed: not_followed_similar_users,
         }
end
private
# Finds the best available logged response for a user's FA profile page.
#
# Lookup order (first truthy result wins):
# 1. the response of the HttpLogEntry whose id is stored under
#    `user.log_entry_detail["last_user_page_id"]`
# 2. the response of the newest logged fetch of `/user/<url_name>/`
# 3. the response of the newest logged fetch of `/user/<url_name>`
def get_best_user_page_http_log_entry_for(user)
  newest_response_at =
    lambda do |uri_path|
      newest_entry =
        HttpLogEntry
          .where(
            uri_scheme: "https",
            uri_host: "www.furaffinity.net",
            uri_path: uri_path,
          )
          .order(created_at: :desc)
          .first
      newest_entry&.response
    end

  detail = user.log_entry_detail
  hle_id = detail && detail["last_user_page_id"]
  response_by_id = hle_id && HttpLogEntry.find_by(id: hle_id)&.response
  return response_by_id if response_by_id

  profile_path = "/user/#{user.url_name}"
  # older versions don't end in a trailing slash
  newest_response_at.call("#{profile_path}/") ||
    newest_response_at.call(profile_path)
end
def defer_post_scan(post, fa_id)
if !post || !post.scanned?
defer_manual(Domain::Fa::Job::ScanPostJob, { fa_id: fa_id }, -17)
@@ -287,50 +199,6 @@ class Domain::Fa::ApiController < ApplicationController
end
end
sig do
params(users_list: T::Array[Domain::User::FaUser]).returns(
T::Array[T::Hash[Symbol, T.untyped]],
)
end
# Converts each user into the hash returned by the similar-users API:
# `name`, `url_name`, `profile_thumb_url`, `external_url`, `refurrer_url`.
#
# The thumbnail URL comes from the user's avatar log entry when present.
# Otherwise the best logged profile page is parsed for it; when no page is
# logged, follow / page scan jobs are enqueued for users whose scans are due.
# Any StandardError raised while resolving the thumbnail is logged and the
# thumbnail is left as nil.
def users_list_to_similar_list(users_list)
  users_list.map do |user|
    thumb_url = user.avatar&.log_entry&.uri_str

    unless thumb_url
      begin
        page_response = get_best_user_page_http_log_entry_for(user)
        if page_response
          page =
            Domain::Fa::Parser::Page.new(
              page_response.contents,
              require_logged_in: false,
            )
          thumb_url = page.user_page.profile_thumb_url
        else
          if user.follows_scan.due?
            Domain::Fa::Job::UserFollowsJob
              .set({ priority: -20 })
              .perform_later({ user: user })
          end
          if user.page_scan.due?
            Domain::Fa::Job::UserPageJob
              .set({ priority: -20 })
              .perform_later({ user: user })
          end
        end
      rescue StandardError => e
        logger.error("error getting profile_thumb_url: #{e.message}")
      end
    end

    {
      name: user.name,
      url_name: user.url_name,
      profile_thumb_url: thumb_url,
      external_url: "https://www.furaffinity.net/user/#{user.url_name}/",
      refurrer_url: request.base_url + helpers.domain_user_path(user),
    }
  end
end
API_TOKENS = {
"a4eb03ac-b33c-439c-9b51-a834d1c5cf48" => "dymk",
"56cc81fe-8c00-4436-8981-4580eab00e66" => "taargus",

View File

@@ -6,7 +6,12 @@ class Domain::UsersController < DomainController
before_action :set_user!, only: %i[show]
before_action :set_post!, only: %i[users_faving_post]
skip_before_action :authenticate_user!,
only: %i[show search_by_name users_faving_post]
only: %i[
show
search_by_name
users_faving_post
similar_users
]
# GET /users
sig(:final) { void }
@@ -66,6 +71,69 @@ class Domain::UsersController < DomainController
.without_count
end
sig { void }
# Renders, as JSON, the users most similar to the FA user named by
# `params[:url_name]`.
#
# Success response: `{ all: [...], not_followed: [...] | nil }`.
# `not_followed` is only populated when `params[:exclude_url_name]` is given;
# it is the same lookup with `exclude_followed_by:` set to that user.
#
# Error responses (each rendered exactly once, after which the action returns):
# - 404 `user_not_found`
# - 500 `recs_not_computed`
# - 500 `exclude_user_not_found`
# - 500 `exclude_user_not_scanned`
def similar_users
  url_name = params[:url_name]
  exclude_url_name = params[:exclude_url_name]

  user = Domain::User::FaUser.find_by(url_name: url_name)
  if user.nil?
    render status: 404,
           json: {
             error: "user '#{url_name}' not found",
             error_type: "user_not_found",
           }
    return
  end

  all_similar_users =
    users_similar_to_by_followers(user, limit: 10).map do |u|
      user_to_similarity_entry(u)
    end

  # NOTE(review): `map` never returns nil, so this guard cannot fire as
  # written. It is kept so the documented `recs_not_computed` error stays in
  # place until the lookup can signal "no recommendations" distinctly.
  if all_similar_users.nil?
    render status: 500,
           json: {
             error:
               "user '#{url_name}' has not had recommendations computed yet",
             error_type: "recs_not_computed",
           }
    return
  end

  not_followed_similar_users = nil
  if exclude_url_name
    exclude_followed_by =
      Domain::User::FaUser.find_by(url_name: exclude_url_name)

    if exclude_followed_by.nil?
      render status: 500,
             json: {
               error: "user '#{exclude_url_name}' not found",
               error_type: "exclude_user_not_found",
             }
      # Must return here: falling through would hit the success render below
      # and trigger a double-render error.
      return
    end

    if exclude_followed_by.scanned_follows_at.nil?
      render status: 500,
             json: {
               error:
                 "user '#{exclude_url_name}' followers list hasn't been scanned",
               error_type: "exclude_user_not_scanned",
             }
      return
    end

    not_followed_similar_users =
      users_similar_to_by_followers(
        user,
        limit: 10,
        exclude_followed_by: exclude_followed_by,
      ).map { |u| user_to_similarity_entry(u) }
  end

  render json: {
           all: all_similar_users,
           not_followed: not_followed_similar_users,
         }
end
private
sig { override.returns(DomainController::DomainParamConfig) }
@@ -76,4 +144,77 @@ class Domain::UsersController < DomainController
post_group_id_param: :post_group_id,
)
end
sig { params(user: Domain::User::FaUser).returns(T::Hash[Symbol, T.untyped]) }
# Builds the JSON-ready hash describing one similar user:
# `name`, `url_name`, `profile_thumb_url`, `external_url`, `refurrer_url`.
#
# `profile_thumb_url` is resolved in order:
# 1. the URI of the user's avatar log entry, if any
# 2. the thumbnail parsed out of the best logged profile page, if one exists
#    and has response bytes
# 3. the `https://a.furaffinity.net/0/<url_name>.gif` fallback
#
# Any StandardError raised while parsing is logged and treated as "no
# thumbnail found", so the fallback URL is used.
def user_to_similarity_entry(user)
  profile_thumb_url = user.avatar&.log_entry&.uri_str

  profile_thumb_url ||=
    begin
      pp_log_entry = get_best_user_page_http_log_entry_for(user)
      if pp_log_entry && (response_bytes = pp_log_entry.response_bytes)
        parser =
          Domain::Fa::Parser::Page.new(
            response_bytes,
            require_logged_in: false,
          )
        parser.user_page.profile_thumb_url
      end
    rescue StandardError => e
      logger.error("error getting profile_thumb_url: #{e.message}")
      # The logger call returns a truthy value; yield nil explicitly so the
      # fallback below applies instead of `true` leaking into the response.
      nil
    end

  profile_thumb_url ||= "https://a.furaffinity.net/0/#{user.url_name}.gif"

  {
    name: user.name,
    url_name: user.url_name,
    profile_thumb_url: profile_thumb_url,
    external_url: "https://www.furaffinity.net/user/#{user.url_name}/",
    refurrer_url: request.base_url + helpers.domain_user_path(user),
  }
end
sig { params(user: Domain::User::FaUser).returns(T.nilable(HttpLogEntry)) }
# Finds the best available HttpLogEntry for a user's FA profile page.
#
# Lookup order (first truthy result wins):
# 1. `user.last_user_page_log_entry`
# 2. the newest logged fetch of `/user/<url_name>/`
# 3. the newest logged fetch of `/user/<url_name>`
def get_best_user_page_http_log_entry_for(user)
  known_entry = user.last_user_page_log_entry
  return known_entry if known_entry

  newest_entry_at =
    lambda do |uri_path|
      HttpLogEntry
        .where(
          uri_scheme: "https",
          uri_host: "www.furaffinity.net",
          uri_path: uri_path,
        )
        .order(created_at: :desc)
        .first
    end

  profile_path = "/user/#{user.url_name}"
  # older versions don't end in a trailing slash
  newest_entry_at.call("#{profile_path}/") ||
    newest_entry_at.call(profile_path)
end
sig do
params(
user: Domain::User::FaUser,
limit: Integer,
exclude_followed_by: T.nilable(Domain::User::FaUser),
).returns(T::Array[Domain::User::FaUser])
end
# Returns the users attached to the nearest-neighbor factor rows of `user`.
#
# - Returns `[]` when `user` has no UserUserFollowToFactors row.
# - `limit` is applied to the neighbor relation.
# - When `exclude_followed_by` is given, neighbor rows whose `user_id` is in
#   that user's `followed_users.select(:to_id)` are filtered out.
def users_similar_to_by_followers(user, limit: 10, exclude_followed_by: nil)
  factors = Domain::Factors::UserUserFollowToFactors.find_by(user: user)
  return [] if factors.nil?

  neighbors = Domain::NeighborFinder.find_neighbors(factors)
  neighbors = neighbors.limit(limit)
  neighbors = neighbors.includes(:user)

  if exclude_followed_by
    followed_ids = exclude_followed_by.followed_users.select(:to_id)
    neighbors = neighbors.where.not(user_id: followed_ids)
  end

  neighbors.map { |neighbor| neighbor.user }
end
end

View File

@@ -99,11 +99,10 @@ module Domain::Fa::UsersHelper
def similar_users_by_followed(user, limit: 10, exclude_followed_by: nil)
factors = Domain::Factors::UserUserFollowToFactors.find_by(user: user)
return [] if factors.nil?
ReduxApplicationRecord.connection.execute("SET ivfflat.probes = 32")
relation =
factors
.nearest_neighbors(:embedding, distance: "euclidean")
Domain::NeighborFinder
.find_neighbors(factors)
.limit(limit)
.includes(:user)
if exclude_followed_by

View File

@@ -6,24 +6,33 @@ module Domain::ModelHelper
abstract!
sig do
params(model: HasViewPrefix, partial: String, as: Symbol).returns(
T.nilable(String),
)
params(
model: HasViewPrefix,
partial: String,
as: Symbol,
expires_in: ActiveSupport::Duration,
cache_key: T.untyped,
).returns(T.nilable(String))
end
def render_for_model(model, partial, as:)
prefixes = lookup_context.prefixes
partial_path =
prefixes
.map { |prefix| "#{prefix}/#{model.class.view_prefix}/_#{partial}" }
.find { |view| lookup_context.exists?(view) } ||
prefixes
.map { |prefix| "#{prefix}/default/_#{partial}" }
.find { |view| lookup_context.exists?(view) } ||
Kernel.raise("no partial found for #{partial} in #{prefixes}")
def render_for_model(model, partial, as:, expires_in: 1.hour, cache_key: nil)
cache_key ||= [model, partial]
Rails
.cache
.fetch(cache_key, expires_in:) do
prefixes = lookup_context.prefixes
partial_path =
prefixes
.map { |prefix| "#{prefix}/#{model.class.view_prefix}/_#{partial}" }
.find { |view| lookup_context.exists?(view) } ||
prefixes
.map { |prefix| "#{prefix}/default/_#{partial}" }
.find { |view| lookup_context.exists?(view) } ||
Kernel.raise("no partial found for #{partial} in #{prefixes}")
partial_path = partial_path.split("/")
T.must(partial_path.last).delete_prefix!("_")
partial_path = partial_path.join("/")
render partial: partial_path, locals: { as => model }
partial_path = partial_path.split("/")
T.must(partial_path.last).delete_prefix!("_")
partial_path = partial_path.join("/")
render partial: partial_path, locals: { as => model }
end
end
end

View File

@@ -1,7 +1,8 @@
# typed: strict
module HelpersInterface
module TimestampHelper
extend T::Sig
extend T::Helpers
include HelpersInterface
sig do
params(timestamp: T.nilable(ActiveSupport::TimeWithZone)).returns(String)

View File

@@ -944,7 +944,13 @@ class Domain::MigrateToDomain
sig { params(user: Domain::User::FaUser).void }
def migrate_fa_user_followed_users(user)
user_url_name = user.url_name
old_user = Domain::Fa::User.find_by(url_name: user_url_name) || return
old_user =
Domain::Fa::User.find_by(url_name: user_url_name) ||
begin
user.migrated_followed_users_at = Time.current
user.save!
return
end
followed_user_url_names = old_user.follows.pluck(:url_name)
new_user_ids =
Domain::User::FaUser.where(url_name: followed_user_url_names).pluck(:id)

View File

@@ -0,0 +1,15 @@
# typed: strict
# Single entry point for nearest-neighbor lookups over factor records.
module Domain::NeighborFinder
  extend T::Sig

  sig do
    type_parameters(:F)
      .params(factors: T.all(T.type_parameter(:F), Domain::Factors))
      .returns(ActiveRecord::Relation)
  end
  # Sets `ivfflat.max_probes` and `ivfflat.probes` to 10 on the
  # Domain::Factors connection, then returns the relation produced by
  # `factors.nearest_neighbors` on the `:embedding` column using cosine
  # distance.
  def self.find_neighbors(factors)
    conn = Domain::Factors.connection
    conn.execute("SET ivfflat.max_probes = 10")
    conn.execute("SET ivfflat.probes = 10")

    factors.nearest_neighbors(:embedding, distance: "cosine")
  end
end

View File

@@ -0,0 +1,5 @@
# typed: strict
# Module that only extends T::Sig and T::Helpers and defines no methods of
# its own here.
# NOTE(review): presumably meant to be included by helper modules as a shared
# Sorbet interface — confirm against the modules that include it.
module HelpersInterface
extend T::Sig
extend T::Helpers
end

View File

@@ -1,5 +1,6 @@
# typed: strict
# Abstract base record for factor tables. Mixes in Neighbor::Model and
# declares `:embedding` as the column used for neighbor queries, so subclasses
# inherit `nearest_neighbors(:embedding, ...)`.
class Domain::Factors < ReduxApplicationRecord
include Neighbor::Model
# No table of its own; concrete factor models subclass this.
self.abstract_class = true
has_neighbors :embedding
end

View File

@@ -5,9 +5,9 @@
>
<% factors = Domain::Factors::UserPostFavPostFactors.find_by(post: post) %>
<% if factors %>
<% nearest_neighbors = factors.nearest_neighbors(:embedding, distance: "euclidean").includes(:post).limit(10) %>
<% nearest_neighbors.each do |factor| %>
<% post = factor.post %>
<% neighbors = Domain::NeighborFinder.find_neighbors(factors).includes(:post).limit(10) %>
<% neighbors.each do |neighbor| %>
<% post = neighbor.post %>
<% creator = post.class.has_creators? ? post.creator : nil %>
<div class="col-span-3 grid grid-cols-subgrid items-center">
<span class="text-md truncate px-4 py-2">

View File

@@ -1,26 +1,14 @@
<div class="mx-auto mt-4 flex w-full max-w-2xl flex-col gap-4 pb-4">
<% cache [@post, "section_post_title"], expires_in: 1.hour do %>
<%= render_for_model(@post, "section_post_title", as: :post) %>
<% end %>
<% cache [@post, "section_post_groups"], expires_in: 1.hour do %>
<%= render_for_model(@post, "section_post_groups", as: :post) %>
<% end %>
<% cache [@post, "section_primary_file"], expires_in: 1.hour do %>
<%= render_for_model(@post, "section_primary_file", as: :post) %>
<% end %>
<%= render_for_model(@post, "section_post_title", as: :post) %>
<%= render_for_model(@post, "section_post_groups", as: :post) %>
<%= render_for_model(@post, "section_primary_file", as: :post) %>
<% cache [@post, "section_description_sanitized"], expires_in: 1.hour do %>
<%= render "domain/has_description_html/section_description_sanitized",
model: @post,
description_title: "Description",
no_description_text: "No description available" %>
<% end %>
<% cache [@post, "section_tags"], expires_in: 1.hour do %>
<%= render_for_model(@post, "section_tags", as: :post) %>
<% end %>
<% cache [@post, "section_sources"], expires_in: 1.hour do %>
<%= render_for_model(@post, "section_sources", as: :post) %>
<% end %>
<% cache [@post, "section_similar_posts"], expires_in: 1.hour do %>
<%= render_for_model(@post, "section_similar_posts", as: :post) %>
<% end %>
<%= render_for_model(@post, "section_tags", as: :post) %>
<%= render_for_model(@post, "section_sources", as: :post) %>
<%= render_for_model(@post, "section_similar_posts", as: :post) %>
</div>

View File

@@ -1,7 +1,6 @@
<% cache [user, "name_icon_and_status"] do %>
<section class="animated-shadow-sky sky-section flex divide-none p-3">
<div class="flex grow items-center gap-4">
<img
<section class="animated-shadow-sky sky-section flex divide-none p-3">
<div class="flex grow items-center gap-4">
<img
src="<%= domain_user_avatar_img_src_path(user.avatar) %>"
class="h-12 w-12 rounded-lg"
/>
@@ -9,39 +8,38 @@
<div class="text-lg font-bold text-slate-900">
<%= user.name_for_view %>
</div>
<div class="flex gap-6 text-sm text-slate-400">
<% if policy(user).view_page_scanned_at_timestamps? %>
<div class="flex flex-col">
<span class="font-medium italic text-slate-500">Status</span>
<span class=""><%= user.account_status_for_view %></span>
</div>
<% end %>
<%= render_for_model user, "overview_details", as: :user %>
<div class="flex gap-6 text-sm text-slate-400">
<% if policy(user).view_page_scanned_at_timestamps? %>
<div class="flex flex-col">
<span class="font-medium italic text-slate-500">Registered</span>
<span class="">
<% if registered_at = user.registered_at_for_view %>
<%= time_ago_in_words(registered_at) %>
ago
<% else %>
unknown
<% end %>
</span>
<span class="font-medium italic text-slate-500">Status</span>
<span class=""><%= user.account_status_for_view %></span>
</div>
<% end %>
<%= render_for_model user, "overview_details", as: :user %>
<div class="flex flex-col">
<span class="font-medium italic text-slate-500">Registered</span>
<span class="">
<% if registered_at = user.registered_at_for_view %>
<%= time_ago_in_words(registered_at) %>
ago
<% else %>
unknown
<% end %>
</span>
</div>
</div>
</div>
<a
</div>
<a
href="<%= user.external_url_for_view %>"
target="_blank"
rel="noopener noreferrer"
class="blue-link flex items-center gap-2"
>
<img src="<%= site_icon_path_for_user(user) %>" class="h-5 w-5" />
<span class="font-bold flex items-center gap-1">
<span><%= site_name_for_user(user) %></span>
<%= render "shared/icons/external_link" %>
</span>
</a>
</section>
<% end %>
<img src="<%= site_icon_path_for_user(user) %>" class="h-5 w-5" />
<span class="font-bold flex items-center gap-1">
<span><%= site_name_for_user(user) %></span>
<%= render "shared/icons/external_link" %>
</span>
</a>
</section>

View File

@@ -1,10 +1,10 @@
<% factors = Domain::Factors::UserUserFollowToFactors.find_by(user: user) %>
<section class="animated-shadow-sky sky-section">
<h2 class="section-header">
<span class="font-medium text-slate-900">Similar Users</span>
</h2>
<% factors = Domain::Factors::UserUserFollowToFactors.find_by(user: user) %>
<% if factors %>
<% nearest_neighbors = factors.nearest_neighbors(:embedding, distance: "euclidean").includes(:user).limit(10) %>
<% nearest_neighbors = Domain::NeighborFinder.find_neighbors(factors).includes(:user).limit(10) %>
<% nearest_neighbors.each do |neighbor| %>
<% user = neighbor.user %>
<div class="flex items-center gap-2 whitespace-nowrap text-slate-600 justify-between w-full px-4 py-2">

View File

@@ -2,23 +2,15 @@
<%= render_for_model @user, "name_icon_and_status", as: :user %>
<div class="flex flex-col gap-4 sm:flex-row">
<div class="flex flex-col gap-4 w-full sm:w-1/2">
<% cache [@user, "stats"], expires_in: 1.hour do %>
<%= render_for_model @user, "stats", as: :user %>
<% end %>
<% cache [@user, "similar_users"], expires_in: 1.hour do %>
<%= render_for_model @user, "similar_users", as: :user %>
<% end %>
<%= render_for_model @user, "stats", as: :user %>
<%= render_for_model @user, "similar_users", as: :user %>
</div>
<div class="flex flex-col gap-4 w-full sm:w-1/2">
<% if @user.has_created_posts? %>
<% cache [@user, "recent_created_posts"], expires_in: 1.hour do %>
<%= render_for_model @user, "recent_created_posts", as: :user %>
<% end %>
<%= render_for_model @user, "recent_created_posts", as: :user %>
<% end %>
<% if @user.has_faved_posts? %>
<% cache [@user, "recent_faved_posts"], expires_in: 1.hour do %>
<%= render_for_model @user, "recent_faved_posts", as: :user %>
<% end %>
<%= render_for_model @user, "recent_faved_posts", as: :user %>
<% end %>
</div>
</div>

View File

@@ -16,7 +16,7 @@ Rails.application.routes.draw do
namespace :api do
namespace :fa do
get :similar_users, to: "/domain/fa/api#similar_users"
get :similar_users, to: "/domain/users#similar_users"
get :object_statuses, to: "/domain/fa/api#object_statuses"
end
end

View File

@@ -53,6 +53,9 @@ class CreateDomainPostFactors < ActiveRecord::Migration[7.2]
t.timestamps
end
add_index table_name, :embedding, using: :ivfflat, opclass: :vector_l2_ops
add_index table_name,
:embedding,
using: :ivfflat,
opclass: :vector_cosine_ops
end
end

View File

@@ -45,6 +45,7 @@ class ApplicationController
include ::DomainSourceHelper
include ::GoodJobHelper
include ::SourceHelper
include ::TimestampHelper
include ::DeviseHelper
include ::ReactOnRails::Utils::Required
include ::ReactOnRails::Helper

View File

@@ -42,6 +42,7 @@ class DeviseController
include ::DomainSourceHelper
include ::GoodJobHelper
include ::SourceHelper
include ::TimestampHelper
include ::DeviseHelper
include ::ReactOnRails::Utils::Required
include ::ReactOnRails::Helper

View File

@@ -12,9 +12,6 @@ module GeneratedPathHelpersModule
sig { params(args: T.untyped).returns(String) }
def api_fa_object_statuses_path(*args); end
sig { params(args: T.untyped).returns(String) }
def api_fa_search_user_names_path(*args); end
sig { params(args: T.untyped).returns(String) }
def api_fa_similar_users_path(*args); end

View File

@@ -12,9 +12,6 @@ module GeneratedUrlHelpersModule
sig { params(args: T.untyped).returns(String) }
def api_fa_object_statuses_url(*args); end
sig { params(args: T.untyped).returns(String) }
def api_fa_search_user_names_url(*args); end
sig { params(args: T.untyped).returns(String) }
def api_fa_similar_users_url(*args); end

View File

@@ -45,6 +45,7 @@ class Rails::ApplicationController
include ::DomainSourceHelper
include ::GoodJobHelper
include ::SourceHelper
include ::TimestampHelper
include ::DeviseHelper
include ::ReactOnRails::Utils::Required
include ::ReactOnRails::Helper

View File

@@ -45,6 +45,7 @@ class Rails::Conductor::BaseController
include ::DomainSourceHelper
include ::GoodJobHelper
include ::SourceHelper
include ::TimestampHelper
include ::DeviseHelper
include ::ReactOnRails::Utils::Required
include ::ReactOnRails::Helper

View File

@@ -45,6 +45,7 @@ class Rails::HealthController
include ::DomainSourceHelper
include ::GoodJobHelper
include ::SourceHelper
include ::TimestampHelper
include ::DeviseHelper
include ::ReactOnRails::Utils::Required
include ::ReactOnRails::Helper

View File

@@ -1,4 +1,4 @@
# typed: strict
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for types exported from the `neighbor` gem.

View File

@@ -0,0 +1,14 @@
# typed: strict
# Signature-only declaration of `nearest_neighbors`; the method body is
# intentionally empty.
# NOTE(review): appears to be a Sorbet shim for the `neighbor` gem — confirm.
module Neighbor::Model
# factor_col: name of the embedding column to search on
# distance: "euclidean" | "cosine"
# precision: "half" (optional; defaults to nil)
sig do
params(
factor_col: Symbol,
distance: String,
precision: T.nilable(String),
).returns(ActiveRecord::Relation)
end
def nearest_neighbors(factor_col, distance:, precision: nil)
end
end