make factors and epochs variable

This commit is contained in:
Dylan Knutson
2023-03-31 20:45:57 +09:00
parent 69f1d19ef2
commit 92d79a9f9d
8 changed files with 90 additions and 45 deletions

View File

@@ -5,9 +5,13 @@ class Domain::Fa::BulkJob
now = Time.now
ret = yield
duration = Time.now - now
duration_ms = (1000 * duration).to_i
if duration >= 5
duration_str = "#{duration.round(2).to_s.bold} sec"
else
duration_str = "#{(1000 * duration).to_i.to_s.bold} ms"
end
title = title.call(ret) if title.respond_to?(:call)
logger.info "#{title} - #{duration_ms.to_s.bold} ms"
logger.info "#{title} - #{duration_str}"
ret
end
end

View File

@@ -1,8 +1,11 @@
class Domain::Fa::FactorCalculator < Domain::Fa::BulkJob
def initialize
def initialize(epochs = 20)
factors = Domain::Fa::UserFactor::FACTORS_WIDTHS
@recommender = Disco::Recommender.new(
factors: Domain::Fa::UserFactor.neighbor_attributes[:follows][:dimensions],
factors: factors,
epochs: epochs,
)
logger.info "epochs=#{epochs.to_s.bold} factors=#{factors.to_s.bold}"
end
def fit
@@ -17,28 +20,49 @@ class Domain::Fa::FactorCalculator < Domain::Fa::BulkJob
@recommender.fit(dataset)
end
measure("optimize recs") do
@recommender.optimize_item_recs
end
# measure("optimize recs") do
# @recommender.optimize_item_recs
# end
end
def write_factors
total = 0
logger.info "writing #{@recommender.item_ids.length.to_s.bold} factor models"
for_followed_width = Domain::Fa::UserFactor.columns_hash["for_followed"].sql_type_metadata.limit
for_follower_width = Domain::Fa::UserFactor.columns_hash["for_follower"].sql_type_metadata.limit
@recommender.item_ids.map do |item_id|
measure("#{"for_followed".bold} - done") do
write_factors_col(:item_ids, :item_factors, :for_followed)
end
measure("#{"for_follower".bold} - done") do
write_factors_col(:user_ids, :user_factors, :for_follower)
end
end
def write_factors_col(id_list_name, getter_name, factors_col_name)
total = 0
id_list = @recommender.send(id_list_name)
native_col_width = Domain::Fa::UserFactor.
columns_hash[factors_col_name.to_s].
sql_type_metadata.
limit
logger.info "#{factors_col_name.to_s.bold} - writing #{id_list.length.to_s.bold} factors"
id_list.map do |user_id|
factors = @recommender.send(getter_name, user_id)
padding = [0.0] * (native_col_width - factors.length)
{
user_id: item_id,
follows: @recommender.item_factors(item_id),
user_id: user_id,
factors_col_name => padding + factors.to_a,
}
end.each_slice(10000) do |chunk|
end.each_slice(20000) do |chunk|
total += chunk.size
measure("wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)") do
measure(" -> wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)") do
Domain::Fa::UserFactor.upsert_all(
chunk,
unique_by: :user_id,
update_only: %i[follows],
returning: %i[id],
update_only: factors_col_name,
returning: :id,
)
end
end

View File

@@ -75,28 +75,6 @@ class Domain::Fa::User < ReduxApplicationRecord
user
end
# Finds users similar to this one, judged by the set of users that follow it.
#
# exclude_already_followed - optional value forwarded unchanged to
#   similar_users_disco_by_followed (defaults to nil).
#
# Returns a Domain::Fa::User query that selects every user column plus the
# last select value of the neighbor query, joins :disco, and merges in the
# neighbor query reduced to user_id.
def similar_users_by_followed(exclude_already_followed = nil)
  neighbors = similar_users_disco_by_followed(exclude_already_followed)

  # Carry over the neighbor query's last select value; per the original
  # note this is the 'neighbor_distance' field already computed by disco.
  distance_select = neighbors.select_values.last

  users_with_distance = Domain::Fa::User.select("domain_fa_users.*", distance_select)
  users_with_distance.joins(:disco).merge(neighbors.reselect(:user_id))
end
# Builds the nearest-neighbor query over this record's disco association
# using the :follows column and euclidean distance.
#
# exclude_already_followed - nil, or an object responding to #follows; when
#   given, neighbors whose user_id is among its follows' followed_id values
#   are filtered out.
#
# Returns the (possibly filtered) neighbor query.
def similar_users_disco_by_followed(exclude_already_followed = nil)
  nearest = disco.nearest_neighbors(:follows, distance: "euclidean")
  return nearest unless exclude_already_followed

  already_followed_ids = exclude_already_followed.follows.select(:followed_id)
  nearest.where.not(user_id: already_followed_ids)
end
def self.find_or_build_from_submission_parser(submission_parser)
unless submission_parser.is_a?(Domain::Fa::Parser::ListedSubmissionParserHelper) ||
submission_parser.is_a?(Domain::Fa::Parser::SubmissionParserHelper)
@@ -111,4 +89,31 @@ class Domain::Fa::User < ReduxApplicationRecord
# Converts a display name to its URL form: every underscore and exclamation
# mark is removed and the remaining characters are lowercased.
#
# name - the String to convert.
#
# Returns a new String; the argument is not modified.
def self.name_to_url_name(name)
  stripped = name.delete("_!")
  stripped.downcase
end
# Users similar to this one according to the :for_follower factor column.
# Delegates to similar_users_by with no exclusion argument (nil), so no
# neighbors are filtered out.
# NOTE(review): the earlier wording here said "the set of users that follow
# this", which reads like the followed-side description. Presumably this
# variant ranks by the set of users this one follows - confirm against the
# factor calculation.
def similar_users_by_follower
similar_users_by(:for_follower, nil)
end
# Users similar to this one, judged by the set of users that follow it
# (looked up through the :for_followed factor column).
#
# exclude_already_followed - optional, defaults to nil; passed straight
#   through to similar_users_by as its exclusion argument.
def similar_users_by_followed(exclude_already_followed = nil)
similar_users_by(:for_followed, exclude_already_followed)
end
private
# Shared query builder behind the public similar_users_by_* methods.
#
# factor_col - factor column name handed to nearest_neighbors (callers in
#   this class pass :for_follower or :for_followed).
# exclude_already_followed - nil, or an object responding to #follows; when
#   given, neighbors whose user_id is among its follows' followed_id values
#   are filtered out.
#
# Returns a Domain::Fa::User query that selects every user column plus the
# last select value of the neighbor query, joins :disco, and merges in the
# neighbor query reduced to user_id.
def similar_users_by(factor_col, exclude_already_followed)
  neighbors = disco.nearest_neighbors(factor_col, distance: "euclidean")

  if exclude_already_followed
    already_followed_ids = exclude_already_followed.follows.select(:followed_id)
    neighbors = neighbors.where.not(user_id: already_followed_ids)
  end

  # Carry the neighbor query's last select value over into the user query.
  distance_select = neighbors.select_values.last
  users_with_distance = Domain::Fa::User.select("domain_fa_users.*", distance_select)
  users_with_distance.joins(:disco).merge(neighbors.reselect(:user_id))
end
end

View File

@@ -2,5 +2,8 @@ class Domain::Fa::UserFactor < ReduxApplicationRecord
self.table_name = "domain_fa_user_factors"
belongs_to :user, class_name: "::Domain::Fa::User"
has_neighbors :follows, dimensions: 32
FACTORS_WIDTHS = 8
has_neighbors :for_follower
has_neighbors :for_followed
end

View File

@@ -1,12 +1,18 @@
class CreateDomainFaUserFactorsTable < ActiveRecord::Migration[7.0]
def change
def up
create_table :domain_fa_user_factors do |t|
t.references :user, index: { unique: true }, null: false
t.vector :follows, limit: 32
t.vector :for_follower, limit: 32
t.vector :for_followed, limit: 32
t.timestamps
end
add_index :domain_fa_user_factors, :follows, using: :ivfflat, opclass: :vector_l2_ops
add_index :domain_fa_user_factors, :for_follower, using: :ivfflat, opclass: :vector_l2_ops
add_index :domain_fa_user_factors, :for_followed, using: :ivfflat, opclass: :vector_l2_ops
add_foreign_key :domain_fa_user_factors, :domain_fa_users, column: :user_id, primary_key: :id, validate: true
end
def down
drop_table :domain_fa_user_factors
end
end

6
db/schema.rb generated
View File

@@ -139,10 +139,12 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_31_023807) do
create_table "domain_fa_user_factors", force: :cascade do |t|
t.bigint "user_id", null: false
t.vector "follows", limit: 32
t.vector "for_follower", limit: 32
t.vector "for_followed", limit: 32
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["follows"], name: "index_domain_fa_user_factors_on_follows", using: :ivfflat
t.index ["for_followed"], name: "index_domain_fa_user_factors_on_for_followed", using: :ivfflat
t.index ["for_follower"], name: "index_domain_fa_user_factors_on_for_follower", using: :ivfflat
t.index ["user_id"], name: "index_domain_fa_user_factors_on_user_id", unique: true
end

View File

@@ -37,7 +37,8 @@ namespace :fa do
desc "calculate user follow factors"
task :calculate_follow_factors => [:set_logger_stdout, :environment] do
worker = Domain::Fa::FactorCalculator.new
epochs = (ENV["epochs"] || 20).to_i
worker = Domain::Fa::FactorCalculator.new(epochs)
worker.fit
worker.write_factors
end

View File

@@ -40,7 +40,7 @@ describe Domain::Fa::UserFactor do
# calculate the recommender
recommender = Disco::Recommender.new(
factors: Domain::Fa::UserFactor.neighbor_attributes[:follows][:dimensions],
factors: Domain::Fa::UserFactor::FACTORS_WIDTHS,
)
query = Enumerator.new do |e|
Domain::Fa::Follow.all.find_each do |follow|