make factors and epochs variable
This commit is contained in:
@@ -5,9 +5,13 @@ class Domain::Fa::BulkJob
|
||||
now = Time.now
|
||||
ret = yield
|
||||
duration = Time.now - now
|
||||
duration_ms = (1000 * duration).to_i
|
||||
if duration >= 5
|
||||
duration_str = "#{duration.round(2).to_s.bold} sec"
|
||||
else
|
||||
duration_str = "#{(1000 * duration).to_i.to_s.bold} ms"
|
||||
end
|
||||
title = title.call(ret) if title.respond_to?(:call)
|
||||
logger.info "#{title} - #{duration_ms.to_s.bold} ms"
|
||||
logger.info "#{title} - #{duration_str}"
|
||||
ret
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
class Domain::Fa::FactorCalculator < Domain::Fa::BulkJob
|
||||
def initialize
|
||||
def initialize(epochs = 20)
|
||||
factors = Domain::Fa::UserFactor::FACTORS_WIDTHS
|
||||
@recommender = Disco::Recommender.new(
|
||||
factors: Domain::Fa::UserFactor.neighbor_attributes[:follows][:dimensions],
|
||||
factors: factors,
|
||||
epochs: epochs,
|
||||
)
|
||||
logger.info "epochs=#{epochs.to_s.bold} factors=#{factors.to_s.bold}"
|
||||
end
|
||||
|
||||
def fit
|
||||
@@ -17,28 +20,49 @@ class Domain::Fa::FactorCalculator < Domain::Fa::BulkJob
|
||||
@recommender.fit(dataset)
|
||||
end
|
||||
|
||||
measure("optimize recs") do
|
||||
@recommender.optimize_item_recs
|
||||
end
|
||||
# measure("optimize recs") do
|
||||
# @recommender.optimize_item_recs
|
||||
# end
|
||||
end
|
||||
|
||||
def write_factors
|
||||
total = 0
|
||||
logger.info "writing #{@recommender.item_ids.length.to_s.bold} factor models"
|
||||
for_followed_width = Domain::Fa::UserFactor.columns_hash["for_followed"].sql_type_metadata.limit
|
||||
for_follower_width = Domain::Fa::UserFactor.columns_hash["for_follower"].sql_type_metadata.limit
|
||||
|
||||
@recommender.item_ids.map do |item_id|
|
||||
measure("#{"for_followed".bold} - done") do
|
||||
write_factors_col(:item_ids, :item_factors, :for_followed)
|
||||
end
|
||||
measure("#{"for_follower".bold} - done") do
|
||||
write_factors_col(:user_ids, :user_factors, :for_follower)
|
||||
end
|
||||
end
|
||||
|
||||
def write_factors_col(id_list_name, getter_name, factors_col_name)
|
||||
total = 0
|
||||
id_list = @recommender.send(id_list_name)
|
||||
native_col_width = Domain::Fa::UserFactor.
|
||||
columns_hash[factors_col_name.to_s].
|
||||
sql_type_metadata.
|
||||
limit
|
||||
|
||||
logger.info "#{factors_col_name.to_s.bold} - writing #{id_list.length.to_s.bold} factors"
|
||||
|
||||
id_list.map do |user_id|
|
||||
factors = @recommender.send(getter_name, user_id)
|
||||
padding = [0.0] * (native_col_width - factors.length)
|
||||
{
|
||||
user_id: item_id,
|
||||
follows: @recommender.item_factors(item_id),
|
||||
user_id: user_id,
|
||||
factors_col_name => padding + factors.to_a,
|
||||
}
|
||||
end.each_slice(10000) do |chunk|
|
||||
end.each_slice(20000) do |chunk|
|
||||
total += chunk.size
|
||||
measure("wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)") do
|
||||
measure(" -> wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)") do
|
||||
Domain::Fa::UserFactor.upsert_all(
|
||||
chunk,
|
||||
unique_by: :user_id,
|
||||
update_only: %i[follows],
|
||||
returning: %i[id],
|
||||
update_only: factors_col_name,
|
||||
returning: :id,
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -75,28 +75,6 @@ class Domain::Fa::User < ReduxApplicationRecord
|
||||
user
|
||||
end
|
||||
|
||||
# users similar to this one by the set of users that follow this
|
||||
def similar_users_by_followed(exclude_already_followed = nil)
|
||||
disco_query = similar_users_disco_by_followed(exclude_already_followed)
|
||||
# include the 'neighbor_distance' field, already computed by disco
|
||||
Domain::Fa::User.
|
||||
select("domain_fa_users.*", disco_query.select_values.last).
|
||||
joins(:disco).
|
||||
merge(disco_query.reselect(:user_id))
|
||||
end
|
||||
|
||||
def similar_users_disco_by_followed(exclude_already_followed = nil)
|
||||
query = self.
|
||||
disco.
|
||||
nearest_neighbors(:follows, distance: "euclidean")
|
||||
|
||||
if exclude_already_followed
|
||||
query = query.where.not(user_id: exclude_already_followed.follows.select(:followed_id))
|
||||
end
|
||||
|
||||
query
|
||||
end
|
||||
|
||||
def self.find_or_build_from_submission_parser(submission_parser)
|
||||
unless submission_parser.is_a?(Domain::Fa::Parser::ListedSubmissionParserHelper) ||
|
||||
submission_parser.is_a?(Domain::Fa::Parser::SubmissionParserHelper)
|
||||
@@ -111,4 +89,31 @@ class Domain::Fa::User < ReduxApplicationRecord
|
||||
def self.name_to_url_name(name)
|
||||
name.delete("_").delete("!").downcase
|
||||
end
|
||||
|
||||
# users similar to this one by the set of users that this one follows (uses the for_follower factors)
|
||||
def similar_users_by_follower
|
||||
similar_users_by(:for_follower, nil)
|
||||
end
|
||||
|
||||
# users similar to this one by the set of users that follow this one (uses the for_followed factors)
|
||||
def similar_users_by_followed(exclude_already_followed = nil)
|
||||
similar_users_by(:for_followed, exclude_already_followed)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def similar_users_by(factor_col, exclude_already_followed)
|
||||
query = self.
|
||||
disco.
|
||||
nearest_neighbors(factor_col, distance: "euclidean")
|
||||
|
||||
if exclude_already_followed
|
||||
query = query.where.not(user_id: exclude_already_followed.follows.select(:followed_id))
|
||||
end
|
||||
|
||||
Domain::Fa::User.
|
||||
select("domain_fa_users.*", query.select_values.last).
|
||||
joins(:disco).
|
||||
merge(query.reselect(:user_id))
|
||||
end
|
||||
end
|
||||
|
||||
@@ -2,5 +2,8 @@ class Domain::Fa::UserFactor < ReduxApplicationRecord
|
||||
self.table_name = "domain_fa_user_factors"
|
||||
|
||||
belongs_to :user, class_name: "::Domain::Fa::User"
|
||||
has_neighbors :follows, dimensions: 32
|
||||
|
||||
FACTORS_WIDTHS = 8
|
||||
has_neighbors :for_follower
|
||||
has_neighbors :for_followed
|
||||
end
|
||||
|
||||
@@ -1,12 +1,18 @@
|
||||
class CreateDomainFaUserFactorsTable < ActiveRecord::Migration[7.0]
|
||||
def change
|
||||
def up
|
||||
create_table :domain_fa_user_factors do |t|
|
||||
t.references :user, index: { unique: true }, null: false
|
||||
t.vector :follows, limit: 32
|
||||
t.vector :for_follower, limit: 32
|
||||
t.vector :for_followed, limit: 32
|
||||
t.timestamps
|
||||
end
|
||||
|
||||
add_index :domain_fa_user_factors, :follows, using: :ivfflat, opclass: :vector_l2_ops
|
||||
add_index :domain_fa_user_factors, :for_follower, using: :ivfflat, opclass: :vector_l2_ops
|
||||
add_index :domain_fa_user_factors, :for_followed, using: :ivfflat, opclass: :vector_l2_ops
|
||||
add_foreign_key :domain_fa_user_factors, :domain_fa_users, column: :user_id, primary_key: :id, validate: true
|
||||
end
|
||||
|
||||
def down
|
||||
drop_table :domain_fa_user_factors
|
||||
end
|
||||
end
|
||||
|
||||
6
db/schema.rb
generated
6
db/schema.rb
generated
@@ -139,10 +139,12 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_31_023807) do
|
||||
|
||||
create_table "domain_fa_user_factors", force: :cascade do |t|
|
||||
t.bigint "user_id", null: false
|
||||
t.vector "follows", limit: 32
|
||||
t.vector "for_follower", limit: 32
|
||||
t.vector "for_followed", limit: 32
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["follows"], name: "index_domain_fa_user_factors_on_follows", using: :ivfflat
|
||||
t.index ["for_followed"], name: "index_domain_fa_user_factors_on_for_followed", using: :ivfflat
|
||||
t.index ["for_follower"], name: "index_domain_fa_user_factors_on_for_follower", using: :ivfflat
|
||||
t.index ["user_id"], name: "index_domain_fa_user_factors_on_user_id", unique: true
|
||||
end
|
||||
|
||||
|
||||
@@ -37,7 +37,8 @@ namespace :fa do
|
||||
|
||||
desc "calculate user follow factors"
|
||||
task :calculate_follow_factors => [:set_logger_stdout, :environment] do
|
||||
worker = Domain::Fa::FactorCalculator.new
|
||||
epochs = (ENV["epochs"] || 20).to_i
|
||||
worker = Domain::Fa::FactorCalculator.new(epochs)
|
||||
worker.fit
|
||||
worker.write_factors
|
||||
end
|
||||
|
||||
@@ -40,7 +40,7 @@ describe Domain::Fa::UserFactor do
|
||||
|
||||
# calculate the recommender
|
||||
recommender = Disco::Recommender.new(
|
||||
factors: Domain::Fa::UserFactor.neighbor_attributes[:follows][:dimensions],
|
||||
factors: Domain::Fa::UserFactor::FACTORS_WIDTHS,
|
||||
)
|
||||
query = Enumerator.new do |e|
|
||||
Domain::Fa::Follow.all.find_each do |follow|
|
||||
|
||||
Reference in New Issue
Block a user