# typed: false
# Trains a Disco recommender on the follow pairs stored in Domain::Fa::Follow
# and writes the resulting per-user factor vectors to Domain::Fa::UserFactor.
#
# Typical use, as far as this file shows: `new`, then `fit`, then
# `write_factors`.
class Domain::Fa::UserFactorCalculator
  include HasMeasureDuration

  # Number of rows handed to a single `upsert_all` call.
  WRITE_BATCH_SIZE = 20_000

  # @param epochs [Integer] training epochs passed through to Disco::Recommender
  def initialize(epochs = 20)
    factors = Domain::Fa::UserFactor::FACTORS_WIDTHS
    @recommender = Disco::Recommender.new(factors: factors, epochs: epochs)
    logger.info "epochs=#{epochs.to_s.bold} factors=#{factors.to_s.bold}"
  end

  # Loads every (follower_id, followed_id) pair and fits the recommender on
  # them, treating the follower as the "user" and the followed as the "item".
  def fit
    # NOTE(review): `measure` appears to call the title proc with the block's
    # result, and with nil when no result is available yet - confirm against
    # HasMeasureDuration.
    loading_title =
      proc do |rows|
        rows ? "loaded #{rows.length.to_s.bold} follows" : "loading follows"
      end

    dataset =
      measure(loading_title) do
        Domain::Fa::Follow
          .pluck(:follower_id, :followed_id)
          .map do |follower_id, followed_id|
            { user_id: follower_id, item_id: followed_id }
          end
      end

    measure("fit #{dataset.length.to_s.bold} follows") do
      @recommender.fit(dataset)
    end
  end

  # Persists both factor sets: item factors into `for_followed`, then user
  # factors into `for_follower`.
  def write_factors
    measure("#{"for_followed".bold} - done") do
      write_factors_col(:item_ids, :item_factors, :for_followed)
    end
    measure("#{"for_follower".bold} - done") do
      write_factors_col(:user_ids, :user_factors, :for_follower)
    end
  end

  # Upserts one factor vector per id into a single UserFactor column.
  #
  # Rows are built one batch at a time, so at most WRITE_BATCH_SIZE padded
  # vectors are held in memory instead of one row for every id up front.
  #
  # @param id_list_name [Symbol] recommender method returning the ids to write
  # @param getter_name [Symbol] recommender method returning the factors for one id
  # @param factors_col_name [Symbol] UserFactor column that receives the vectors
  # @raise [KeyError] if UserFactor has no column named +factors_col_name+
  # @raise [ArgumentError] if a factor vector is wider than the column
  # @return the result of +each_slice+; callers should not rely on it
  def write_factors_col(id_list_name, getter_name, factors_col_name)
    id_list = @recommender.public_send(id_list_name)
    native_col_width =
      Domain::Fa::UserFactor
        .columns_hash
        .fetch(factors_col_name.to_s)
        .sql_type_metadata
        .limit

    logger.info "#{factors_col_name.to_s.bold} - writing #{id_list.length.to_s.bold} factors"

    total = 0
    id_list.each_slice(WRITE_BATCH_SIZE) do |user_ids|
      chunk =
        user_ids.map do |user_id|
          factor_row(user_id, getter_name, factors_col_name, native_col_width)
        end
      total += chunk.size

      measure(
        " -> write #{chunk.size.to_s.bold} factors - (#{total.to_s.bold} total)",
      ) do
        Domain::Fa::UserFactor.upsert_all(
          chunk,
          unique_by: :user_id,
          update_only: factors_col_name,
          returning: :id,
        )
      end
    end
  end

  private

  # Builds the upsert row for one id, left-padding the factor vector with
  # zeros until it is exactly +native_col_width+ entries long.
  def factor_row(user_id, getter_name, factors_col_name, native_col_width)
    factors = @recommender.public_send(getter_name, user_id)
    padding_size = native_col_width - factors.length

    # Without this check a too-wide vector surfaces as an opaque
    # "negative argument" error from Array#*.
    if padding_size.negative?
      raise ArgumentError,
            "#{factors_col_name} holds #{native_col_width} values but " \
              "user_id=#{user_id} has #{factors.length} factors"
    end

    { :user_id => user_id, factors_col_name => ([0.0] * padding_size) + factors.to_a }
  end
end