Files
redux-scraper/app/lib/domain/fa/user_factor_calculator.rb
2025-01-01 03:29:53 +00:00

69 lines
2.0 KiB
Ruby

# typed: false
# Fits a Disco::Recommender on (follower_id, followed_id) pairs read from
# Domain::Fa::Follow and writes the resulting per-user factor vectors into
# Domain::Fa::UserFactor.
#
# Typical use, as far as this file shows: build an instance, call #fit, then
# call #write_factors.
class Domain::Fa::UserFactorCalculator
  include HasMeasureDuration

  # Number of rows handed to a single upsert_all call.
  UPSERT_BATCH_SIZE = 20_000

  # @param epochs [Integer] forwarded to Disco::Recommender as +epochs:+
  def initialize(epochs = 20)
    factors = Domain::Fa::UserFactor::FACTORS_WIDTHS
    @recommender = Disco::Recommender.new(factors: factors, epochs: epochs)
    logger.info "epochs=#{epochs.to_s.bold} factors=#{factors.to_s.bold}"
  end

  # Loads every follow edge and fits the recommender on it. The follower is
  # passed to Disco as the "user" and the followed account as the "item".
  #
  # @return the value of the final +measure+ call
  def fit
    # Kept as a proc on purpose: a lambda would enforce arity, and `measure`
    # (from HasMeasureDuration, not visible here) may invoke the title without
    # an argument. NOTE(review): presumably `r` is nil/absent before the block
    # has run and the block's result afterwards - confirm against `measure`.
    loading_title =
      proc { |r| r ? "loaded #{r.length.to_s.bold} follows" : "loading follows" }

    dataset =
      measure(loading_title) do
        Domain::Fa::Follow
          .pluck(:follower_id, :followed_id)
          .map do |follower_id, followed_id|
            { user_id: follower_id, item_id: followed_id }
          end
      end

    measure("fit #{dataset.length.to_s.bold} follows") do
      @recommender.fit(dataset)
    end
  end

  # Persists both factor sets: the recommender's item factors go to the
  # +for_followed+ column and its user factors to the +for_follower+ column.
  def write_factors
    measure("#{"for_followed".bold} - done") do
      write_factors_col(:item_ids, :item_factors, :for_followed)
    end
    measure("#{"for_follower".bold} - done") do
      write_factors_col(:user_ids, :user_factors, :for_follower)
    end
  end

  # Upserts one factor column for every id the recommender knows about.
  #
  # Rows are built one batch at a time, so at most UPSERT_BATCH_SIZE row
  # hashes are alive at once instead of one per id.
  #
  # @param id_list_name [Symbol] recommender method returning the ids
  # @param getter_name [Symbol] recommender method returning the factor
  #   vector for a single id
  # @param factors_col_name [Symbol] Domain::Fa::UserFactor column to write
  # @raise [ArgumentError] if a factor vector is wider than the column
  # @raise [KeyError] if the column does not exist on Domain::Fa::UserFactor
  def write_factors_col(id_list_name, getter_name, factors_col_name)
    id_list = @recommender.public_send(id_list_name)
    native_col_width =
      Domain::Fa::UserFactor
        .columns_hash
        .fetch(factors_col_name.to_s)
        .sql_type_metadata
        .limit
    logger.info "#{factors_col_name.to_s.bold} - writing #{id_list.length.to_s.bold} factors"

    total = 0
    id_list.each_slice(UPSERT_BATCH_SIZE) do |ids|
      rows =
        ids.map do |user_id|
          factors = @recommender.public_send(getter_name, user_id)
          {
            :user_id => user_id,
            factors_col_name => left_pad_factors(factors, native_col_width),
          }
        end
      total += rows.size

      measure(
        " -> write #{rows.size.to_s.bold} factors - (#{total.to_s.bold} total)",
      ) do
        Domain::Fa::UserFactor.upsert_all(
          rows,
          unique_by: :user_id,
          update_only: factors_col_name,
          returning: :id,
        )
      end
    end
  end

  private

  # Returns +factors+ as an Array, left-padded with 0.0 up to +width+ entries.
  #
  # @raise [ArgumentError] if +factors+ already has more than +width+ entries
  def left_pad_factors(factors, width)
    missing = width - factors.length
    if missing.negative?
      raise ArgumentError,
            "factor vector has #{factors.length} entries but the column holds #{width}"
    end

    Array.new(missing, 0.0) + factors.to_a
  end
end