remove unused code, factor calculators

This commit is contained in:
Dylan Knutson
2024-12-30 20:30:23 +00:00
parent 52498b3cc2
commit 02a9fd4b34
4 changed files with 0 additions and 230 deletions

View File

@@ -1,65 +0,0 @@
# Fits a Disco recommender on (user, post) fav pairs and writes the resulting
# per-post factor vectors into Domain::Fa::PostFactor.
#
# `measure` is not defined in this class (presumably it comes from
# HasMeasureDuration); `logger` is likewise resolved elsewhere. From its use
# here, `measure` takes a label (a String, or a proc that receives the block's
# result / nil) and is expected to return the block's value.
class Domain::Fa::PostFactorCalculator
  include HasMeasureDuration

  # @param epochs [Integer] passed straight through to Disco::Recommender
  def initialize(epochs = 20)
    factors = Domain::Fa::PostFactor::FACTORS_WIDTHS
    @recommender = Disco::Recommender.new(factors: factors, epochs: epochs)
    logger.info "epochs=#{epochs.to_s.bold} factors=#{factors.to_s.bold}"
  end

  # Loads up to 10 million favs, reshapes them into the
  # `{ user_id:, item_id: }` hashes handed to the recommender, and fits it.
  def fit
    limit = 10_000_000

    # The label proc gets the loaded rows when they are available, and a
    # falsy value otherwise.
    load_label =
      proc do |rows|
        if rows
          "loaded #{rows.length.to_s.bold} favs"
        else
          "loading up to #{limit.to_s.bold} favs"
        end
      end

    # `pluck` already returns a plain Array of [user_id, post_id] pairs, so no
    # extra `.all` / `.to_a` is needed around it.
    dataset =
      measure(load_label) do
        Domain::Fa::Fav.limit(limit).pluck(:user_id, :post_id)
      end

    measure("convert to hash") do
      # map! converts in place so a second 10M-element array is not allocated
      dataset.map! { |user_id, post_id| { user_id: user_id, item_id: post_id } }
    end

    measure("fit #{dataset.length.to_s.bold} favs") do
      @recommender.fit(dataset)
    end
  end

  # Writes the recommender's item factors into the `for_favorite` column.
  def write_factors
    measure("write factors") do
      write_factors_col(:item_ids, :item_factors, :for_favorite)
    end
  end

  # Upserts one factors column of Domain::Fa::PostFactor in batches of 20,000.
  #
  # @param id_list_name [Symbol] recommender method returning the ids to write
  # @param getter_name [Symbol] recommender method returning the factors for
  #   a single id
  # @param factors_col_name [Symbol] PostFactor column receiving the factors
  def write_factors_col(id_list_name, getter_name, factors_col_name)
    batch_size = 20_000
    total = 0
    id_list = @recommender.send(id_list_name)

    # Declared limit of the destination column; presumably the vector
    # dimension the column was created with -- TODO confirm against schema.
    native_col_width =
      Domain::Fa::PostFactor.columns_hash[
        factors_col_name.to_s
      ].sql_type_metadata.limit

    logger.info "#{factors_col_name.to_s.bold} - writing #{id_list.length.to_s.bold} factors"

    # Slice the ids first and build each batch's rows on demand, so only one
    # batch of padded factor arrays is alive at a time instead of the whole
    # table's worth.
    id_list.each_slice(batch_size) do |post_ids|
      chunk =
        post_ids.map do |post_id|
          factors = @recommender.send(getter_name, post_id)
          # Left-pad with zeros up to the column width.
          # NOTE(review): `[0.0] * n` raises ArgumentError for negative n, i.e.
          # when the factors are longer than the column limit.
          padding = [0.0] * (native_col_width - factors.length)
          { :post_id => post_id, factors_col_name => padding + factors.to_a }
        end

      total += chunk.size
      measure(
        " -> wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)",
      ) do
        Domain::Fa::PostFactor.upsert_all(
          chunk,
          unique_by: :post_id,
          update_only: factors_col_name,
          returning: :id,
        )
      end
    end
  end
end

View File

@@ -1,75 +0,0 @@
require "rails_helper"
# Worked example of the disco / neighbor recommender on FA favs: two disjoint
# clusters of posts, four favers with different tastes, and checks that
# similarity and recommendations stay inside the expected cluster.
RSpec.describe Domain::Fa::PostFactor do
  it "provides an example of how to use disco / neighbor" do
    creator1 = SpecUtil.create_domain_fa_user(name: "creator-1")

    # two groups of ten posts by the same creator, told apart by fa_id range
    cluster1 =
      Array.new(10) do |i|
        SpecUtil.create_domain_fa_post(
          creator: creator1,
          fa_id: "1000000#{i}".to_i
        )
      end
    cluster2 =
      Array.new(10) do |i|
        SpecUtil.create_domain_fa_post(
          creator: creator1,
          fa_id: "2000000#{i}".to_i
        )
      end

    cluster1_ids = Set.new(cluster1.map(&:fa_id))
    cluster2_ids = Set.new(cluster2.map(&:fa_id))

    faver1, faver2, faver3, faver4 =
      4.times.map { |i| SpecUtil.create_domain_fa_user(name: "faver-#{i + 1}") }

    # who favs what, created in this order:
    #   faver1 -> cluster1, faver2 -> cluster2,
    #   faver3 -> everything, faver4 -> a few in cluster1
    fav_plan = [
      [faver1, cluster1],
      [faver2, cluster2],
      [faver3, cluster1 + cluster2],
      [faver4, cluster1[0...2]]
    ]
    fav_plan.each do |faver, posts|
      posts.each { |post| Domain::Fa::Fav.create!(user: faver, post: post) }
    end

    # calculate the recommender
    recommender =
      Disco::Recommender.new(factors: Domain::Fa::PostFactor::FACTORS_WIDTHS)
    interactions = []
    Domain::Fa::Fav.find_each do |fav|
      interactions << { user_id: fav.user.name, item_id: fav.post.fa_id }
    end
    recommender.fit(interactions)
    recommender.optimize_item_recs

    # posts within a cluster should only be similar to each other
    [[cluster1, cluster1_ids], [cluster2, cluster2_ids]].each do |posts, own_ids|
      posts.each do |post|
        similar_items = Set.new(recommender.similar_items(post.fa_id, count: 9))
        similar_item_ids = Set.new(similar_items.map { |item| item[:item_id] })
        assert own_ids.superset?(similar_item_ids), similar_items
      end
    end

    # faver4's fav recommendations should be other posts in cluster1
    faver4_recs = Set.new(recommender.user_recs(faver4.name))
    faver4_rec_ids = Set.new(faver4_recs.map { |item| item[:item_id] })
    assert cluster1_ids.superset?(faver4_rec_ids), faver4_recs
  end
end

View File

@@ -1,69 +0,0 @@
require "rails_helper"
# Worked example of the disco / neighbor recommender on FA follows: two
# clusters of followed users, four followers with different follow sets, and
# checks that similarity and recommendations stay inside cluster 1.
#
# Uses `RSpec.describe` rather than the bare top-level `describe` so the file
# also loads when RSpec's global DSL is disabled.
RSpec.describe Domain::Fa::UserFactor do
  it "provides an example of how to use disco / neighbor" do
    # a few clusters of users, followed by a few others
    cluster1 =
      10.times.map do |i|
        SpecUtil.create_domain_fa_user(name: "cluster-1-#{i}")
      end
    cluster2 =
      10.times.map do |i|
        SpecUtil.create_domain_fa_user(name: "cluster-2-#{i}")
      end

    # users are identified by name in the recommender, so collect names here
    cluster1_ids = Set.new cluster1.map(&:name)
    cluster2_ids = Set.new cluster2.map(&:name)

    follower1, follower2, follower3, follower4 =
      4.times.map do |i|
        SpecUtil.create_domain_fa_user(name: "follower-#{i + 1}")
      end

    # follower1 follows cluster1
    cluster1.each do |user|
      Domain::Fa::Follow.create!(follower: follower1, followed: user)
    end

    # follower2 follows cluster2
    cluster2.each do |user|
      Domain::Fa::Follow.create!(follower: follower2, followed: user)
    end

    # follower3 follows everybody
    (cluster1 + cluster2).each do |user|
      Domain::Fa::Follow.create!(follower: follower3, followed: user)
    end

    # follower4 follows a few in cluster1
    cluster1[0...2].each do |user|
      Domain::Fa::Follow.create!(follower: follower4, followed: user)
    end

    # calculate the recommender
    recommender =
      Disco::Recommender.new(factors: Domain::Fa::UserFactor::FACTORS_WIDTHS)
    interactions = []
    Domain::Fa::Follow.find_each do |follow|
      interactions << {
        user_id: follow.follower.name,
        item_id: follow.followed.name
      }
    end
    recommender.fit(interactions)
    recommender.optimize_item_recs

    # cluster1 should all be similar to each other
    cluster1.each do |user|
      similar_items = Set.new(recommender.similar_items(user.name, count: 9))
      similar_item_ids = Set.new(similar_items.map { |item| item[:item_id] })
      assert cluster1_ids.superset?(similar_item_ids), similar_items
    end

    # follower4's follow recommendations should be other users in cluster1
    follower4_recs = Set.new(recommender.user_recs(follower4.name))
    follower4_rec_ids = Set.new(follower4_recs.map { |item| item[:item_id] })
    assert cluster1_ids.superset?(follower4_rec_ids), follower4_recs
  end
end

View File

@@ -33,27 +33,6 @@ class SpecUtil
mock
end
# Builds an instance_double standing in for "Curl::Easy".
#
# The double accepts `url=` only with +expected_url+, `headers=` with any
# Hash, and `perform`; the remaining readers return the canned values below.
#
# @param expected_url the only argument `url=` is stubbed to accept
# @param response_code value returned by `response_code`
# @param body_str value returned by `body_str`
# @param request_headers value returned by `headers`
# @param response_headers_str value returned by `header_str`
# @return the configured double
def self.mock_curl_easy(
  expected_url,
  response_code: 200,
  body_str: "http body",
  request_headers: {},
  response_headers_str: "HTTP 200\r\nresp: respheader\r\ncontent-type: text/plain\r\n\r\n"
)
  curl = instance_double("Curl::Easy")

  # request-side calls: setters with constrained arguments, then perform
  allow(curl).to receive(:url=).with(expected_url)
  allow(curl).to receive(:headers=).with(Hash)
  allow(curl).to receive(:perform)

  # response-side readers and the value each one hands back
  canned_values = {
    response_code: response_code,
    body_str: body_str,
    header_str: response_headers_str,
    headers: request_headers
  }
  canned_values.each do |message, value|
    allow(curl).to receive(message).and_return(value)
  end

  curl
end
def self.build_http_log_entry(
uri: "http://example.com/",
status_code: 200,