remove unused code, factor calculators
This commit is contained in:
@@ -1,65 +0,0 @@
|
||||
# Trains a Disco::Recommender on (user_id, post_id) pairs read from
# Domain::Fa::Fav and writes the resulting per-item factor vectors into
# Domain::Fa::PostFactor.
#
# Timing and logging go through `measure` and `logger`, which are not defined
# in this class (presumably supplied by HasMeasureDuration — confirm there).
class Domain::Fa::PostFactorCalculator
  include HasMeasureDuration

  # @param epochs [Integer] passed to Disco::Recommender.new as `epochs:`
  def initialize(epochs = 20)
    factors = Domain::Fa::PostFactor::FACTORS_WIDTHS
    @recommender = Disco::Recommender.new(factors: factors, epochs: epochs)
    logger.info "epochs=#{epochs.to_s.bold} factors=#{factors.to_s.bold}"
  end

  # Loads up to 10 million favs, reshapes them into the
  # `{ user_id:, item_id: }` hashes handed to the recommender, and fits it.
  #
  # @return whatever the final `measure` call returns
  def fit
    limit = 10_000_000

    # The title proc yields one message when given a truthy result and another
    # otherwise. NOTE(review): presumably `measure` calls it once before the
    # block (with nil) and once after (with the result) — confirm in
    # HasMeasureDuration.
    load_title =
      proc do |rows|
        if rows
          "loaded #{rows.length.to_s.bold} favs"
        else
          "loading up to #{limit.to_s.bold} favs"
        end
      end

    # `pluck` already returns a plain Array of [user_id, post_id] pairs, so no
    # extra `.all` / `.to_a` is needed around it.
    dataset =
      measure(load_title) do
        Domain::Fa::Fav.limit(limit).pluck(:user_id, :post_id)
      end

    # Converted in place (map!) so a second array of the same size is not
    # allocated alongside the plucked one.
    measure("convert to hash") do
      dataset.map! { |user_id, post_id| { user_id: user_id, item_id: post_id } }
    end

    measure("fit #{dataset.length.to_s.bold} favs") do
      @recommender.fit(dataset)
    end
  end

  # Writes the recommender's item factors to the `for_favorite` column.
  def write_factors
    measure("write factors") do
      write_factors_col(:item_ids, :item_factors, :for_favorite)
    end
  end

  # Upserts one factor vector per id into Domain::Fa::PostFactor, in batches
  # of 20,000 rows keyed on `post_id`.
  #
  # @param id_list_name [Symbol] recommender method returning the ids to write
  # @param getter_name [Symbol] recommender method returning the factors for one id
  # @param factors_col_name [Symbol] PostFactor column that receives the vector
  def write_factors_col(id_list_name, getter_name, factors_col_name)
    total = 0
    id_list = @recommender.send(id_list_name)

    # The column's declared limit is used as the target vector length.
    # NOTE(review): presumably this is the vector column's dimension — confirm
    # against the schema.
    native_col_width =
      Domain::Fa::PostFactor
        .columns_hash[factors_col_name.to_s]
        .sql_type_metadata
        .limit

    logger.info "#{factors_col_name.to_s.bold} - writing #{id_list.length.to_s.bold} factors"

    # Slice the ids first and build row hashes per batch, so only one batch of
    # padded factor arrays is held in memory at a time instead of every row.
    id_list.each_slice(20_000) do |post_ids|
      chunk =
        post_ids.map do |post_id|
          factors = @recommender.send(getter_name, post_id)
          # Left-pad with zeros up to the column width.
          padding = [0.0] * (native_col_width - factors.length)
          { :post_id => post_id, factors_col_name => padding + factors.to_a }
        end

      total += chunk.size
      measure(
        " -> wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)",
      ) do
        Domain::Fa::PostFactor.upsert_all(
          chunk,
          unique_by: :post_id,
          update_only: factors_col_name,
          returning: :id,
        )
      end
    end
  end
end
|
||||
@@ -1,75 +0,0 @@
|
||||
require "rails_helper"
|
||||
|
||||
RSpec.describe Domain::Fa::PostFactor do
  # Worked example: two disjoint groups of posts, four users with different
  # fav patterns, and a Disco recommender fitted on those favs.
  it "provides an example of how to use disco / neighbor" do
    author = SpecUtil.create_domain_fa_user(name: "creator-1")

    # Ten posts per cluster, with fa_ids base_fa_id .. base_fa_id + 9.
    build_cluster = ->(base_fa_id) do
      Array.new(10) do |offset|
        SpecUtil.create_domain_fa_post(
          creator: author,
          fa_id: base_fa_id + offset,
        )
      end
    end

    cluster1 = build_cluster.call(10_000_000)
    cluster2 = build_cluster.call(20_000_000)

    cluster1_ids = cluster1.map(&:fa_id).to_set
    cluster2_ids = cluster2.map(&:fa_id).to_set

    faver1, faver2, faver3, faver4 =
      (1..4).map { |n| SpecUtil.create_domain_fa_user(name: "faver-#{n}") }

    # Who favs what:
    #   faver1 -> all of cluster1
    #   faver2 -> all of cluster2
    #   faver3 -> everything
    #   faver4 -> the first two posts of cluster1
    [
      [faver1, cluster1],
      [faver2, cluster2],
      [faver3, cluster1 + cluster2],
      [faver4, cluster1.first(2)],
    ].each do |faver, posts|
      posts.each { |post| Domain::Fa::Fav.create!(user: faver, post: post) }
    end

    # Fit the recommender on (user name, post fa_id) pairs.
    recommender =
      Disco::Recommender.new(factors: Domain::Fa::PostFactor::FACTORS_WIDTHS)
    training_rows =
      Domain::Fa::Fav.find_each.map do |fav|
        { user_id: fav.user.name, item_id: fav.post.fa_id }
      end
    recommender.fit(training_rows)
    recommender.optimize_item_recs

    # Every post's 9 nearest items must come from the allowed id set.
    assert_similar_within = ->(posts, allowed_ids) do
      posts.each do |post|
        similar_items = Set.new(recommender.similar_items(post.fa_id, count: 9))
        similar_item_ids = Set.new(similar_items.map { |item| item[:item_id] })

        assert allowed_ids.superset?(similar_item_ids), similar_items
      end
    end

    # Posts within a cluster should only be similar to each other.
    assert_similar_within.call(cluster1, cluster1_ids)
    assert_similar_within.call(cluster2, cluster2_ids)

    # faver4 only faved cluster1 posts, so recommendations must stay in cluster1.
    faver4_recs = Set.new(recommender.user_recs(faver4.name))
    faver4_rec_ids = Set.new(faver4_recs.map { |item| item[:item_id] })
    assert cluster1_ids.superset?(faver4_rec_ids), faver4_recs
  end
end
|
||||
@@ -1,69 +0,0 @@
|
||||
require "rails_helper"
|
||||
|
||||
describe Domain::Fa::UserFactor do
  # Worked example: two groups of users, four followers with different follow
  # patterns, and a Disco recommender fitted on those follows.
  it "provides an example of how to use disco / neighbor" do
    # a few clusters of users, followed by a few others
    cluster1 =
      10.times.map do |i|
        SpecUtil.create_domain_fa_user(name: "cluster-1-#{i}")
      end

    cluster2 =
      10.times.map do |i|
        SpecUtil.create_domain_fa_user(name: "cluster-2-#{i}")
      end

    # Only cluster1 membership is asserted on below, so only its id set is built.
    # NOTE(review): there is no similarity assertion for cluster2 in this spec.
    cluster1_ids = Set.new cluster1.map(&:name)

    follower1, follower2, follower3, follower4 =
      4.times.map do |i|
        SpecUtil.create_domain_fa_user(name: "follower-#{i + 1}")
      end

    # follower1 follows cluster1
    cluster1.each do |user|
      Domain::Fa::Follow.create!(follower: follower1, followed: user)
    end

    # follower2 follows cluster2
    cluster2.each do |user|
      Domain::Fa::Follow.create!(follower: follower2, followed: user)
    end

    # follower3 follows everybody
    (cluster1 + cluster2).each do |user|
      Domain::Fa::Follow.create!(follower: follower3, followed: user)
    end

    # follower4 follows a few in cluster1
    cluster1[0...2].each do |user|
      Domain::Fa::Follow.create!(follower: follower4, followed: user)
    end

    # calculate the recommender from (follower name, followed name) pairs;
    # find_each without a block already returns an Enumerator
    recommender =
      Disco::Recommender.new(factors: Domain::Fa::UserFactor::FACTORS_WIDTHS)
    recommender.fit(
      Domain::Fa::Follow.find_each.map do |f|
        { user_id: f.follower.name, item_id: f.followed.name }
      end,
    )
    recommender.optimize_item_recs

    # cluster1 should all be similar to each other
    cluster1.each do |user|
      similar_items = Set.new(recommender.similar_items(user.name, count: 9))
      similar_item_ids = Set.new(similar_items.map { |item| item[:item_id] })

      assert cluster1_ids.superset?(similar_item_ids), similar_items
    end

    # follower4's follow recommendations should be other users in cluster1
    follower4_recs = Set.new(recommender.user_recs(follower4.name))
    follower4_rec_ids = Set.new(follower4_recs.map { |item| item[:item_id] })
    assert cluster1_ids.superset?(follower4_rec_ids), follower4_recs
  end
end
|
||||
@@ -33,27 +33,6 @@ class SpecUtil
|
||||
mock
|
||||
end
|
||||
|
||||
# Builds an instance_double of "Curl::Easy" with the request setters and
# response readers stubbed.
#
# @param expected_url the only argument `url=` is stubbed to accept
# @param response_code value returned by `response_code`
# @param body_str value returned by `body_str`
# @param request_headers value returned by `headers`
# @param response_headers_str value returned by `header_str`
# @return the configured double
def self.mock_curl_easy(
  expected_url,
  response_code: 200,
  body_str: "http body",
  request_headers: {},
  response_headers_str: "HTTP 200\r\nresp: respheader\r\ncontent-type: text/plain\r\n\r\n"
)
  curl_double = instance_double("Curl::Easy")

  # Setters are constrained: `url=` only with the expected URL, `headers=`
  # only with a Hash. `perform` is stubbed with no return value.
  allow(curl_double).to receive(:url=).with(expected_url)
  allow(curl_double).to receive(:headers=).with(Hash)
  allow(curl_double).to receive(:perform)

  # Readers and the canned value each one returns.
  {
    response_code: response_code,
    body_str: body_str,
    header_str: response_headers_str,
    headers: request_headers,
  }.each do |reader, canned_value|
    allow(curl_double).to receive(reader).and_return(canned_value)
  end

  curl_double
end
|
||||
|
||||
def self.build_http_log_entry(
|
||||
uri: "http://example.com/",
|
||||
status_code: 200,
|
||||
|
||||
Reference in New Issue
Block a user