remove separate dev envs, add faiss

This commit is contained in:
Dylan Knutson
2023-03-31 11:22:47 +09:00
parent 13c9ff0e8c
commit 3c45545eab
17 changed files with 133 additions and 117 deletions

View File

@@ -53,12 +53,12 @@ gem "bootsnap", require: false
# Use Active Storage variants [https://guides.rubyonrails.org/active_storage_overview.html#transforming-images]
# gem "image_processing", "~> 1.2"
group :development_regius, :development_curtus, :test do
group :development, :test do
# See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem
gem "debug", platforms: %i[ mri mingw x64_mingw ]
end
group :development_regius, :development_curtus do
group :development do
# Use console on exceptions pages [https://github.com/rails/web-console]
gem "web-console"
@@ -104,3 +104,4 @@ gem "good_job"
gem "neighbor"
gem "disco"
gem "faiss"

View File

@@ -120,6 +120,9 @@ GEM
erubi (1.12.0)
et-orbi (1.2.7)
tzinfo
faiss (0.2.5)
numo-narray
rice (>= 4.0.2)
ffi (1.15.5)
fugit (1.8.1)
et-orbi (~> 1, >= 1.2.7)
@@ -244,6 +247,7 @@ GEM
reline (0.3.2)
io-console (~> 0.5)
rexml (3.2.5)
rice (4.0.4)
ripcord (2.0.0)
rspec-core (3.12.1)
rspec-support (~> 3.12.0)
@@ -328,6 +332,7 @@ DEPENDENCIES
diffy
discard
disco
faiss
good_job
http-cookie
importmap-rails

View File

@@ -15,7 +15,8 @@ class Scraper::ClientFactory
# public API
def self.get_gallery_dl_client
if Rails.env.test?
# TODO - can probably allow development to hit real websites
if Rails.env.test? || Rails.env.development?
@gallery_dl_client_mock || raise("no gallery dl mock set")
else
_gallery_dl_client_impl
@@ -23,7 +24,7 @@ class Scraper::ClientFactory
end
def self.get_twitter_http_client
if Rails.env.test?
if Rails.env.test? || Rails.env.development?
@http_client_mock || raise("no http client mock set")
else
_http_client_impl(:twitter, Scraper::TwitterHttpClientConfig)
@@ -31,7 +32,7 @@ class Scraper::ClientFactory
end
def self.get_fa_http_client
if Rails.env.test?
if Rails.env.test? || Rails.env.development?
@http_client_mock || raise("no http client mock set")
else
_http_client_impl(:fa, Scraper::FaHttpClientConfig)

View File

@@ -0,0 +1,8 @@
# One row of the "domain_fa_follows" table: a directed edge saying that
# `follower` follows `followed`. Both ends are Domain::Fa::User records.
class Domain::Fa::Follow < ReduxApplicationRecord
  self.table_name = "domain_fa_follows"

  # The user doing the following.
  belongs_to :follower, class_name: "::Domain::Fa::User"

  # The user being followed.
  belongs_to :followed, class_name: "::Domain::Fa::User"
end

View File

@@ -8,6 +8,11 @@ class Domain::Fa::User < ReduxApplicationRecord
inverse_of: :creator,
foreign_key: :creator_id
has_one :disco,
class_name: "::Domain::Fa::UserFactor",
inverse_of: :user,
foreign_key: :user_id
enum :state, [
:ok, # so far so good, user may not yet be scanned
:scan_error, # user has been removed or otherwise, see state_detail

View File

@@ -0,0 +1,6 @@
# Per-user factor vector stored in the "domain_fa_user_factors" table.
# Each row belongs to exactly one Domain::Fa::User.
class Domain::Fa::UserFactor < ReduxApplicationRecord
  self.table_name = "domain_fa_user_factors"

  belongs_to(
    :user,
    class_name: "::Domain::Fa::User",
  )

  # `factors` is declared as a 32-dimensional neighbor attribute
  # (presumably via the `neighbor` gem listed in the Gemfile - confirm).
  has_neighbors(
    :factors,
    dimensions: 32,
  )
end

View File

@@ -1,4 +0,0 @@
class Domain::Fa::UserFactor
belongs_to :user, class_name: "::Domain::Fa::User"
has_neighbors :factors
end

View File

@@ -82,13 +82,7 @@ testcookies: &testcookies
value: oaidvalue
path: /
development_regius:
direct: *ddwhatnow
proxy-1: *vipvillageworker
dedipath-1: *blazeandwish
serverhost-1: *cottoniq
development_curtus:
development:
direct: *testcookies
proxy-1: *testcookies
dedipath-1: *testcookies

View File

@@ -19,7 +19,7 @@ redux_prod: &redux_prod
migrations_paths: db/redux_migrate
pool: 4
curtus_redux_dev: &curtus_redux_dev
redux_dev: &redux_dev
adapter: postgresql
host: localhost
port: 5432
@@ -60,15 +60,10 @@ local_legacy_test: &local_legacy_test
migrations_paths: db/legacy_migrate
pool: 4
development_regius:
development:
redux:
<<: *redux_prod
legacy:
<<: *legacy_prod
development_curtus:
redux:
<<: *curtus_redux_dev
<<: *redux_dev
# at the moment, no "real" legacy database. just fixtures.
# legacy:
# <<: *legacy_prod

View File

@@ -1,76 +0,0 @@
require "active_support/core_ext/integer/time"
Rack::MiniProfiler.config.position = "top-right"
Rack::MiniProfiler.config.skip_paths = [/\/api\/.+/]
Rack::MiniProfiler.config.disable_caching = false
Rails.application.configure do
# Settings specified here will take precedence over those in config/application.rb.
config.web_console.development_only = false
config.hosts << "localhost"
# In the development environment your application's code is reloaded any time
# it changes. This slows down response time but is perfect for development
# since you don't have to restart the web server when you make code changes.
config.cache_classes = false
# Do not eager load code on boot.
config.eager_load = false
# Show full error reports.
config.consider_all_requests_local = true
# Enable server timing
config.server_timing = true
# Enable/disable caching. By default caching is disabled.
# Run rails dev:cache to toggle caching.
if Rails.root.join("tmp/caching-dev.txt").exist?
config.action_controller.perform_caching = true
config.action_controller.enable_fragment_cache_logging = true
config.cache_store = :memory_store
config.public_file_server.headers = {
"Cache-Control" => "public, max-age=#{2.days.to_i}",
}
else
config.action_controller.perform_caching = false
config.cache_store = :null_store
end
# Store uploaded files on the local file system (see config/storage.yml for options).
config.active_storage.service = :local
# Don't care if the mailer can't send.
config.action_mailer.raise_delivery_errors = false
config.action_mailer.perform_caching = false
# Print deprecation notices to the Rails logger.
config.active_support.deprecation = :log
# Raise exceptions for disallowed deprecations.
config.active_support.disallowed_deprecation = :raise
# Tell Active Support which deprecation messages to disallow.
config.active_support.disallowed_deprecation_warnings = []
# Raise an error on page load if there are pending migrations.
config.active_record.migration_error = :page_load
# Highlight code that triggered database queries in logs.
config.active_record.verbose_query_logs = true
# Suppress logger output for asset requests.
config.assets.quiet = true
# Raises error for missing translations.
# config.i18n.raise_on_missing_translations = true
# Annotate rendered view with file names.
# config.action_view.annotate_rendered_view_with_filenames = true
# Uncomment if you wish to allow Action Cable access from any origin.
# config.action_cable.disable_request_forgery_protection = true
end

View File

@@ -1,14 +1,8 @@
development_regius:
development:
host: "http://grafana.local:8086"
token: "W2ikhmpQxQHZAStgSEK6s6aJxnOqeD4Zz2MI1m_lnD1JX57V9Esqm0zXb3DWbN7Gnj2GdmF_YrcvE8cy6NbIqQ=="
org: "primary"
bucket: "redux-scraper"
development_curtus:
host: "http://grafana.local:8086"
token: "W2ikhmpQxQHZAStgSEK6s6aJxnOqeD4Zz2MI1m_lnD1JX57V9Esqm0zXb3DWbN7Gnj2GdmF_YrcvE8cy6NbIqQ=="
org: "primary"
bucket: "redux-scraper"
bucket: "redux-scraper-dev"
production:
host: "http://grafana.local:8086"

View File

@@ -18,10 +18,7 @@ default: &default
production:
<<: *default
development_regius:
<<: *default
development_curtus:
development:
direct: {}
proxy-1: {}
dedipath-1: {}

View File

@@ -0,0 +1,12 @@
# Creates the "domain_fa_follows" join table: one row per
# (follower, followed) pair, with both reference columns constrained by a
# foreign key to "domain_fa_users".
class CreateDomainFaFollows < ActiveRecord::Migration[7.0]
  def change
    create_table :domain_fa_follows do |t|
      # follower_id / followed_id reference columns
      %i[follower followed].each { |role| t.references role }
      t.timestamps
    end

    # Foreign keys are added separately because both columns point at
    # domain_fa_users rather than at tables named after the columns.
    %i[follower_id followed_id].each do |fk_column|
      add_foreign_key :domain_fa_follows, :domain_fa_users, column: fk_column
    end
  end
end

13
db/schema.rb generated
View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2023_03_30_130506) do
ActiveRecord::Schema[7.0].define(version: 2023_03_30_134212) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_stat_statements"
enable_extension "pg_trgm"
@@ -104,6 +104,15 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_30_130506) do
t.index ["name"], name: "index_domain_e621_tags_on_name", unique: true
end
create_table "domain_fa_follows", force: :cascade do |t|
t.bigint "follower_id"
t.bigint "followed_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["followed_id"], name: "index_domain_fa_follows_on_followed_id"
t.index ["follower_id"], name: "index_domain_fa_follows_on_follower_id"
end
create_table "domain_fa_posts", force: :cascade do |t|
t.integer "fa_id"
t.bigint "creator_id"
@@ -338,6 +347,8 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_30_130506) do
add_foreign_key "blob_entries", "blob_entries", column: "base_sha256", primary_key: "sha256"
add_foreign_key "domain_e621_post_versions", "domain_e621_posts", column: "item_id"
add_foreign_key "domain_fa_follows", "domain_fa_users", column: "followed_id"
add_foreign_key "domain_fa_follows", "domain_fa_users", column: "follower_id"
add_foreign_key "domain_fa_posts", "domain_fa_users", column: "creator_id"
add_foreign_key "domain_fa_posts", "http_log_entries", column: "file_id"
add_foreign_key "domain_fa_user_factors", "domain_fa_users", column: "user_id"

View File

@@ -0,0 +1,61 @@
require "rails_helper"
# Exercises the follow graph + Disco recommender pipeline end to end:
# two disjoint clusters of followed users are created, three followers
# follow them in different combinations, and the resulting item-item
# recommendations for cluster 1 must stay inside cluster 1.
describe Domain::Fa::UserFactor do
  it "works" do
    # a few clusters of users, followed by a few others
    cluster1 = 10.times.map do |i|
      SpecUtil.create_domain_fa_user(name: "cluster-1-#{i}")
    end
    cluster2 = 10.times.map do |i|
      SpecUtil.create_domain_fa_user(name: "cluster-2-#{i}")
    end

    # The recommender is fitted on user names, so membership is checked
    # by name. Computed once because it does not change per iteration.
    cluster1_names = Set.new(cluster1.map(&:name))

    follower1, follower2, follower3 = 3.times.map do |i|
      SpecUtil.create_domain_fa_user(name: "follower-#{i + 1}")
    end

    # follower1 follows cluster1
    cluster1.each do |user|
      Domain::Fa::Follow.create!(follower: follower1, followed: user)
    end

    # follower2 follows cluster2
    cluster2.each do |user|
      Domain::Fa::Follow.create!(follower: follower2, followed: user)
    end

    # follower3 follows everybody
    (cluster1 + cluster2).each do |user|
      Domain::Fa::Follow.create!(follower: follower3, followed: user)
    end

    # calculate the recommender, using the same number of factors as the
    # model's `factors` neighbor attribute is declared with
    recommender = Disco::Recommender.new(
      factors: Domain::Fa::UserFactor.neighbor_attributes[:factors][:dimensions],
    )

    # find_each without a block returns an Enumerator, so the follows can
    # be mapped directly without wrapping them in a hand-built Enumerator.
    interactions = Domain::Fa::Follow.all.find_each.map do |follow|
      { user_id: follow.follower.name, item_id: follow.followed.name }
    end
    recommender.fit(interactions)
    recommender.optimize_item_recs

    # cluster1 should all be similar to each other
    cluster1.each do |user|
      similar = Set.new(recommender.similar_items(user.name, count: 9))
      actual = Set.new(similar.map { |rec| rec[:item_id] })
      assert cluster1_names.superset?(actual), similar
    end
  end
end

View File

@@ -3,7 +3,7 @@ require "rspec/mocks"
class SpecUtil
extend RSpec::Mocks::ExampleMethods
def self.random_string(length)
def self.random_string(length = 8)
(0...length).map { (65 + rand(26)).chr }.join
end
@@ -156,12 +156,18 @@ class SpecUtil
end
end
def self.build_domain_fa_user
def self.build_domain_fa_user(name: nil)
Domain::Fa::User.new(
name: random_string(10),
name: name || random_string,
)
end
# Builds a Domain::Fa::User through build_domain_fa_user (all arguments are
# forwarded unchanged), persists it with save!, and returns the saved
# record. Any exception raised by save! propagates to the caller.
def self.create_domain_fa_user(...)
  user = build_domain_fa_user(...)
  user.save!
  user
end
def self.build_domain_fa_post(creator: nil, fa_id: nil)
@last_fa_id ||= 0
@last_fa_id += 1