remove separate dev envs, add faiss
This commit is contained in:
5
Gemfile
5
Gemfile
@@ -53,12 +53,12 @@ gem "bootsnap", require: false
|
||||
# Use Active Storage variants [https://guides.rubyonrails.org/active_storage_overview.html#transforming-images]
|
||||
# gem "image_processing", "~> 1.2"
|
||||
|
||||
group :development_regius, :development_curtus, :test do
|
||||
group :development, :test do
|
||||
# See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem
|
||||
gem "debug", platforms: %i[ mri mingw x64_mingw ]
|
||||
end
|
||||
|
||||
group :development_regius, :development_curtus do
|
||||
group :development do
|
||||
# Use console on exceptions pages [https://github.com/rails/web-console]
|
||||
gem "web-console"
|
||||
|
||||
@@ -104,3 +104,4 @@ gem "good_job"
|
||||
|
||||
gem "neighbor"
|
||||
gem "disco"
|
||||
gem "faiss"
|
||||
|
||||
@@ -120,6 +120,9 @@ GEM
|
||||
erubi (1.12.0)
|
||||
et-orbi (1.2.7)
|
||||
tzinfo
|
||||
faiss (0.2.5)
|
||||
numo-narray
|
||||
rice (>= 4.0.2)
|
||||
ffi (1.15.5)
|
||||
fugit (1.8.1)
|
||||
et-orbi (~> 1, >= 1.2.7)
|
||||
@@ -244,6 +247,7 @@ GEM
|
||||
reline (0.3.2)
|
||||
io-console (~> 0.5)
|
||||
rexml (3.2.5)
|
||||
rice (4.0.4)
|
||||
ripcord (2.0.0)
|
||||
rspec-core (3.12.1)
|
||||
rspec-support (~> 3.12.0)
|
||||
@@ -328,6 +332,7 @@ DEPENDENCIES
|
||||
diffy
|
||||
discard
|
||||
disco
|
||||
faiss
|
||||
good_job
|
||||
http-cookie
|
||||
importmap-rails
|
||||
|
||||
@@ -15,7 +15,8 @@ class Scraper::ClientFactory
|
||||
|
||||
# public API
|
||||
def self.get_gallery_dl_client
|
||||
if Rails.env.test?
|
||||
# TODO - can probably allow development to hit real websites
|
||||
if Rails.env.test? || Rails.env.development?
|
||||
@gallery_dl_client_mock || raise("no gallery dl mock set")
|
||||
else
|
||||
_gallery_dl_client_impl
|
||||
@@ -23,7 +24,7 @@ class Scraper::ClientFactory
|
||||
end
|
||||
|
||||
def self.get_twitter_http_client
|
||||
if Rails.env.test?
|
||||
if Rails.env.test? || Rails.env.development?
|
||||
@http_client_mock || raise("no http client mock set")
|
||||
else
|
||||
_http_client_impl(:twitter, Scraper::TwitterHttpClientConfig)
|
||||
@@ -31,7 +32,7 @@ class Scraper::ClientFactory
|
||||
end
|
||||
|
||||
def self.get_fa_http_client
|
||||
if Rails.env.test?
|
||||
if Rails.env.test? || Rails.env.development?
|
||||
@http_client_mock || raise("no http client mock set")
|
||||
else
|
||||
_http_client_impl(:fa, Scraper::FaHttpClientConfig)
|
||||
|
||||
8
app/models/domain/fa/follow.rb
Normal file
8
app/models/domain/fa/follow.rb
Normal file
@@ -0,0 +1,8 @@
|
||||
class Domain::Fa::Follow < ReduxApplicationRecord
|
||||
self.table_name = "domain_fa_follows"
|
||||
|
||||
belongs_to :follower,
|
||||
class_name: "::Domain::Fa::User"
|
||||
belongs_to :followed,
|
||||
class_name: "::Domain::Fa::User"
|
||||
end
|
||||
@@ -8,6 +8,11 @@ class Domain::Fa::User < ReduxApplicationRecord
|
||||
inverse_of: :creator,
|
||||
foreign_key: :creator_id
|
||||
|
||||
has_one :disco,
|
||||
class_name: "::Domain::Fa::UserFactor",
|
||||
inverse_of: :user,
|
||||
foreign_key: :user_id
|
||||
|
||||
enum :state, [
|
||||
:ok, # so far so good, user may not yet be scanned
|
||||
:scan_error, # user has been removed or otherwise, see state_detail
|
||||
|
||||
6
app/models/domain/fa/user_factor.rb
Normal file
6
app/models/domain/fa/user_factor.rb
Normal file
@@ -0,0 +1,6 @@
|
||||
class Domain::Fa::UserFactor < ReduxApplicationRecord
|
||||
self.table_name = "domain_fa_user_factors"
|
||||
|
||||
belongs_to :user, class_name: "::Domain::Fa::User"
|
||||
has_neighbors :factors, dimensions: 32
|
||||
end
|
||||
@@ -1,4 +0,0 @@
|
||||
class Domain::Fa::UserFactor
|
||||
belongs_to :user, class_name: "::Domain::Fa::User"
|
||||
has_neighbors :factors
|
||||
end
|
||||
@@ -82,13 +82,7 @@ testcookies: &testcookies
|
||||
value: oaidvalue
|
||||
path: /
|
||||
|
||||
development_regius:
|
||||
direct: *ddwhatnow
|
||||
proxy-1: *vipvillageworker
|
||||
dedipath-1: *blazeandwish
|
||||
serverhost-1: *cottoniq
|
||||
|
||||
development_curtus:
|
||||
development:
|
||||
direct: *testcookies
|
||||
proxy-1: *testcookies
|
||||
dedipath-1: *testcookies
|
||||
|
||||
@@ -19,7 +19,7 @@ redux_prod: &redux_prod
|
||||
migrations_paths: db/redux_migrate
|
||||
pool: 4
|
||||
|
||||
curtus_redux_dev: &curtus_redux_dev
|
||||
redux_dev: &redux_dev
|
||||
adapter: postgresql
|
||||
host: localhost
|
||||
port: 5432
|
||||
@@ -60,15 +60,10 @@ local_legacy_test: &local_legacy_test
|
||||
migrations_paths: db/legacy_migrate
|
||||
pool: 4
|
||||
|
||||
development_regius:
|
||||
development:
|
||||
redux:
|
||||
<<: *redux_prod
|
||||
legacy:
|
||||
<<: *legacy_prod
|
||||
|
||||
development_curtus:
|
||||
redux:
|
||||
<<: *curtus_redux_dev
|
||||
<<: *redux_dev
|
||||
# at the moment, no "real" legacy database. just fixtures.
|
||||
# legacy:
|
||||
# <<: *legacy_prod
|
||||
|
||||
|
||||
@@ -1,76 +0,0 @@
|
||||
require "active_support/core_ext/integer/time"
|
||||
|
||||
Rack::MiniProfiler.config.position = "top-right"
|
||||
Rack::MiniProfiler.config.skip_paths = [/\/api\/.+/]
|
||||
Rack::MiniProfiler.config.disable_caching = false
|
||||
|
||||
Rails.application.configure do
|
||||
# Settings specified here will take precedence over those in config/application.rb.
|
||||
config.web_console.development_only = false
|
||||
config.hosts << "localhost"
|
||||
|
||||
# In the development environment your application's code is reloaded any time
|
||||
# it changes. This slows down response time but is perfect for development
|
||||
# since you don't have to restart the web server when you make code changes.
|
||||
config.cache_classes = false
|
||||
|
||||
# Do not eager load code on boot.
|
||||
config.eager_load = false
|
||||
|
||||
# Show full error reports.
|
||||
config.consider_all_requests_local = true
|
||||
|
||||
# Enable server timing
|
||||
config.server_timing = true
|
||||
|
||||
# Enable/disable caching. By default caching is disabled.
|
||||
# Run rails dev:cache to toggle caching.
|
||||
if Rails.root.join("tmp/caching-dev.txt").exist?
|
||||
config.action_controller.perform_caching = true
|
||||
config.action_controller.enable_fragment_cache_logging = true
|
||||
|
||||
config.cache_store = :memory_store
|
||||
config.public_file_server.headers = {
|
||||
"Cache-Control" => "public, max-age=#{2.days.to_i}",
|
||||
}
|
||||
else
|
||||
config.action_controller.perform_caching = false
|
||||
|
||||
config.cache_store = :null_store
|
||||
end
|
||||
|
||||
# Store uploaded files on the local file system (see config/storage.yml for options).
|
||||
config.active_storage.service = :local
|
||||
|
||||
# Don't care if the mailer can't send.
|
||||
config.action_mailer.raise_delivery_errors = false
|
||||
|
||||
config.action_mailer.perform_caching = false
|
||||
|
||||
# Print deprecation notices to the Rails logger.
|
||||
config.active_support.deprecation = :log
|
||||
|
||||
# Raise exceptions for disallowed deprecations.
|
||||
config.active_support.disallowed_deprecation = :raise
|
||||
|
||||
# Tell Active Support which deprecation messages to disallow.
|
||||
config.active_support.disallowed_deprecation_warnings = []
|
||||
|
||||
# Raise an error on page load if there are pending migrations.
|
||||
config.active_record.migration_error = :page_load
|
||||
|
||||
# Highlight code that triggered database queries in logs.
|
||||
config.active_record.verbose_query_logs = true
|
||||
|
||||
# Suppress logger output for asset requests.
|
||||
config.assets.quiet = true
|
||||
|
||||
# Raises error for missing translations.
|
||||
# config.i18n.raise_on_missing_translations = true
|
||||
|
||||
# Annotate rendered view with file names.
|
||||
# config.action_view.annotate_rendered_view_with_filenames = true
|
||||
|
||||
# Uncomment if you wish to allow Action Cable access from any origin.
|
||||
# config.action_cable.disable_request_forgery_protection = true
|
||||
end
|
||||
@@ -1,14 +1,8 @@
|
||||
development_regius:
|
||||
development:
|
||||
host: "http://grafana.local:8086"
|
||||
token: "W2ikhmpQxQHZAStgSEK6s6aJxnOqeD4Zz2MI1m_lnD1JX57V9Esqm0zXb3DWbN7Gnj2GdmF_YrcvE8cy6NbIqQ=="
|
||||
org: "primary"
|
||||
bucket: "redux-scraper"
|
||||
|
||||
development_curtus:
|
||||
host: "http://grafana.local:8086"
|
||||
token: "W2ikhmpQxQHZAStgSEK6s6aJxnOqeD4Zz2MI1m_lnD1JX57V9Esqm0zXb3DWbN7Gnj2GdmF_YrcvE8cy6NbIqQ=="
|
||||
org: "primary"
|
||||
bucket: "redux-scraper"
|
||||
bucket: "redux-scraper-dev"
|
||||
|
||||
production:
|
||||
host: "http://grafana.local:8086"
|
||||
|
||||
@@ -18,10 +18,7 @@ default: &default
|
||||
production:
|
||||
<<: *default
|
||||
|
||||
development_regius:
|
||||
<<: *default
|
||||
|
||||
development_curtus:
|
||||
development:
|
||||
direct: {}
|
||||
proxy-1: {}
|
||||
dedipath-1: {}
|
||||
|
||||
12
db/redux_migrate/20230330134212_create_domain_fa_follows.rb
Normal file
12
db/redux_migrate/20230330134212_create_domain_fa_follows.rb
Normal file
@@ -0,0 +1,12 @@
|
||||
class CreateDomainFaFollows < ActiveRecord::Migration[7.0]
|
||||
def change
|
||||
create_table :domain_fa_follows do |t|
|
||||
t.references :follower
|
||||
t.references :followed
|
||||
t.timestamps
|
||||
end
|
||||
|
||||
add_foreign_key :domain_fa_follows, :domain_fa_users, column: :follower_id
|
||||
add_foreign_key :domain_fa_follows, :domain_fa_users, column: :followed_id
|
||||
end
|
||||
end
|
||||
13
db/schema.rb
generated
13
db/schema.rb
generated
@@ -10,7 +10,7 @@
|
||||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_03_30_130506) do
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_03_30_134212) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pg_stat_statements"
|
||||
enable_extension "pg_trgm"
|
||||
@@ -104,6 +104,15 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_30_130506) do
|
||||
t.index ["name"], name: "index_domain_e621_tags_on_name", unique: true
|
||||
end
|
||||
|
||||
create_table "domain_fa_follows", force: :cascade do |t|
|
||||
t.bigint "follower_id"
|
||||
t.bigint "followed_id"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["followed_id"], name: "index_domain_fa_follows_on_followed_id"
|
||||
t.index ["follower_id"], name: "index_domain_fa_follows_on_follower_id"
|
||||
end
|
||||
|
||||
create_table "domain_fa_posts", force: :cascade do |t|
|
||||
t.integer "fa_id"
|
||||
t.bigint "creator_id"
|
||||
@@ -338,6 +347,8 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_30_130506) do
|
||||
|
||||
add_foreign_key "blob_entries", "blob_entries", column: "base_sha256", primary_key: "sha256"
|
||||
add_foreign_key "domain_e621_post_versions", "domain_e621_posts", column: "item_id"
|
||||
add_foreign_key "domain_fa_follows", "domain_fa_users", column: "followed_id"
|
||||
add_foreign_key "domain_fa_follows", "domain_fa_users", column: "follower_id"
|
||||
add_foreign_key "domain_fa_posts", "domain_fa_users", column: "creator_id"
|
||||
add_foreign_key "domain_fa_posts", "http_log_entries", column: "file_id"
|
||||
add_foreign_key "domain_fa_user_factors", "domain_fa_users", column: "user_id"
|
||||
|
||||
61
spec/models/domain/fa/user_factor_spec.rb
Normal file
61
spec/models/domain/fa/user_factor_spec.rb
Normal file
@@ -0,0 +1,61 @@
|
||||
require "rails_helper"
|
||||
|
||||
describe Domain::Fa::UserFactor do
|
||||
it "works" do
|
||||
# a few clusters of users, followed by a few others
|
||||
cluster1 = 10.times.map do |i|
|
||||
SpecUtil.create_domain_fa_user(name: "cluster-1-#{i}")
|
||||
end
|
||||
|
||||
cluster2 = 10.times.map do |i|
|
||||
SpecUtil.create_domain_fa_user(name: "cluster-2-#{i}")
|
||||
end
|
||||
|
||||
cluster1_ids = cluster1.map(&:name)
|
||||
cluster2_ids = cluster2.map(&:name)
|
||||
|
||||
follower1, follower2, follower3 = 3.times.map do |i|
|
||||
SpecUtil.create_domain_fa_user(name: "follower-#{i + 1}")
|
||||
end
|
||||
|
||||
# follower1 follows cluster1
|
||||
cluster1.each do |user|
|
||||
Domain::Fa::Follow.create!(follower: follower1, followed: user)
|
||||
end
|
||||
|
||||
# follower2 follows cluster2
|
||||
cluster2.each do |user|
|
||||
Domain::Fa::Follow.create!(follower: follower2, followed: user)
|
||||
end
|
||||
|
||||
# follower3 follows everybody
|
||||
(cluster1 + cluster2).each do |user|
|
||||
Domain::Fa::Follow.create!(follower: follower3, followed: user)
|
||||
end
|
||||
|
||||
# calculate the recommender
|
||||
recommender = Disco::Recommender.new(
|
||||
factors: Domain::Fa::UserFactor.neighbor_attributes[:factors][:dimensions],
|
||||
)
|
||||
query = Enumerator.new do |e|
|
||||
Domain::Fa::Follow.all.find_each do |follow|
|
||||
e << follow
|
||||
end
|
||||
end
|
||||
recommender.fit(query.map do |f|
|
||||
{ user_id: f.follower.name, item_id: f.followed.name }
|
||||
end)
|
||||
recommender.optimize_item_recs
|
||||
|
||||
# cluster1 should all be similar to each other
|
||||
cluster1.each do |user|
|
||||
ids = Set.new(recommender.similar_items(user.name, count: 9))
|
||||
expected = Set.new(cluster1_ids)
|
||||
actual = Set.new(ids.map do |id|
|
||||
id[:item_id]
|
||||
end)
|
||||
|
||||
assert expected.superset?(actual), ids
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -3,7 +3,7 @@ require "rspec/mocks"
|
||||
class SpecUtil
|
||||
extend RSpec::Mocks::ExampleMethods
|
||||
|
||||
def self.random_string(length)
|
||||
def self.random_string(length = 8)
|
||||
(0...length).map { (65 + rand(26)).chr }.join
|
||||
end
|
||||
|
||||
@@ -156,12 +156,18 @@ class SpecUtil
|
||||
end
|
||||
end
|
||||
|
||||
def self.build_domain_fa_user
|
||||
def self.build_domain_fa_user(name: nil)
|
||||
Domain::Fa::User.new(
|
||||
name: random_string(10),
|
||||
name: name || random_string,
|
||||
)
|
||||
end
|
||||
|
||||
def self.create_domain_fa_user(...)
|
||||
build_domain_fa_user(...).tap do |user|
|
||||
user.save!
|
||||
end
|
||||
end
|
||||
|
||||
def self.build_domain_fa_post(creator: nil, fa_id: nil)
|
||||
@last_fa_id ||= 0
|
||||
@last_fa_id += 1
|
||||
|
||||
Reference in New Issue
Block a user