indexed posts enums

This commit is contained in:
Dylan Knutson
2024-12-25 21:46:33 +00:00
parent 355da6932b
commit 2381df7d10
22 changed files with 205 additions and 83 deletions

3
.cursorignore Normal file
View File

@@ -0,0 +1,3 @@
# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv)
config/database.yml
config/cookies/fa.yml

View File

@@ -1,4 +1,4 @@
version: "3"
version: '3'
services:
app:
@@ -32,15 +32,15 @@ services:
POSTGRES_PASSWORD: postgres
pgadmin:
image: dpage/pgadmin4:8.13.0
image: dpage/pgadmin4:8.14.0
restart: unless-stopped
network_mode: service:db
environment:
PGADMIN_DEFAULT_EMAIL: admin@example.com
PGADMIN_DEFAULT_PASSWORD: password
PGADMIN_LISTEN_PORT: 8080
PGADMIN_CONFIG_SERVER_MODE: "False"
PGADMIN_CONFIG_MASTER_PASSWORD_REQUIRED: "False"
PGADMIN_CONFIG_SERVER_MODE: 'False'
PGADMIN_CONFIG_MASTER_PASSWORD_REQUIRED: 'False'
volumes:
postgres-data:

10
.vscode/launch.json vendored
View File

@@ -7,10 +7,7 @@
"request": "launch",
"command": "bundle",
"script": "exec",
"args": [
"rails",
"server"
],
"args": ["rails", "server"],
"env": {
"RUBY_DEBUG_OPEN": "true",
"RAILS_ENV": "staging"
@@ -19,7 +16,8 @@
{
"type": "rdbg",
"name": "Attach rdbg",
"request": "attach"
"request": "attach",
"debugPort": "/tmp/rdbg-1000/rdbg-492838"
}
]
}
}

View File

@@ -30,7 +30,7 @@ gem "turbo-rails"
gem "stimulus-rails"
# Build JSON APIs with ease [https://github.com/rails/jbuilder]
gem "jbuilder"
gem "jbuilder", "~> 2.13"
# Use Redis adapter to run Action Cable in production
# gem "redis", "~> 4.0"

View File

@@ -179,7 +179,7 @@ GEM
irb (1.14.3)
rdoc (>= 4.0.0)
reline (>= 0.4.2)
jbuilder (2.11.5)
jbuilder (2.13.0)
actionview (>= 5.0.0)
activesupport (>= 5.0.0)
kaminari (1.2.2)
@@ -437,7 +437,7 @@ DEPENDENCIES
good_job (~> 4.6)
htmlbeautifier
http-cookie
jbuilder
jbuilder (~> 2.13)
kaminari
memory_profiler
neighbor

View File

@@ -5,15 +5,7 @@ require "rake/testtask"
require_relative "config/application"
Rails.application.load_tasks
$LOAD_PATH << Rails.root.join("rake")
Rake.application.rake_require "sst"
Rake.application.rake_require "log_entry"
Rake.application.rake_require "metrics"
Rake.application.rake_require "fa"
Rake.application.rake_require "e621"
Rake.application.rake_require "twitter"
Rake.application.rake_require "ib"
Rake.application.rake_require "blob_file"
Dir.glob(Rails.root.join("rake", "*.rake")).each { |rake_file| load rake_file }
task set_ar_stdout: :environment do
ActiveRecord::Base.logger = Logger.new($stdout)
@@ -88,7 +80,7 @@ task good_job: %i[environment set_ar_stdout set_logger_stdout] do
"GOOD_JOB_MAX_THREADS" => "4",
"GOOD_JOB_QUEUES" =>
ENV["GOOD_JOB_QUEUES"] ||
%w[manual:4 fa_post,e621:2 *:6].reject(&:nil?).join(";")
%w[manual:4 fa_post,e621:2 *:6].reject(&:nil?).join(";"),
}
env_hash.each do |key, value|
@@ -136,7 +128,7 @@ task :reverse_csv do
CSV.new(
File.open("rev_" + file, "w"),
write_headers: true,
headers: in_csv.headers
headers: in_csv.headers,
)
in_csv.reverse_each { |row| out_csv << row.map(&:second) }
out_csv.close

View File

@@ -1,9 +1,5 @@
class ApplicationController < ActionController::Base
before_action :validate_api_token
before_action do
@site_title = "ReFurrer"
@site_subtitle = "Furry Swiss Army Knife"
end
before_action do
if Rails.env.development? || Rails.env.staging?
Rack::MiniProfiler.authorize_request
@@ -16,7 +12,7 @@ class ApplicationController < ActionController::Base
"9c38727f-f11d-41de-b775-0effd86d520c" => "xjal",
"e38c568f-a24d-4f26-87f0-dfcd898a359d" => "fyacin",
"41fa1144-d4cd-11ed-afa1-0242ac120002" => "soft_fox_lad",
"9b3cf444-5913-4efb-9935-bf26501232ff" => "syfaro"
"9b3cf444-5913-4efb-9935-bf26501232ff" => "syfaro",
}
def validate_api_token

View File

@@ -1,5 +1,5 @@
class Domain::Fa::PostsController < ApplicationController
before_action :set_domain_fa_post, only: %i[show scan_post scan_post]
before_action :set_domain_fa_post, only: %i[show scan_post]
skip_before_action :verify_authenticity_token,
only: %i[try_scan_post try_scan_posts]
@@ -9,7 +9,7 @@ class Domain::Fa::PostsController < ApplicationController
if params[:user_url_name]
@user =
Domain::Fa::User.find_by(url_name: params[:user_url_name]) ||
raise("404")
raise(ActiveRecord::RecordNotFound)
end
relation = @user ? @user.posts : Domain::Fa::Post
@posts =
@@ -57,7 +57,7 @@ class Domain::Fa::PostsController < ApplicationController
enqueued: enqueued,
title: post&.title,
state: state_string,
is_terminal_state: post&.scanned? && post&.file&.present? || false
is_terminal_state: post&.scanned? && post&.file&.present? || false,
}
end
@@ -86,7 +86,7 @@ class Domain::Fa::PostsController < ApplicationController
response[fa_id] = {
state: state,
enqueued: try_enqueue_post_scan(post, fa_id)
enqueued: try_enqueue_post_scan(post, fa_id),
}
end
render json: response
@@ -105,7 +105,7 @@ class Domain::Fa::PostsController < ApplicationController
Rails.logger.info "Enqueue scan #{fa_id}"
Domain::Fa::Job::ScanPostJob.set(
priority: -15,
queue: "manual"
queue: "manual",
).perform_later({ fa_id: fa_id })
return true
end
@@ -114,7 +114,7 @@ class Domain::Fa::PostsController < ApplicationController
Rails.logger.info "Enqueue file #{fa_id}"
Domain::Fa::Job::ScanFileJob.set(
priority: -15,
queue: "manual"
queue: "manual",
).perform_later({ post: post })
return true
end

View File

@@ -17,7 +17,9 @@ module Domain::E621::TagUtil
.uniq
e621_updated_at = post_json["updated_at"]
return false if e621_post.state_detail["e621_updated_at"] == e621_updated_at
if e621_post.state_detail["e621_updated_at"] == e621_updated_at
return e621_post
end
e621_post.state_detail["e621_updated_at"] = post_json["updated_at"]
e621_post.state_detail["index_page_ids"] ||= []

View File

@@ -1,4 +1,4 @@
module IndexedPostable
module HasIndexedPost
extend ActiveSupport::Concern
included do
@@ -6,9 +6,10 @@ module IndexedPostable
as: :postable,
dependent: :destroy,
inverse_of: :postable,
validate: false,
autosave: true
before_save :ensure_indexed_post!
after_initialize :ensure_indexed_post!
def ensure_indexed_post!
self.indexed_post ||=

View File

@@ -2,7 +2,7 @@ class Domain::E621::Post < ReduxApplicationRecord
self.table_name = "domain_e621_posts"
has_lite_trail(schema_version: 1, separate_versions_table: true)
include IndexedPostable
include HasIndexedPost
include Discard::Model
self.discard_column = :deleted_at
default_scope -> { kept }

View File

@@ -8,6 +8,8 @@ class Domain::Fa::Post < ReduxApplicationRecord
},
)
include HasIndexedPost
enum :state,
[
:ok, # so far so good, post may not yet be scanned or have file downloaded
@@ -52,8 +54,6 @@ class Domain::Fa::Post < ReduxApplicationRecord
foreign_key: :post_id,
dependent: :destroy
include IndexedPostable
def to_param
self.fa_id.to_s
end

View File

@@ -1,5 +1,6 @@
class Domain::Inkbunny::Post < ReduxApplicationRecord
self.table_name = "domain_inkbunny_posts"
include HasIndexedPost
belongs_to :creator,
class_name: "::Domain::Inkbunny::User",

View File

@@ -1,7 +1,7 @@
<!DOCTYPE html>
<html class='h-full'>
<head>
<title><%= @site_title %></title>
<title>ReFurrer</title>
<meta name="viewport" content="width=device-width,initial-scale=1">
<% if Object.const_defined?("Rack::MiniProfiler") %>
<% # needed so miniprofiler doesn't screw with the fetch() api %>
@@ -22,11 +22,11 @@
<header class="bg-slate-100 border-slate-200 border-b-2">
<div class="mx-auto max-w-5xl py-6 px-6 sm:px-8 flex items-baseline">
<h1 class="text-4xl sm:text-5xl font-bold text-slate-900">
<%= link_to @site_title, root_path %>
<%= link_to "ReFurrer", root_path %>
</h1>
<div class="flex-grow"></div>
<h2 class="text-1xl sm:text-2xl italic font-bold text-slate-500">
<%= @site_subtitle %>
Furry Swiss Army Knife
</h2>
</div>
</header>

27
bin/rdbg Executable file
View File

@@ -0,0 +1,27 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
#
# This file was generated by Bundler.
#
# The application 'rdbg' is installed as part of a gem, and
# this file is here to facilitate running it.
#
# Point Bundler at the project's Gemfile (one directory above this script)
# unless the caller has already chosen one via BUNDLE_GEMFILE.
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
# Path of a sibling `bundle` binstub in the same directory as this script.
bundle_binstub = File.expand_path("bundle", __dir__)
if File.file?(bundle_binstub)
# Only the first 300 bytes are read; they are checked for Bundler's
# generated-file marker before the sibling binstub is loaded.
if File.read(bundle_binstub, 300).include?("This file was generated by Bundler")
load(bundle_binstub)
else
# A `bin/bundle` without the marker is not trusted: stop with instructions.
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
end
end
require "rubygems"
require "bundler/setup"
# Hand control to the `rdbg` executable shipped in the `debug` gem.
load Gem.bin_path("debug", "rdbg")

View File

@@ -1,6 +1,12 @@
require_relative "boot"
require "rails/all"
# Sets the Active Support deprecation behaviour on the application class.
# In this file the block sits above the `Bundler.require(*Rails.groups)` call.
# NOTE(review): per-environment config files may assign a different
# `active_support.deprecation` value later in boot — confirm which one wins.
module ReduxScraper
class Application < Rails::Application
# :raise makes deprecation warnings raise an exception instead of only being reported.
config.active_support.deprecation = :raise
end
end
# Require the gems listed in Gemfile, including any gems
# you've limited to :test, :development, or :production.
Bundler.require(*Rails.groups)

View File

@@ -0,0 +1,48 @@
# Converts `indexed_posts.postable_type` from a string column into the
# Postgres enum type `postable_type`, whose labels are the three polymorphic
# post class names.
#
# The conversion works by adding a second column, copying the data across,
# dropping the old column and renaming the new one into place. Dropping the
# column also drops any index that includes it, so both directions re-create
# the unique (postable_id, postable_type) index and the NOT NULL constraint
# once the replacement column is in place.
class AddPostableTypeEnum < ActiveRecord::Migration[7.2]
  # String column -> enum column.
  def up
    execute <<~SQL
      CREATE TYPE postable_type AS ENUM ('Domain::Fa::Post', 'Domain::E621::Post', 'Domain::Inkbunny::Post');
    SQL

    # Create a temporary column with the new type
    add_column :indexed_posts, :postable_type_enum, :postable_type

    # Copy data from the old column to the new one
    execute <<~SQL
      UPDATE indexed_posts
      SET postable_type_enum = postable_type::postable_type;
    SQL

    # Remove the old column and rename the new one
    remove_column :indexed_posts, :postable_type
    rename_column :indexed_posts, :postable_type_enum, :postable_type

    # make the new column non-nullable
    change_column_null :indexed_posts, :postable_type, false

    # index on the polymorphic association
    add_index :indexed_posts, %i[postable_id postable_type], unique: true
  end

  # Enum column -> string column, restoring the pre-migration shape of the
  # table: a NOT NULL string column covered by the unique polymorphic index.
  def down
    # Create a temporary string column
    add_column :indexed_posts, :postable_type_string, :string

    # Copy data from enum to string
    execute <<~SQL
      UPDATE indexed_posts
      SET postable_type_string = postable_type::text;
    SQL

    # Remove enum column and rename string column
    remove_column :indexed_posts, :postable_type
    rename_column :indexed_posts, :postable_type_string, :postable_type

    # Drop the enum type (safe now that no column uses it)
    execute <<~SQL
      DROP TYPE postable_type;
    SQL

    # The string column was NOT NULL before this migration; every row was
    # copied from a NOT NULL enum column, so the constraint can be restored.
    change_column_null :indexed_posts, :postable_type, false

    # Re-create the unique polymorphic index that was dropped together with
    # the enum column above.
    add_index :indexed_posts, %i[postable_id postable_type], unique: true
  end
end

8
db/schema.rb generated
View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.2].define(version: 2024_12_24_061643) do
ActiveRecord::Schema[7.2].define(version: 2024_12_25_183955) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_prewarm"
enable_extension "pg_stat_statements"
@@ -19,6 +19,10 @@ ActiveRecord::Schema[7.2].define(version: 2024_12_24_061643) do
enable_extension "plpgsql"
enable_extension "vector"
# Custom types defined in this database.
# Note that some types may not work with other database engines. Be careful if changing database.
create_enum "postable_type", ["Domain::Fa::Post", "Domain::E621::Post", "Domain::Inkbunny::Post"]
create_table "blob_entries_p", id: false, force: :cascade do |t|
t.binary "sha256", null: false
t.binary "base_sha256"
@@ -1739,9 +1743,9 @@ ActiveRecord::Schema[7.2].define(version: 2024_12_24_061643) do
create_table "indexed_posts", force: :cascade do |t|
t.integer "postable_id", null: false
t.string "postable_type", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.enum "postable_type", null: false, enum_type: "postable_type"
t.index ["created_at"], name: "index_indexed_posts_on_created_at"
t.index ["postable_id", "postable_type"], name: "index_indexed_posts_on_postable_id_and_postable_type", unique: true
end

44
rake/indexed_posts.rake Normal file
View File

@@ -0,0 +1,44 @@
# Backfill tasks that make sure every post record of each supported site has
# an associated IndexedPost row.
namespace :indexed_posts do
  # Task name => fully-qualified model class name. The class is looked up only
  # when the task body runs, i.e. after the :environment prerequisite.
  post_models = {
    ensure_fa_posts: "Domain::Fa::Post",
    ensure_e621_posts: "Domain::E621::Post",
    ensure_ib_posts: "Domain::Inkbunny::Post",
  }

  desc "Ensure all Domain::*::Post records have an IndexedPost record"
  task ensure_all_posts: post_models.keys

  post_models.each do |task_name, model_name|
    desc "Ensure all #{model_name} records have an IndexedPost record"
    task task_name => %i[environment set_logger_stdout] do
      model = Object.const_get(model_name)
      IndexedPostsRake.ensure_indexed_post_on(
        IndexedPostsRake.model_without_indexed_post(model),
      )
    end
  end
end
# Helpers shared by the indexed_posts rake tasks.
module IndexedPostsRake
  class << self
    # Narrows +model+ to the records that have no row in indexed_posts, using
    # a LEFT OUTER JOIN on the :indexed_post association and keeping only the
    # rows where the joined id is NULL.
    #
    # @param model the model class (or relation) to filter
    # @return the filtered relation
    def model_without_indexed_post(model)
      joined = model.left_outer_joins(:indexed_post)
      joined.where(indexed_posts: { id: nil })
    end

    # Walks +relation+ with find_each, calling ensure_indexed_post! and save!
    # on every record while reporting progress on a progress bar.
    #
    # @param relation the records to process; must respond to count, name and
    #   find_each
    def ensure_indexed_post_on(relation)
      remaining = relation.count
      Rails.logger.info(
        "Ensuring indexed post on #{relation.name.bold} (#{remaining} total to process)",
      )

      bar = ProgressBar.create(total: remaining, throttle_rate: 0.2)
      relation.find_each do |record|
        record.ensure_indexed_post!
        record.save!
        bar.increment
      end
    end
  end
end

View File

@@ -1,26 +1,28 @@
require "rails_helper"
describe Domain::Inkbunny::Job::FileJob do
# Fixture constants for the Inkbunny FileJob spec, kept in their own namespace
# and referenced from the examples as FileJobSpec::NAME.
module FileJobSpec
  # URL the mocked HTTP client is expected to be asked for.
  AN_IMAGE_URL = "https://static1.e621.net/file/foo.png"
  # Fixture path handed to SpecUtil.read_fixture_file.
  AN_IMAGE_PATH = "domain/e621/job/an-image.png"
  # NOTE(review): both digests below equal the digests of zero-length input;
  # confirm that matches what the fixture read in the spec actually returns.
  AN_IMAGE_MD5 = "d41d8cd98f00b204e9800998ecf8427e"
  AN_IMAGE_SHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
end
describe Domain::Inkbunny::Job::FileJob do
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
before { Scraper::ClientFactory.http_client_mock = http_client_mock }
let(:file) do
Domain::Inkbunny::File.create!(
{
ib_file_id: 12_345,
md5_initial: AN_IMAGE_MD5,
md5_full: AN_IMAGE_MD5,
url_str: AN_IMAGE_URL,
md5_initial: FileJobSpec::AN_IMAGE_MD5,
md5_full: FileJobSpec::AN_IMAGE_MD5,
url_str: FileJobSpec::AN_IMAGE_URL,
file_name: "foo.png",
ib_created_at: Time.now,
file_order: 1,
md5s: {
initial_file_md5: AN_IMAGE_MD5
initial_file_md5: FileJobSpec::AN_IMAGE_MD5,
},
post:
Domain::Inkbunny::Post.create!(
@@ -28,11 +30,11 @@ describe Domain::Inkbunny::Job::FileJob do
ib_post_id: 67_891,
creator:
Domain::Inkbunny::User.create!(
{ ib_user_id: 12_345, name: "TheUser" }
)
}
)
}
{ ib_user_id: 12_345, name: "TheUser" },
),
},
),
},
)
end
@@ -43,19 +45,19 @@ describe Domain::Inkbunny::Job::FileJob do
http_client_mock,
[
{
uri: AN_IMAGE_URL,
uri: FileJobSpec::AN_IMAGE_URL,
status_code: 200,
content_type: "image/png",
contents: SpecUtil.read_fixture_file(AN_IMAGE_PATH),
caused_by_entry: hle
}
]
contents: SpecUtil.read_fixture_file(FileJobSpec::AN_IMAGE_PATH),
caused_by_entry: hle,
},
],
)
perform_now({ file: file, caused_by_entry: hle })
file.reload
expect(file.log_entry.response).to eq(file.blob_entry)
expect(file.blob_entry.sha256_hex).to eq(AN_IMAGE_SHA256)
expect(file.blob_entry.sha256_hex).to eq(FileJobSpec::AN_IMAGE_SHA256)
end
it "marks the post as errored if the download fails" do
@@ -64,12 +66,12 @@ describe Domain::Inkbunny::Job::FileJob do
http_client_mock,
[
{
uri: AN_IMAGE_URL,
uri: FileJobSpec::AN_IMAGE_URL,
status_code: 404,
content_type: "text/html",
contents: "not found"
}
]
contents: "not found",
},
],
)
perform_now({ file: file })
@@ -81,8 +83,8 @@ describe Domain::Inkbunny::Job::FileJob do
{
"status_code" => 404,
"log_entry_id" => hles[0].id,
"retry_count" => 1
}
"retry_count" => 1,
},
)
end
@@ -92,18 +94,18 @@ describe Domain::Inkbunny::Job::FileJob do
http_client_mock,
[
{
uri: AN_IMAGE_URL,
uri: FileJobSpec::AN_IMAGE_URL,
status_code: 500,
content_type: "text/html",
contents: "not found"
contents: "not found",
},
{
uri: AN_IMAGE_URL,
uri: FileJobSpec::AN_IMAGE_URL,
status_code: 200,
content_type: "image/png",
contents: SpecUtil.read_fixture_file(AN_IMAGE_PATH)
}
]
contents: SpecUtil.read_fixture_file(FileJobSpec::AN_IMAGE_PATH),
},
],
)
perform_now({ file: file }, should_raise: true)
@@ -113,7 +115,7 @@ describe Domain::Inkbunny::Job::FileJob do
perform_now({ file: file })
file.reload
expect(file.blob_entry).not_to be_nil
expect(file.blob_entry.sha256_hex).to eq(AN_IMAGE_SHA256)
expect(file.blob_entry.sha256_hex).to eq(FileJobSpec::AN_IMAGE_SHA256)
end
it "throws on a non-404 error in order to retry later" do
@@ -123,12 +125,12 @@ describe Domain::Inkbunny::Job::FileJob do
http_client_mock,
[
{
uri: AN_IMAGE_URL,
uri: FileJobSpec::AN_IMAGE_URL,
status_code: 500,
content_type: "text/html",
contents: "not found"
}
] * num_retries
contents: "not found",
},
] * num_retries,
)
num_retries.times.map do |retry_num|
@@ -140,8 +142,8 @@ describe Domain::Inkbunny::Job::FileJob do
{
"status_code" => 500,
"log_entry_id" => hles[retry_num].id,
"retry_count" => retry_num + 1
}
"retry_count" => retry_num + 1,
},
)
end

View File

@@ -3,7 +3,6 @@ require "rails_helper"
describe Domain::E621::Post do
it "ensures indexed_post is created" do
post = SpecUtil.build_e621_post(created_at: 1.day.ago)
expect(post.indexed_post).to be_nil
post.save!
expect(post.indexed_post).to be_present
expect(post.indexed_post.created_at).to eq(post.created_at)

View File

@@ -3,7 +3,6 @@ require "rails_helper"
describe Domain::Fa::Post do
it "ensures indexed_post is created" do
post = SpecUtil.build_domain_fa_post(created_at: 1.day.ago)
expect(post.indexed_post).to be_nil
post.save!
expect(post.indexed_post).to be_present
expect(post.indexed_post.created_at).to eq(post.created_at)