diff --git a/.cursorignore b/.cursorignore new file mode 100644 index 00000000..76c6e5cd --- /dev/null +++ b/.cursorignore @@ -0,0 +1,3 @@ +# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv) +config/database.yml +config/cookies/fa.yml diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 3766390c..2ae5ce4c 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -1,4 +1,4 @@ -version: "3" +version: '3' services: app: @@ -32,15 +32,15 @@ services: POSTGRES_PASSWORD: postgres pgadmin: - image: dpage/pgadmin4:8.13.0 + image: dpage/pgadmin4:8.14.0 restart: unless-stopped network_mode: service:db environment: PGADMIN_DEFAULT_EMAIL: admin@example.com PGADMIN_DEFAULT_PASSWORD: password PGADMIN_LISTEN_PORT: 8080 - PGADMIN_CONFIG_SERVER_MODE: "False" - PGADMIN_CONFIG_MASTER_PASSWORD_REQUIRED: "False" + PGADMIN_CONFIG_SERVER_MODE: 'False' + PGADMIN_CONFIG_MASTER_PASSWORD_REQUIRED: 'False' volumes: postgres-data: diff --git a/.vscode/launch.json b/.vscode/launch.json index 395a73ca..35ac7506 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -7,10 +7,7 @@ "request": "launch", "command": "bundle", "script": "exec", - "args": [ - "rails", - "server" - ], + "args": ["rails", "server"], "env": { "RUBY_DEBUG_OPEN": "true", "RAILS_ENV": "staging" @@ -19,7 +16,8 @@ { "type": "rdbg", "name": "Attach rdbg", - "request": "attach" + "request": "attach", + "debugPort": "/tmp/rdbg-1000/rdbg-492838" } ] -} \ No newline at end of file +} diff --git a/Gemfile b/Gemfile index 0c14a13f..9ab8fdf1 100644 --- a/Gemfile +++ b/Gemfile @@ -30,7 +30,7 @@ gem "turbo-rails" gem "stimulus-rails" # Build JSON APIs with ease [https://github.com/rails/jbuilder] -gem "jbuilder" +gem "jbuilder", "~> 2.13" # Use Redis adapter to run Action Cable in production # gem "redis", "~> 4.0" diff --git a/Gemfile.lock b/Gemfile.lock index f468c160..bb288f53 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -179,7 
+179,7 @@ GEM irb (1.14.3) rdoc (>= 4.0.0) reline (>= 0.4.2) - jbuilder (2.11.5) + jbuilder (2.13.0) actionview (>= 5.0.0) activesupport (>= 5.0.0) kaminari (1.2.2) @@ -437,7 +437,7 @@ DEPENDENCIES good_job (~> 4.6) htmlbeautifier http-cookie - jbuilder + jbuilder (~> 2.13) kaminari memory_profiler neighbor diff --git a/Rakefile b/Rakefile index 586c1ae6..d664658b 100644 --- a/Rakefile +++ b/Rakefile @@ -5,15 +5,7 @@ require "rake/testtask" require_relative "config/application" Rails.application.load_tasks -$LOAD_PATH << Rails.root.join("rake") -Rake.application.rake_require "sst" -Rake.application.rake_require "log_entry" -Rake.application.rake_require "metrics" -Rake.application.rake_require "fa" -Rake.application.rake_require "e621" -Rake.application.rake_require "twitter" -Rake.application.rake_require "ib" -Rake.application.rake_require "blob_file" +Dir.glob(Rails.root.join("rake", "*.rake")).each { |rake_file| load rake_file } task set_ar_stdout: :environment do ActiveRecord::Base.logger = Logger.new($stdout) @@ -88,7 +80,7 @@ task good_job: %i[environment set_ar_stdout set_logger_stdout] do "GOOD_JOB_MAX_THREADS" => "4", "GOOD_JOB_QUEUES" => ENV["GOOD_JOB_QUEUES"] || - %w[manual:4 fa_post,e621:2 *:6].reject(&:nil?).join(";") + %w[manual:4 fa_post,e621:2 *:6].reject(&:nil?).join(";"), } env_hash.each do |key, value| @@ -136,7 +128,7 @@ task :reverse_csv do CSV.new( File.open("rev_" + file, "w"), write_headers: true, - headers: in_csv.headers + headers: in_csv.headers, ) in_csv.reverse_each { |row| out_csv << row.map(&:second) } out_csv.close diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index df80cdea..249fd95e 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -1,9 +1,5 @@ class ApplicationController < ActionController::Base before_action :validate_api_token - before_action do - @site_title = "ReFurrer" - @site_subtitle = "Furry Swiss Army Knife" - end 
before_action do if Rails.env.development? || Rails.env.staging? Rack::MiniProfiler.authorize_request @@ -16,7 +12,7 @@ class ApplicationController < ActionController::Base "9c38727f-f11d-41de-b775-0effd86d520c" => "xjal", "e38c568f-a24d-4f26-87f0-dfcd898a359d" => "fyacin", "41fa1144-d4cd-11ed-afa1-0242ac120002" => "soft_fox_lad", - "9b3cf444-5913-4efb-9935-bf26501232ff" => "syfaro" + "9b3cf444-5913-4efb-9935-bf26501232ff" => "syfaro", } def validate_api_token diff --git a/app/controllers/domain/fa/posts_controller.rb b/app/controllers/domain/fa/posts_controller.rb index a4419211..77cf4540 100644 --- a/app/controllers/domain/fa/posts_controller.rb +++ b/app/controllers/domain/fa/posts_controller.rb @@ -1,5 +1,5 @@ class Domain::Fa::PostsController < ApplicationController - before_action :set_domain_fa_post, only: %i[show scan_post scan_post] + before_action :set_domain_fa_post, only: %i[show scan_post] skip_before_action :verify_authenticity_token, only: %i[try_scan_post try_scan_posts] @@ -9,7 +9,7 @@ class Domain::Fa::PostsController < ApplicationController if params[:user_url_name] @user = Domain::Fa::User.find_by(url_name: params[:user_url_name]) || - raise("404") + raise(ActiveRecord::RecordNotFound) end relation = @user ? @user.posts : Domain::Fa::Post @posts = @@ -57,7 +57,7 @@ class Domain::Fa::PostsController < ApplicationController enqueued: enqueued, title: post&.title, state: state_string, - is_terminal_state: post&.scanned? && post&.file&.present? || false + is_terminal_state: post&.scanned? && post&.file&.present? 
|| false, } end @@ -86,7 +86,7 @@ class Domain::Fa::PostsController < ApplicationController response[fa_id] = { state: state, - enqueued: try_enqueue_post_scan(post, fa_id) + enqueued: try_enqueue_post_scan(post, fa_id), } end render json: response @@ -105,7 +105,7 @@ class Domain::Fa::PostsController < ApplicationController Rails.logger.info "Enqueue scan #{fa_id}" Domain::Fa::Job::ScanPostJob.set( priority: -15, - queue: "manual" + queue: "manual", ).perform_later({ fa_id: fa_id }) return true end @@ -114,7 +114,7 @@ class Domain::Fa::PostsController < ApplicationController Rails.logger.info "Enqueue file #{fa_id}" Domain::Fa::Job::ScanFileJob.set( priority: -15, - queue: "manual" + queue: "manual", ).perform_later({ post: post }) return true end diff --git a/app/lib/domain/e621/tag_util.rb b/app/lib/domain/e621/tag_util.rb index 48a747e2..b2d8d182 100644 --- a/app/lib/domain/e621/tag_util.rb +++ b/app/lib/domain/e621/tag_util.rb @@ -17,7 +17,9 @@ module Domain::E621::TagUtil .uniq e621_updated_at = post_json["updated_at"] - return false if e621_post.state_detail["e621_updated_at"] == e621_updated_at + if e621_post.state_detail["e621_updated_at"] == e621_updated_at + return e621_post + end e621_post.state_detail["e621_updated_at"] = post_json["updated_at"] e621_post.state_detail["index_page_ids"] ||= [] diff --git a/app/models/concerns/indexed_postable.rb b/app/models/concerns/has_indexed_post.rb similarity index 78% rename from app/models/concerns/indexed_postable.rb rename to app/models/concerns/has_indexed_post.rb index b8a3f89c..4786c549 100644 --- a/app/models/concerns/indexed_postable.rb +++ b/app/models/concerns/has_indexed_post.rb @@ -1,4 +1,4 @@ -module IndexedPostable +module HasIndexedPost extend ActiveSupport::Concern included do @@ -6,9 +6,10 @@ module IndexedPostable as: :postable, dependent: :destroy, inverse_of: :postable, + validate: false, autosave: true - before_save :ensure_indexed_post! + after_initialize :ensure_indexed_post! 
def ensure_indexed_post! self.indexed_post ||= diff --git a/app/models/domain/e621/post.rb b/app/models/domain/e621/post.rb index 145b1036..d97dea63 100644 --- a/app/models/domain/e621/post.rb +++ b/app/models/domain/e621/post.rb @@ -2,7 +2,7 @@ class Domain::E621::Post < ReduxApplicationRecord self.table_name = "domain_e621_posts" has_lite_trail(schema_version: 1, separate_versions_table: true) - include IndexedPostable + include HasIndexedPost include Discard::Model self.discard_column = :deleted_at default_scope -> { kept } diff --git a/app/models/domain/fa/post.rb b/app/models/domain/fa/post.rb index ed4e618b..f41021bd 100644 --- a/app/models/domain/fa/post.rb +++ b/app/models/domain/fa/post.rb @@ -8,6 +8,8 @@ class Domain::Fa::Post < ReduxApplicationRecord }, ) + include HasIndexedPost + enum :state, [ :ok, # so far so good, post may not yet be scanned or have file downloaded @@ -52,8 +54,6 @@ class Domain::Fa::Post < ReduxApplicationRecord foreign_key: :post_id, dependent: :destroy - include IndexedPostable - def to_param self.fa_id.to_s end diff --git a/app/models/domain/inkbunny/post.rb b/app/models/domain/inkbunny/post.rb index 3146344b..b0bf7578 100644 --- a/app/models/domain/inkbunny/post.rb +++ b/app/models/domain/inkbunny/post.rb @@ -1,5 +1,6 @@ class Domain::Inkbunny::Post < ReduxApplicationRecord self.table_name = "domain_inkbunny_posts" + include HasIndexedPost belongs_to :creator, class_name: "::Domain::Inkbunny::User", diff --git a/app/views/layouts/application.html.erb b/app/views/layouts/application.html.erb index 8b25e955..e66fc629 100644 --- a/app/views/layouts/application.html.erb +++ b/app/views/layouts/application.html.erb @@ -1,7 +1,7 @@ - <%= @site_title %> + ReFurrer <% if Object.const_defined?("Rack::MiniProfiler") %> <% # needed so miniprofiler doens't screw with the fetch() api %> @@ -22,11 +22,11 @@

- <%= link_to @site_title, root_path %> + <%= link_to "ReFurrer", root_path %>

- <%= @site_subtitle %> + Furry Swiss Army Knife

diff --git a/bin/rdbg b/bin/rdbg new file mode 100755 index 00000000..5e3b279f --- /dev/null +++ b/bin/rdbg @@ -0,0 +1,27 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# +# This file was generated by Bundler. +# +# The application 'rdbg' is installed as part of a gem, and +# this file is here to facilitate running it. +# + +ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__) + +bundle_binstub = File.expand_path("bundle", __dir__) + +if File.file?(bundle_binstub) + if File.read(bundle_binstub, 300).include?("This file was generated by Bundler") + load(bundle_binstub) + else + abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run. +Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.") + end +end + +require "rubygems" +require "bundler/setup" + +load Gem.bin_path("debug", "rdbg") diff --git a/config/application.rb b/config/application.rb index fd9f0aa8..84a86e20 100644 --- a/config/application.rb +++ b/config/application.rb @@ -1,6 +1,12 @@ require_relative "boot" require "rails/all" +module ReduxScraper + class Application < Rails::Application + config.active_support.deprecation = :raise + end +end + # Require the gems listed in Gemfile, including any gems # you've limited to :test, :development, or :production. 
Bundler.require(*Rails.groups)
diff --git a/db/redux_migrate/20241225183955_add_postable_type_enum.rb b/db/redux_migrate/20241225183955_add_postable_type_enum.rb
new file mode 100644
index 00000000..b14b923e
--- /dev/null
+++ b/db/redux_migrate/20241225183955_add_postable_type_enum.rb
@@ -0,0 +1,60 @@
+# Converts indexed_posts.postable_type from a string column into a Postgres
+# enum (also named postable_type) limited to the three known post classes.
+class AddPostableTypeEnum < ActiveRecord::Migration[7.2]
+  # Creates the enum type, swaps the string column for an enum column of the
+  # same name, and recreates the unique (postable_id, postable_type) index.
+  def up
+    execute <<~SQL
+      CREATE TYPE postable_type AS ENUM ('Domain::Fa::Post', 'Domain::E621::Post', 'Domain::Inkbunny::Post');
+    SQL
+
+    # Create a temporary column with the new type
+    add_column :indexed_posts, :postable_type_enum, :postable_type
+
+    # Copy data from the old column to the new one
+    execute <<~SQL
+      UPDATE indexed_posts
+      SET postable_type_enum = postable_type::postable_type;
+    SQL
+
+    # Remove the old column and rename the new one. Postgres drops any index
+    # that involves a dropped column, hence the add_index at the end.
+    remove_column :indexed_posts, :postable_type
+    rename_column :indexed_posts, :postable_type_enum, :postable_type
+
+    # make the new column non-nullable
+    change_column_null :indexed_posts, :postable_type, false
+
+    # index on the polymorphic association
+    add_index :indexed_posts, %i[postable_id postable_type], unique: true
+  end
+
+  # Restores the pre-migration state: a NOT NULL string column covered by the
+  # unique (postable_id, postable_type) index, with the enum type removed.
+  def down
+    # Create a temporary string column
+    add_column :indexed_posts, :postable_type_string, :string
+
+    # Copy data from enum to string
+    execute <<~SQL
+      UPDATE indexed_posts
+      SET postable_type_string = postable_type::text;
+    SQL
+
+    # Remove enum column and rename string column
+    remove_column :indexed_posts, :postable_type
+    rename_column :indexed_posts, :postable_type_string, :postable_type
+
+    # Drop the enum type
+    execute <<~SQL
+      DROP TYPE postable_type;
+    SQL
+
+    # The string column was NOT NULL before this migration, so restore that
+    # (every row was just backfilled from the non-null enum column)
+    change_column_null :indexed_posts, :postable_type, false
+
+    # Recreate the unique index that was dropped along with the enum column
+    add_index :indexed_posts, %i[postable_id postable_type], unique: true
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index cddd8fe3..51a982f5 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[7.2].define(version: 2024_12_24_061643) do +ActiveRecord::Schema[7.2].define(version: 2024_12_25_183955) do # These are extensions that must be enabled in order to support this database enable_extension "pg_prewarm" enable_extension "pg_stat_statements" @@ -19,6 +19,10 @@ ActiveRecord::Schema[7.2].define(version: 2024_12_24_061643) do enable_extension "plpgsql" enable_extension "vector" + # Custom types defined in this database. + # Note that some types may not work with other database engines. Be careful if changing database. + create_enum "postable_type", ["Domain::Fa::Post", "Domain::E621::Post", "Domain::Inkbunny::Post"] + create_table "blob_entries_p", id: false, force: :cascade do |t| t.binary "sha256", null: false t.binary "base_sha256" @@ -1739,9 +1743,9 @@ ActiveRecord::Schema[7.2].define(version: 2024_12_24_061643) do create_table "indexed_posts", force: :cascade do |t| t.integer "postable_id", null: false - t.string "postable_type", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.enum "postable_type", null: false, enum_type: "postable_type" t.index ["created_at"], name: "index_indexed_posts_on_created_at" t.index ["postable_id", "postable_type"], name: "index_indexed_posts_on_postable_id_and_postable_type", unique: true end diff --git a/rake/indexed_posts.rake b/rake/indexed_posts.rake new file mode 100644 index 00000000..f9df9198 --- /dev/null +++ b/rake/indexed_posts.rake @@ -0,0 +1,44 @@ +namespace :indexed_posts do + desc "Ensure all Domain::*::Post records have an IndexedPost record" + task ensure_all_posts: %i[ensure_fa_posts ensure_e621_posts ensure_ib_posts] + + desc "Ensure all Domain::Fa::Post records have an IndexedPost record" + task ensure_fa_posts: %i[environment set_logger_stdout] do + IndexedPostsRake.ensure_indexed_post_on( + IndexedPostsRake.model_without_indexed_post(Domain::Fa::Post), + ) + end + + desc "Ensure all Domain::E621::Post records have an IndexedPost record" + 
task ensure_e621_posts: %i[environment set_logger_stdout] do + IndexedPostsRake.ensure_indexed_post_on( + IndexedPostsRake.model_without_indexed_post(Domain::E621::Post), + ) + end + + desc "Ensure all Domain::Inkbunny::Post records have an IndexedPost record" + task ensure_ib_posts: %i[environment set_logger_stdout] do + IndexedPostsRake.ensure_indexed_post_on( + IndexedPostsRake.model_without_indexed_post(Domain::Inkbunny::Post), + ) + end +end + +module IndexedPostsRake + def self.model_without_indexed_post(model) + model.left_outer_joins(:indexed_post).where(indexed_posts: { id: nil }) + end + + def self.ensure_indexed_post_on(relation) + total = relation.count + Rails.logger.info( + "Ensuring indexed post on #{relation.name.bold} (#{total} total to process)", + ) + progress = ProgressBar.create(total: total, throttle_rate: 0.2) + relation.find_each do |post| + post.ensure_indexed_post! + post.save! + progress.increment + end + end +end diff --git a/spec/jobs/domain/inkbunny/file_job_spec.rb b/spec/jobs/domain/inkbunny/file_job_spec.rb index 65e9ccf1..12a40b5f 100644 --- a/spec/jobs/domain/inkbunny/file_job_spec.rb +++ b/spec/jobs/domain/inkbunny/file_job_spec.rb @@ -1,26 +1,28 @@ require "rails_helper" -describe Domain::Inkbunny::Job::FileJob do +module FileJobSpec AN_IMAGE_SHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" AN_IMAGE_MD5 = "d41d8cd98f00b204e9800998ecf8427e" AN_IMAGE_PATH = "domain/e621/job/an-image.png" AN_IMAGE_URL = "https://static1.e621.net/file/foo.png" +end +describe Domain::Inkbunny::Job::FileJob do let(:http_client_mock) { instance_double("::Scraper::HttpClient") } before { Scraper::ClientFactory.http_client_mock = http_client_mock } let(:file) do Domain::Inkbunny::File.create!( { ib_file_id: 12_345, - md5_initial: AN_IMAGE_MD5, - md5_full: AN_IMAGE_MD5, - url_str: AN_IMAGE_URL, + md5_initial: FileJobSpec::AN_IMAGE_MD5, + md5_full: FileJobSpec::AN_IMAGE_MD5, + url_str: FileJobSpec::AN_IMAGE_URL, file_name: 
"foo.png", ib_created_at: Time.now, file_order: 1, md5s: { - initial_file_md5: AN_IMAGE_MD5 + initial_file_md5: FileJobSpec::AN_IMAGE_MD5, }, post: Domain::Inkbunny::Post.create!( @@ -28,11 +30,11 @@ describe Domain::Inkbunny::Job::FileJob do ib_post_id: 67_891, creator: Domain::Inkbunny::User.create!( - { ib_user_id: 12_345, name: "TheUser" } - ) - } - ) - } + { ib_user_id: 12_345, name: "TheUser" }, + ), + }, + ), + }, ) end @@ -43,19 +45,19 @@ describe Domain::Inkbunny::Job::FileJob do http_client_mock, [ { - uri: AN_IMAGE_URL, + uri: FileJobSpec::AN_IMAGE_URL, status_code: 200, content_type: "image/png", - contents: SpecUtil.read_fixture_file(AN_IMAGE_PATH), - caused_by_entry: hle - } - ] + contents: SpecUtil.read_fixture_file(FileJobSpec::AN_IMAGE_PATH), + caused_by_entry: hle, + }, + ], ) perform_now({ file: file, caused_by_entry: hle }) file.reload expect(file.log_entry.response).to eq(file.blob_entry) - expect(file.blob_entry.sha256_hex).to eq(AN_IMAGE_SHA256) + expect(file.blob_entry.sha256_hex).to eq(FileJobSpec::AN_IMAGE_SHA256) end it "marks the post as errored if the download fails" do @@ -64,12 +66,12 @@ describe Domain::Inkbunny::Job::FileJob do http_client_mock, [ { - uri: AN_IMAGE_URL, + uri: FileJobSpec::AN_IMAGE_URL, status_code: 404, content_type: "text/html", - contents: "not found" - } - ] + contents: "not found", + }, + ], ) perform_now({ file: file }) @@ -81,8 +83,8 @@ describe Domain::Inkbunny::Job::FileJob do { "status_code" => 404, "log_entry_id" => hles[0].id, - "retry_count" => 1 - } + "retry_count" => 1, + }, ) end @@ -92,18 +94,18 @@ describe Domain::Inkbunny::Job::FileJob do http_client_mock, [ { - uri: AN_IMAGE_URL, + uri: FileJobSpec::AN_IMAGE_URL, status_code: 500, content_type: "text/html", - contents: "not found" + contents: "not found", }, { - uri: AN_IMAGE_URL, + uri: FileJobSpec::AN_IMAGE_URL, status_code: 200, content_type: "image/png", - contents: SpecUtil.read_fixture_file(AN_IMAGE_PATH) - } - ] + contents: 
SpecUtil.read_fixture_file(FileJobSpec::AN_IMAGE_PATH), + }, + ], ) perform_now({ file: file }, should_raise: true) @@ -113,7 +115,7 @@ describe Domain::Inkbunny::Job::FileJob do perform_now({ file: file }) file.reload expect(file.blob_entry).not_to be_nil - expect(file.blob_entry.sha256_hex).to eq(AN_IMAGE_SHA256) + expect(file.blob_entry.sha256_hex).to eq(FileJobSpec::AN_IMAGE_SHA256) end it "throws on a non-404 error in order to retry later" do @@ -123,12 +125,12 @@ describe Domain::Inkbunny::Job::FileJob do http_client_mock, [ { - uri: AN_IMAGE_URL, + uri: FileJobSpec::AN_IMAGE_URL, status_code: 500, content_type: "text/html", - contents: "not found" - } - ] * num_retries + contents: "not found", + }, + ] * num_retries, ) num_retries.times.map do |retry_num| @@ -140,8 +142,8 @@ describe Domain::Inkbunny::Job::FileJob do { "status_code" => 500, "log_entry_id" => hles[retry_num].id, - "retry_count" => retry_num + 1 - } + "retry_count" => retry_num + 1, + }, ) end diff --git a/spec/models/domain/e621/post_spec.rb b/spec/models/domain/e621/post_spec.rb index b1561405..22836868 100644 --- a/spec/models/domain/e621/post_spec.rb +++ b/spec/models/domain/e621/post_spec.rb @@ -3,7 +3,6 @@ require "rails_helper" describe Domain::E621::Post do it "ensures indexed_post is created" do post = SpecUtil.build_e621_post(created_at: 1.day.ago) - expect(post.indexed_post).to be_nil post.save! expect(post.indexed_post).to be_present expect(post.indexed_post.created_at).to eq(post.created_at) diff --git a/spec/models/domain/fa/post_spec.rb b/spec/models/domain/fa/post_spec.rb index 1ac63199..d562f580 100644 --- a/spec/models/domain/fa/post_spec.rb +++ b/spec/models/domain/fa/post_spec.rb @@ -3,7 +3,6 @@ require "rails_helper" describe Domain::Fa::Post do it "ensures indexed_post is created" do post = SpecUtil.build_domain_fa_post(created_at: 1.day.ago) - expect(post.indexed_post).to be_nil post.save! 
expect(post.indexed_post).to be_present expect(post.indexed_post.created_at).to eq(post.created_at)