Update project configuration and enhance OpenTelemetry integration

- Updated `.gitignore` to ignore `.devcontainer/signoz/data/*` while keeping the `.keep` placeholder files tracked.
- Updated `.prettierrc` to include the `@prettier/plugin-xml` plugin and configured XML formatting options.
- Added OpenTelemetry SDK and exporter gems to the `Gemfile` for enhanced monitoring capabilities.
- Removed `package-lock.json` as part of the transition to Yarn for dependency management.
- Extended the `.devcontainer` setup with new SigNoz services, including ClickHouse and its supporting configuration.
- Introduced new ClickHouse configuration files for user and cluster settings.
- Updated Nginx and OpenTelemetry collector configurations to support new logging and monitoring features.
- Updated the placeholder text in the `UserSearchBar` component.

These changes aim to improve project maintainability, monitoring capabilities, and user experience.
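
For orientation, here is a minimal sketch of how the new SDK and exporter gems are typically initialized in a Rails app. The initializer path and the `c.use_all` call are illustrative assumptions, not files in this commit; the `OTEL_*` values come from the devcontainer environment shown below.

# config/initializers/opentelemetry.rb (hypothetical sketch, not part of this diff)
require "opentelemetry/sdk"
require "opentelemetry/exporter/otlp"

OpenTelemetry::SDK.configure do |c|
  # The OTLP exporter reads OTEL_EXPORTER_OTLP_ENDPOINT (http://otel-collector:4318)
  # from the environment, and the SDK reads OTEL_SERVICE_NAME and
  # OTEL_RESOURCE_ATTRIBUTES the same way; the assignment below is only a fallback.
  c.service_name = ENV.fetch("OTEL_SERVICE_NAME", "redux-scraper-dev")
  c.use_all # enable whichever opentelemetry-instrumentation-* gems are installed
end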
Dylan Knutson
2025-01-03 17:46:58 +00:00
parent d6afdf424b
commit 9f0f6877d9
35 changed files with 498 additions and 9561 deletions

View File

@@ -74,7 +74,7 @@ COPY --from=native-gems /usr/src/app/gems/rb-bsdiff /gems/rb-bsdiff
ENV RAILS_ENV development
# [Optional] Uncomment this line to install additional gems.
RUN su vscode -c "gem install bundler -v '2.4.1'" && \
RUN su vscode -c "gem install bundler -v '2.5.6'" && \
su vscode -c "gem install rake -v '13.0.6'" && \
su vscode -c "gem install ruby-lsp -v '0.22.1'"

View File

@@ -11,11 +11,11 @@ services:
- devcontainer-redux-gem-cache:/usr/local/rvm/gems
- devcontainer-redux-blob-files:/mnt/blob_files_development
- /tank/redux-data/blob_files_production:/mnt/blob_files_production
# Overrides default command so things don't shut down after the process ends.
environment:
OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4318
OTEL_SERVICE_NAME: redux-scraper-dev
OTEL_RESOURCE_ATTRIBUTES: application=redux-scraper-dev
command: sleep infinity
# Runs the app on the same network as the database container so that "forwardPorts" in devcontainer.json works.
network_mode: service:db
db:

View File

@@ -9,7 +9,6 @@ echo "Rake: $(which rake)"
bundle install --jobs $(getconf _NPROCESSORS_ONLN)
rbenv rehash
npm install yarn
bin/rails yarn:install
yarn

View File

@@ -10,4 +10,5 @@ profiler
.vscode
launch.json
settings.json
*.export
.devcontainer

.env-cmdrc Normal file
View File

@@ -0,0 +1,15 @@
{
"dev": {
"RAILS_ENV": "development",
"FOO_BAR": "baz"
},
"staging": {
"RAILS_ENV": "staging"
},
"production": {
"RAILS_ENV": "production"
},
"worker": {
"RAILS_ENV": "worker"
}
}
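
This file pairs with the `env-cmd` package added to `package.json` later in this commit; a typical invocation (assumed here, not shown in the diff) selects one of the named blocks, e.g. `npx env-cmd -e dev bin/rails s`.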

.gitignore vendored
View File

@@ -12,6 +12,9 @@ lib/xdiff
ext/xdiff/Makefile
ext/xdiff/xdiff
# use yarn to manage node_modules
package-lock.json
*.notes.md
# Ignore bundler config.

View File

@@ -7,6 +7,9 @@
"plugins": [
"prettier-plugin-tailwindcss",
"@prettier/plugin-ruby",
"@prettier/plugin-xml",
"@4az/prettier-plugin-html-erb"
]
],
"xmlQuoteAttributes": "double",
"xmlWhitespaceSensitivity": "ignore"
}

View File

@@ -5,6 +5,9 @@
"workbench.preferredDarkColorTheme": "Spinel",
"workbench.preferredLightColorTheme": "Spinel Light",
"rubyLsp.formatter": "syntax_tree",
"files.associations": {
".env-cmdrc": "json"
},
"[ruby]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
@@ -29,6 +32,9 @@
"[jsonc]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[xml]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"tailwindCSS.includeLanguages": {
"erb": "html",
"typescript": "javascript"

View File

@@ -74,9 +74,13 @@ group :development, :staging do
require: %w[enable_rails_patches rack-mini-profiler]
gem "stackprof"
# temporary ref of rails_live_reload to a commit that adds ignore patterns
# to the Listen gem
gem "rails_live_reload",
git: "https://github.com/railsjazz/rails_live_reload",
ref: "dcd3b73904594e2c5134c2f6e05954f3937a8d29"
# git: "https://github.com/mktakuya/rails_live_reload",
# ref: "95d7ac7c03e8c702066ed3dc9cd70a965412e2d2"
# gem "rails_live_reload", "0.4.0"
end

View File

@@ -425,15 +425,15 @@ GEM
semantic_range (>= 2.3.0)
shoulda-matchers (6.4.0)
activesupport (>= 5.2.0)
sorbet (0.5.11708)
sorbet-static (= 0.5.11708)
sorbet-runtime (0.5.11708)
sorbet-static (0.5.11708-aarch64-linux)
sorbet-static (0.5.11708-universal-darwin)
sorbet-static (0.5.11708-x86_64-linux)
sorbet-static-and-runtime (0.5.11708)
sorbet (= 0.5.11708)
sorbet-runtime (= 0.5.11708)
sorbet (0.5.11711)
sorbet-static (= 0.5.11711)
sorbet-runtime (0.5.11711)
sorbet-static (0.5.11711-aarch64-linux)
sorbet-static (0.5.11711-universal-darwin)
sorbet-static (0.5.11711-x86_64-linux)
sorbet-static-and-runtime (0.5.11711)
sorbet (= 0.5.11711)
sorbet-runtime (= 0.5.11711)
spoom (1.5.0)
erubi (>= 1.10.0)
prism (>= 0.28.0)

View File

@@ -1,4 +1,4 @@
rails: RAILS_ENV=development ./bin/rails s -p 3000
rails: RAILS_ENV=development bundle exec rails s -p 3000
wp-client: RAILS_ENV=development HMR=true ./bin/webpacker-dev-server
wp-server: RAILS_ENV=development HMR=true SERVER_BUNDLE_ONLY=yes ./bin/webpacker --watch
css: RAILS_ENV=development yarn build:css[debug] --watch

View File

@@ -289,7 +289,7 @@ export default function UserSearchBar({ isServerRendered }: PropTypes) {
]
.filter(Boolean)
.join(' ')}
placeholder="Search FurAffinity Users"
placeholder="Search FurAffinity Users?!?"
defaultValue={state.userName}
onChange={(e) => {
setState((s) => ({ ...s, typingSettled: false }));

View File

@@ -16,13 +16,15 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
@full_scan = T.let(false, T::Boolean)
@force_scan = T.let(false, T::Boolean)
@last_page_post_ids = T.let(Set.new, T::Set[Integer])
@use_http_cache = T.let(false, T::Boolean)
end
sig { override.params(args: T::Hash[Symbol, T.untyped]).void }
def perform(args)
@first_job_entry = nil
user = init_from_args!(args, build_user: false)
@full_scan = !!args[:full_scan] || @force_scan
@full_scan = !!args[:full_scan]
@use_http_cache = !!args[:use_http_cache]
user ||
begin
defer_job(Domain::Fa::Job::UserPageJob, { url_name: args[:url_name] })
@@ -33,10 +35,17 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
logger.prefix = "[#{user.url_name&.bold} / #{user.state&.bold}]"
return unless user_due_for_scan?(:favs)
max_page_number = T.let([(user.num_favorites || 0) / 48, 100].max, Integer)
max_page_number =
T.let([((user.num_favorites || 0) + 1) / 48, 100].max, Integer)
logger.info "[max page number] [#{max_page_number.to_s.bold}]"
existing_faved_ids = Set.new(user.fav_post_joins.active.pluck(:post_id))
existing_faved_ids =
T.let(
Set.new(user.fav_post_joins.active.pluck(:post_id)),
T::Set[Integer],
)
to_add = T.let(Set.new, T::Set[Integer])
while true
ret = scan_page(user: user)
@@ -47,8 +56,20 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
new_favs = @last_page_post_ids - existing_faved_ids
if new_favs.empty?
user.scanned_favs_at = Time.zone.now
user.save!
logger.info "[no new favs] [stopping scan]"
to_add += @seen_post_ids - existing_faved_ids
logger.info "[partial scan] [add #{to_add.size.to_s.bold}] [remove none]"
ReduxApplicationRecord.transaction do
to_add.each_slice(1000) do |slice|
user.fav_post_joins.upsert_all(
slice.map { |id| { post_id: id, removed: false } },
unique_by: :index_domain_fa_favs_on_user_id_and_post_id,
update_only: [:removed],
)
end
user.save!
end
logger.info "[reached end of unobserved favs] [stopping scan]"
return
end
end
@@ -98,7 +119,12 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
else
"https://www.furaffinity.net/favorites/#{user.url_name}/"
end
response = http_client.get(url, caused_by_entry: causing_log_entry)
response =
http_client.get(
url,
caused_by_entry: causing_log_entry,
use_http_cache: @use_http_cache,
)
self.first_log_entry ||= response.log_entry
if response.status_code != 200
fatal_error(

View File

@@ -99,7 +99,7 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
end
end
logger.debug "#{HexUtil.humansize(response.log_entry.response.size)} / #{response.log_entry.content_type} / #{response.log_entry.response_time_ms} ms"
logger.debug "#{HexUtil.humansize(T.must(response.log_entry.response&.size))} / #{response.log_entry.content_type} / #{response.log_entry.response_time_ms} ms"
post.file = response.log_entry
post.save!
end

View File

@@ -1,31 +1,46 @@
# typed: false
# typed: strict
class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
queue_as :fa_user_gallery
MAX_PAGE_NUMBER = 350
class Folder < T::Struct
const :href, String
const :title, String
end
sig { params(args: T.untyped).void }
def initialize(*args)
super(*T.unsafe(args))
@go_until_end = T.let(false, T::Boolean)
@max_page_number = T.let(MAX_PAGE_NUMBER, Integer)
@visited = T.let(Set.new, T::Set[Folder])
@folders = T.let(Set.new, T::Set[Folder])
end
sig { override.params(args: T::Hash[Symbol, T.untyped]).void }
def perform(args)
init_from_args!(args)
user = T.must(@user)
if @user.state != "ok" && @user.scanned_gallery_at
logger.warn("state == #{@user.state} and already scanned, skipping")
if user.state != "ok" && user.scanned_gallery_at
logger.warn("state == #{user.state} and already scanned, skipping")
return
end
# buggy (sentinel) user
return if @user.id == 117_552 && @user.url_name == "click here"
return if user.id == 117_552 && user.url_name == "click here"
@go_until_end = @user.scanned_gallery_at.nil?
@first_gallery_page_entry = nil
@max_page_number = MAX_PAGE_NUMBER
if @user.num_submissions && @user.scanned_page_at &&
@user.scanned_page_at > 3.days.ago
@max_page_number = (@user.num_submissions * 72) + 3
@go_until_end = user.scanned_gallery_at.nil?
if (num_submissions = user.num_submissions) &&
(scanned_page_at = user.scanned_page_at) &&
(scanned_page_at > 3.days.ago)
@max_page_number = (num_submissions * 72) + 3
end
if !@user.due_for_gallery_scan? && !@force_scan
if !user.due_for_gallery_scan? && !@force_scan
logger.warn(
"gallery scanned #{time_ago_in_words(@user.scanned_page_at)}, skipping",
"gallery scanned #{time_ago_in_words(user.scanned_page_at)}, skipping",
)
return
end
@@ -34,46 +49,54 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
@folders =
Set.new(
[
{ href: "/gallery/#{@user.url_name}/", title: "Main Gallery" },
{ href: "/scraps/#{@user.url_name}/", title: "Scraps" },
Folder.new(href: "/gallery/#{user.url_name}/", title: "Main Gallery"),
Folder.new(href: "/scraps/#{user.url_name}/", title: "Scraps"),
],
)
until (@folders - @visited).empty?
folder = (@folders - @visited).first
@visited.add folder
break if scan_folder(folder) == :break
break if scan_folder(user, folder) == :break
end
@user.log_entry_detail[
"last_gallery_page_id"
] = @first_gallery_page_entry&.id
@user.scanned_gallery_at = Time.now
@user.save!
user.log_entry_detail["last_gallery_page_id"] = first_log_entry&.id
user.scanned_gallery_at = Time.now
user.save!
end
private
def scan_folder(folder)
sig do
params(user: Domain::Fa::User, folder: Folder).returns(T.nilable(Symbol))
end
def scan_folder(user, folder)
page_number = 1
total_num_new_posts_seen = 0
total_num_posts_seen = 0
while true
if page_number >= @max_page_number
fatal_error(
"hit max #{page_number}) pages, num submissions: #{@user.num_submissions}",
"hit max #{page_number}) pages, num submissions: #{user.num_submissions}",
)
end
folder_href = folder[:href]
folder_href = folder.href
folder_href += "/" unless folder_href.end_with?("/")
folder_href = "/" + folder_href unless folder_href.start_with?("/")
page_url =
"https://www.furaffinity.net#{folder_href}#{page_number}?perpage=72"
response = http_client.get(page_url, caused_by_entry: @caused_by_entry)
response = http_client.get(page_url, caused_by_entry: causing_log_entry)
log_entry = response.log_entry
@first_gallery_page_entry ||= response.log_entry
@caused_by_entry = @first_gallery_page_entry
if response.status_code == 200
enqueue_jobs_from_found_links(
log_entry,
suppress_jobs: [{ job: self.class, url_name: user.url_name }],
)
end
self.first_log_entry ||= log_entry
if response.status_code != 200
fatal_error(
@@ -82,7 +105,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
end
if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
@user,
user,
response,
)
logger.error("account disabled / not found, abort")
@@ -92,17 +115,16 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
page = Domain::Fa::Parser::Page.new(response.body)
# newly instantiated users don't have a name yet, but it can be derived from the gallery page
@user.name ||= page.user_page.name || @user.url_name
user.name ||= page.user_page.name || user.url_name
listing_page_stats =
update_and_enqueue_posts_from_listings_page(
:gallery_page,
page,
response.log_entry,
enqueue_posts_pri: :low,
enqueue_gallery_scan: false,
enqueue_page_scan: false,
page_desc: "#{folder[:title]}@#{page_number}",
page_desc: "#{folder.title}@#{page_number}",
)
total_num_new_posts_seen += listing_page_stats.new_seen
total_num_posts_seen += listing_page_stats.total_seen
@@ -114,13 +136,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
break if listing_page_stats.total_seen < 72
end
logger.info "folder `#{folder[:title].bold}` complete - #{total_num_new_posts_seen.to_s.bold} new, #{total_num_posts_seen.to_s.bold} total"
ensure
if response && response.status_code == 200
enqueue_jobs_from_found_links(
response.log_entry,
suppress_jobs: [{ job: self.class, url_name: @user.url_name }],
)
end
logger.info "folder `#{folder.title.bold}` complete - #{total_num_new_posts_seen.to_s.bold} new, #{total_num_posts_seen.to_s.bold} total"
:continue
end
end

View File

@@ -18,7 +18,7 @@ class Domain::Twitter::Job::MediaJob < Domain::Twitter::Job::TwitterJobBase
response =
http_client.get(@media.url_str, caused_by_entry: @caused_by_entry)
logger.debug "#{HexUtil.humansize(response.log_entry.response.size)} / " +
logger.debug "#{HexUtil.humansize(T.must(response.log_entry.response&.size))} / " +
"#{response.log_entry.content_type} / " +
"#{response.log_entry.response_time_ms} ms"

View File

@@ -7,7 +7,11 @@ class Scraper::HttpClient
sig { returns(Scraper::HttpClientConfig) }
attr_reader :config
Response = Struct.new(:status_code, :body, :log_entry)
class Response < T::Struct
const :status_code, Integer
const :body, String
const :log_entry, HttpLogEntry
end
class InvalidURLError < ArgumentError
end
@@ -29,24 +33,74 @@ class Scraper::HttpClient
@http_performer.close!
end
sig { params(url: String, caused_by_entry: T.untyped).returns(Response) }
def get(url, caused_by_entry: nil)
do_request(:get, url, caused_by_entry)
sig do
params(
url: String,
caused_by_entry: T.untyped,
use_http_cache: T::Boolean,
).returns(Response)
end
def get(url, caused_by_entry: nil, use_http_cache: false)
do_request(:get, url, caused_by_entry, use_http_cache)
end
sig { params(url: String, caused_by_entry: T.untyped).returns(Response) }
def post(url, caused_by_entry: nil)
do_request(:post, url, caused_by_entry)
sig do
params(
url: String,
caused_by_entry: T.untyped,
use_http_cache: T::Boolean,
).returns(Response)
end
def post(url, caused_by_entry: nil, use_http_cache: false)
do_request(:post, url, caused_by_entry, use_http_cache)
end
private
sig do
params(method: Symbol, url: String, caused_by_entry: T.untyped).returns(
Response,
params(
from_cache: T::Boolean,
log_entry: HttpLogEntry,
total_time_ms: Integer,
).void
end
def print_request_performed_log_line(from_cache:, log_entry:, total_time_ms:)
response_code = T.must(log_entry.status_code)
response_code_colorized =
if response_code == 200
response_code.to_s.light_green
elsif (response_code / 100) == 4 || (response_code / 100) == 5
response_code.to_s.red.bold
else
response_code.to_s.yellow.bold
end
response_blob_entry = T.must(log_entry.response)
response_time_ms = T.must(log_entry.response_time_ms)
logger.info(
[
from_cache ? "[" + "CACHED".light_green.bold + "]" : nil,
"[entry #{log_entry.id.to_s.bold} /",
"GET #{response_code_colorized} /",
"#{HexUtil.humansize(T.must(response_blob_entry.bytes_stored)).bold} / #{HexUtil.humansize(T.must(response_blob_entry.size)).bold}]",
"[#{response_time_ms.to_s.bold} ms / #{total_time_ms.to_s.bold} ms]",
log_entry.uri.to_s.black,
].compact.join(" "),
)
end
def do_request(method, url, caused_by_entry)
sig do
params(
method: Symbol,
url: String,
caused_by_entry: T.untyped,
use_http_cache: T::Boolean,
).returns(Response)
end
def do_request(method, url, caused_by_entry, use_http_cache)
requested_at = Time.now
uri = Addressable::URI.parse(url)
uri.scheme = "https" if uri.scheme.blank?
uri.path = "/" if uri.path.blank?
@@ -59,6 +113,26 @@ class Scraper::HttpClient
)
end
if use_http_cache
if (cached_response = HttpLogEntry.find_by_uri(uri)) &&
(status_code = cached_response.status_code) &&
(body = cached_response.response&.contents)
print_request_performed_log_line(
from_cache: true,
log_entry: cached_response,
total_time_ms: ((Time.now - requested_at) * 1000).to_i,
)
return(
Response.new(
status_code: status_code,
body: body,
log_entry: cached_response,
)
)
end
end
ratelimit_conf =
config.ratelimit.find { |conf| File.fnmatch? conf[0], uri.host } ||
["*", :none]
@@ -85,7 +159,6 @@ class Scraper::HttpClient
@cookie_jar.cookies(Addressable::URI.encode url),
),
}
requested_at = Time.now
response = @http_performer.do_request(method, url, request_headers)
response_code = response.response_code
@@ -143,24 +216,21 @@ class Scraper::HttpClient
raise
end
response_code_colorized =
if response_code == 200
response_code.to_s.light_green
elsif (response_code / 100) == 4 || (response_code / 100) == 5
response_code.to_s.red.bold
else
response_code.to_s.yellow.bold
end
total_time_ms = ((Time.now - requested_at) * 1000).round(0)
logger.info(
[
"[entry #{log_entry.id.to_s.bold} /",
"GET #{response_code_colorized} /",
"#{HexUtil.humansize(T.must(response_blob_entry.bytes_stored)).bold} / #{HexUtil.humansize(T.must(response_blob_entry.size)).bold}]",
"[#{response_time_ms.to_s.bold} ms / #{total_time_ms.to_s.bold} ms]",
scrubbed_uri.to_s.black,
].reject(&:nil?).join(" "),
# logger.info(
# [
# "[entry #{log_entry.id.to_s.bold} /",
# "GET #{response_code_colorized} /",
# "#{HexUtil.humansize(T.must(response_blob_entry.bytes_stored)).bold} / #{HexUtil.humansize(T.must(response_blob_entry.size)).bold}]",
# "[#{response_time_ms.to_s.bold} ms / #{total_time_ms.to_s.bold} ms]",
# scrubbed_uri.to_s.black,
# ].reject(&:nil?).join(" "),
# )
#
total_time_ms = ((Time.now - requested_at) * 1000).to_i
print_request_performed_log_line(
from_cache: false,
log_entry: log_entry,
total_time_ms: total_time_ms,
)
if response_code == 524 || response_code == 502 || response_code == 503 ||
@@ -176,7 +246,11 @@ class Scraper::HttpClient
raise
end
Response.new(response_code, response_body, log_entry)
Response.new(
status_code: response_code,
body: response_body,
log_entry: log_entry,
)
end
sig { params(domain: String).returns(T::Boolean) }
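
Taken together, the new keyword argument turns `Scraper::HttpClient#get` into a read-through cache over `HttpLogEntry`. A hedged usage sketch based on the signatures above (the URL is illustrative):

# sketch only: exercises the cache branch added to do_request above
response =
  http_client.get(
    "https://www.furaffinity.net/favorites/zzreg/",
    use_http_cache: true, # reuse a stored HttpLogEntry for this URI if present
  )
# Response is now a T::Struct, so its fields are keyword-initialized and typed
response.status_code # => Integer
response.log_entry # => HttpLogEntry (cached, or freshly written by the request)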

View File

@@ -50,6 +50,30 @@ class HttpLogEntry < ReduxApplicationRecord
find_by(uri_host: uri.host, uri_path: uri.path)
end
sig do
params(uri: T.any(String, Addressable::URI)).returns(
T.nilable(HttpLogEntry),
)
end
def self.find_by_uri(uri)
find_all_by_uri(uri).first
end
sig do
params(uri: T.any(String, Addressable::URI)).returns(
::ActiveRecord::Relation,
)
end
def self.find_all_by_uri(uri)
uri = Addressable::URI.parse(uri) unless uri.is_a?(Addressable::URI)
where(
uri_host: uri.host,
uri_path: uri.path,
uri_query: uri.query,
uri_hash: uri.fragment,
).order(requested_at: :desc)
end
sig { returns(T.nilable(Integer)) }
def response_size
if association(:response).loaded?
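
`find_by_uri` takes the newest match from `find_all_by_uri`, which keys on host, path, query, and fragment. A hedged lookup sketch (the URI is illustrative):

# sketch: most recent logged response for an exact URI, if one was stored
entry = HttpLogEntry.find_by_uri("https://www.furaffinity.net/favorites/zzreg/")
entry&.status_code # => e.g. 200
entry&.response&.contents # the cached body consumed by the HTTP cache path above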

View File

@@ -2,15 +2,29 @@
# frozen_string_literal: true
require "rails_live_reload"
# This is a hack to make the Listen gem ignore the .devcontainer folder
original_socket_path = RailsLiveReload.config.socket_path
RailsLiveReload
.config
.define_singleton_method(:socket_path) { original_socket_path }
RailsLiveReload.config.instance_variable_set(
:@root_path,
Rails.root.join("app"),
)
RailsLiveReload.configure do |config|
config.url = "/rails/live/reload"
# Default watched folders & files
# config.watch %r{app/views/.+\.(erb|haml|slim)$}
# config.watch %r{(app|vendor)/(assets|javascript)/\w+/(.+\.(css|js|html|png|jpg|ts|jsx)).*}, reload: :always
# "root path" is already set to app/, so don't include that as a prefix
config.watch %r{views/.+\.(erb|haml|slim)$}
# app/javascript is not in the reload path. HMR already handles JS hot reloading
config.watch %r{assets/\w+/(.+\.(css|js|html|png|jpg|ts|jsx)).*},
reload: :always
# More examples:
# config.watch %r{app/helpers/.+\.rb}, reload: :always
config.watch %r{helpers/.+\.rb}, reload: :always
# config.watch %r{config/locales/.+\.yml}, reload: :always
config.enabled = Rails.env.development? || Rails.env.staging?

View File

@@ -17,7 +17,7 @@ psql-dump-domain-fa-favs:
@psql -P pager=off -c 'select user_id, post_id, 1 from domain_fa_favs limit 10000000;' -d redux_prod -h 10.166.33.171 -U scraper_redux -t -A -F ' '
test:
bundle exec srb tc && bin/rake parallel:spec
bundle exec srb tc && RAILS_ENV=test bin/rake parallel:spec
tc *args:
bundle exec srb tc {{args}}

package-lock.json generated

File diff suppressed because it is too large

View File

@@ -48,8 +48,10 @@
"@4az/prettier-plugin-html-erb": "^0.0.6",
"@pmmmwh/react-refresh-webpack-plugin": "^0.5.10",
"@prettier/plugin-ruby": "^4.0.4",
"@prettier/plugin-xml": "^3.4.1",
"@types/lodash": "^4.14.192",
"@types/react": "^18.0.33",
"env-cmd": "^10.1.0",
"prettier": "^3.4.2",
"prettier-plugin-tailwindcss": "^0.6.9",
"react-refresh": "^0.14.0",

View File

@@ -4,6 +4,7 @@
--ignore=tmp/
--ignore=vendor/
--ignore=gems/rb-bsdiff
--ignore=.devcontainer
--suppress-payload-superclass-redefinition-for=Net::IMAP::Atom
--suppress-payload-superclass-redefinition-for=Net::IMAP::Literal
--suppress-payload-superclass-redefinition-for=Net::IMAP::MessageSet

View File

@@ -29,6 +29,7 @@ class ApplicationController
include ::Domain::E621::PostsHelper
include ::Domain::Fa::PostsHelper
include ::Domain::Fa::UsersHelper
include ::GoodJobHelper
include ::IndexablePostsHelper
include ::LogEntriesHelper
include ::SourceHelper

View File

@@ -26,6 +26,7 @@ class DeviseController
include ::Domain::E621::PostsHelper
include ::Domain::Fa::PostsHelper
include ::Domain::Fa::UsersHelper
include ::GoodJobHelper
include ::IndexablePostsHelper
include ::LogEntriesHelper
include ::SourceHelper

View File

@@ -15,13 +15,13 @@ class Domain::Fa::Job::UserGalleryJob
sig do
params(
args: T.untyped,
args: T::Hash[::Symbol, T.untyped],
block: T.nilable(T.proc.params(job: Domain::Fa::Job::UserGalleryJob).void)
).returns(T.any(Domain::Fa::Job::UserGalleryJob, FalseClass))
end
def perform_later(args, &block); end
sig { params(args: T.untyped).returns(T.untyped) }
sig { params(args: T::Hash[::Symbol, T.untyped]).void }
def perform_now(args); end
end
end

View File

@@ -430,23 +430,44 @@ class GoodJob::DiscreteExecution
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJob::Job) }
def build_job(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJobExecutionLogLinesCollection) }
def build_log_lines_collection(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJob::Job) }
def create_job(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJob::Job) }
def create_job!(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJobExecutionLogLinesCollection) }
def create_log_lines_collection(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJobExecutionLogLinesCollection) }
def create_log_lines_collection!(*args, &blk); end
sig { returns(T.nilable(::GoodJob::Job)) }
def job; end
sig { params(value: T.nilable(::GoodJob::Job)).void }
def job=(value); end
sig { returns(T.nilable(::GoodJobExecutionLogLinesCollection)) }
def log_lines_collection; end
sig { params(value: T.nilable(::GoodJobExecutionLogLinesCollection)).void }
def log_lines_collection=(value); end
sig { returns(T.nilable(::GoodJob::Job)) }
def reload_job; end
sig { returns(T.nilable(::GoodJobExecutionLogLinesCollection)) }
def reload_log_lines_collection; end
sig { void }
def reset_job; end
sig { void }
def reset_log_lines_collection; end
end
module GeneratedAssociationRelationMethods

View File

@@ -445,12 +445,21 @@ class GoodJob::Execution
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJob::Job) }
def build_job(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJobExecutionLogLinesCollection) }
def build_log_lines_collection(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJob::Job) }
def create_job(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJob::Job) }
def create_job!(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJobExecutionLogLinesCollection) }
def create_log_lines_collection(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJobExecutionLogLinesCollection) }
def create_log_lines_collection!(*args, &blk); end
sig { returns(T.nilable(::GoodJob::Job)) }
def job; end
@@ -463,11 +472,23 @@ class GoodJob::Execution
sig { returns(T::Boolean) }
def job_previously_changed?; end
sig { returns(T.nilable(::GoodJobExecutionLogLinesCollection)) }
def log_lines_collection; end
sig { params(value: T.nilable(::GoodJobExecutionLogLinesCollection)).void }
def log_lines_collection=(value); end
sig { returns(T.nilable(::GoodJob::Job)) }
def reload_job; end
sig { returns(T.nilable(::GoodJobExecutionLogLinesCollection)) }
def reload_log_lines_collection; end
sig { void }
def reset_job; end
sig { void }
def reset_log_lines_collection; end
end
module GeneratedAssociationRelationMethods

View File

@@ -420,7 +420,34 @@ class GoodJobExecutionLogLinesCollection
def third_to_last!; end
end
module GeneratedAssociationMethods; end
module GeneratedAssociationMethods
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJob::Execution) }
def build_good_job_execution(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJob::Execution) }
def create_good_job_execution(*args, &blk); end
sig { params(args: T.untyped, blk: T.untyped).returns(::GoodJob::Execution) }
def create_good_job_execution!(*args, &blk); end
sig { returns(T.nilable(::GoodJob::Execution)) }
def good_job_execution; end
sig { params(value: T.nilable(::GoodJob::Execution)).void }
def good_job_execution=(value); end
sig { returns(T::Boolean) }
def good_job_execution_changed?; end
sig { returns(T::Boolean) }
def good_job_execution_previously_changed?; end
sig { returns(T.nilable(::GoodJob::Execution)) }
def reload_good_job_execution; end
sig { void }
def reset_good_job_execution; end
end
module GeneratedAssociationRelationMethods
sig { returns(PrivateAssociationRelation) }

View File

@@ -29,6 +29,7 @@ class Rails::Conductor::BaseController
include ::Domain::E621::PostsHelper
include ::Domain::Fa::PostsHelper
include ::Domain::Fa::UsersHelper
include ::GoodJobHelper
include ::IndexablePostsHelper
include ::LogEntriesHelper
include ::SourceHelper

View File

@@ -185,6 +185,86 @@ describe Domain::Fa::Job::FavsJob do
expect(user.scanned_favs_at).to be_within(1.second).of(Time.now)
end
end
context "incremental scan behavior" do
include_context "user exists"
let(:fa_ids_page1) { [52_106_426, 36_755_337] }
let(:fa_ids_page2) { [40_769_488, 20_808_448] }
let(:fa_ids_page3) { [20_585_829] }
let(:client_mock_config) do
[
{
uri: "https://www.furaffinity.net/favorites/zzreg/",
status_code: 200,
content_type: "text/html",
contents:
SpecUtil.read_fixture_file(
"domain/fa/job/favs_zzreg_page_0_first.html",
),
},
{
uri: "https://www.furaffinity.net/favorites/zzreg/1074627373/next",
status_code: 200,
content_type: "text/html",
contents:
SpecUtil.read_fixture_file(
"domain/fa/job/favs_zzreg_page_1_1074627373.html",
),
caused_by_entry_idx: 0,
},
# {
# uri: "https://www.furaffinity.net/favorites/zzreg/475297391/next",
# status_code: 200,
# content_type: "text/html",
# contents:
# SpecUtil.read_fixture_file(
# "domain/fa/job/favs_zzreg_page_2_475297391.html",
# ),
# caused_by_entry_idx: 0,
# },
]
end
before do
# Create some existing favs that would be on pages 2 and 3
existing_posts =
Domain::Fa::Post.create!(
[
{ fa_id: fa_ids_page2[0], creator: user },
{ fa_id: fa_ids_page2[1], creator: user },
{ fa_id: fa_ids_page3[0], creator: user },
],
)
user.fav_posts << existing_posts
user.update!(scanned_favs_at: 2.years.ago)
end
it "stops scanning when no new favs are found and adds posts from scanned pages" do
# Should only create posts from page 1 since those are the only new ones
expect do perform_now({ url_name: "zzreg" }) end.to change(
Domain::Fa::Post,
:count,
).by(2)
# Should have added the new posts from page 1 to user's favs
user.reload
expect(
user.active_fav_posts.where(fa_id: fa_ids_page1).pluck(:fa_id),
).to eq(fa_ids_page1)
# Should still have the existing favs
expect(
user
.active_fav_posts
.where(fa_id: fa_ids_page2 + fa_ids_page3)
.pluck(:fa_id),
).to eq(fa_ids_page2 + fa_ids_page3)
# Should have updated scanned_favs_at
expect(user.scanned_favs_at).to be_within(1.second).of(Time.now)
end
end
end
context "2024 format for favs pages" do

View File

@@ -17,6 +17,7 @@
require "./spec/helpers/spec_helpers"
require "./spec/helpers/debug_helpers"
require "./spec/support/matchers/html_matchers"
require "./spec/support/matchers/job_matchers"
require "rspec/sorbet"
RSpec::Sorbet.allow_doubles!

View File

@@ -2,7 +2,9 @@
require "rspec/mocks"
class SpecUtil
include HasColorLogger
extend RSpec::Mocks::ExampleMethods
extend RSpec::Matchers
def self.random_string(length = 8)
(0...length).map { (65 + rand(26)).chr }.join
@@ -106,14 +108,18 @@ class SpecUtil
expect(http_client_mock).to(
receive(method).with(
log_entry.uri.to_s,
{ caused_by_entry: caused_by_entry },
).and_return(
Scraper::HttpClient::Response.new(
log_entry.status_code,
log_entry.response.contents,
log_entry,
http_client_opts_with(
caused_by_entry: caused_by_entry,
use_http_cache: request[:use_http_cache],
),
),
) do |uri, opts|
logger.info "[mock http client] [#{method}] [#{uri}] [#{opts.inspect.truncate(80)}]"
Scraper::HttpClient::Response.new(
status_code: log_entry.status_code,
body: log_entry.response.contents,
log_entry: log_entry,
)
end,
)
end

View File

@@ -1 +1,7 @@
# typed: strict
# typed: false
RSpec::Matchers.define :http_client_opts_with do |opts|
match do |actual|
actual[:caused_by_entry] == opts[:caused_by_entry] &&
!!actual[:use_http_cache] == !!opts[:use_http_cache]
end
end
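
For context, a hedged sketch of how this matcher reads at a call site, mirroring the SpecUtil change above (the mock and URL are illustrative):

# sketch: a missing :use_http_cache key is treated as false on either side
expect(http_client_mock).to receive(:get).with(
  "https://example.test/",
  http_client_opts_with(caused_by_entry: nil, use_http_cache: true),
)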

View File

@@ -1155,6 +1155,13 @@
resolved "https://registry.yarnpkg.com/@prettier/plugin-ruby/-/plugin-ruby-4.0.4.tgz#73d85fc2a1731a3f62b57ac3116cf1c234027cb6"
integrity sha512-lCpvfS/dQU5WrwN3AQ5vR8qrvj2h5gE41X08NNzAAXvHdM4zwwGRcP2sHSxfu6n6No+ljWCVx95NvJPFTTjCTg==
"@prettier/plugin-xml@^3.4.1":
version "3.4.1"
resolved "https://registry.npmjs.org/@prettier/plugin-xml/-/plugin-xml-3.4.1.tgz"
integrity sha512-Uf/6/+9ez6z/IvZErgobZ2G9n1ybxF5BhCd7eMcKqfoWuOzzNUxBipNo3QAP8kRC1VD18TIo84no7LhqtyDcTg==
dependencies:
"@xml-tools/parser" "^1.0.11"
"@sinclair/typebox@^0.25.16":
version "0.25.24"
resolved "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.25.24.tgz"
@@ -1526,6 +1533,13 @@
resolved "https://registry.npmjs.org/@webpack-cli/serve/-/serve-1.7.0.tgz"
integrity sha512-oxnCNGj88fL+xzV+dacXs44HcDwf1ovs3AuEzvP7mqXw7fQntqIhQ1BRmynh4qEKQSSSRSWVyXRjmTbZIX9V2Q==
"@xml-tools/parser@^1.0.11":
version "1.0.11"
resolved "https://registry.npmjs.org/@xml-tools/parser/-/parser-1.0.11.tgz"
integrity sha512-aKqQ077XnR+oQtHJlrAflaZaL7qZsulWc/i/ZEooar5JiWj1eLt0+Wg28cpa+XLney107wXqneC+oG1IZvxkTA==
dependencies:
chevrotain "7.1.1"
"@xtuc/ieee754@^1.2.0":
version "1.2.0"
resolved "https://registry.npmjs.org/@xtuc/ieee754/-/ieee754-1.2.0.tgz"
@@ -1880,6 +1894,13 @@ chalk@^4.0, chalk@^4.0.0, chalk@^4.1.2:
ansi-styles "^4.1.0"
supports-color "^7.1.0"
chevrotain@7.1.1:
version "7.1.1"
resolved "https://registry.npmjs.org/chevrotain/-/chevrotain-7.1.1.tgz"
integrity sha512-wy3mC1x4ye+O+QkEinVJkPf5u2vsrDIYW9G7ZuwFl6v/Yu0LwUuT2POsb+NUWApebyxfkQq6+yDfRExbnI5rcw==
dependencies:
regexp-to-ast "0.5.0"
chokidar@^3.5.3, chokidar@^3.6.0:
version "3.6.0"
resolved "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz"
@@ -2393,6 +2414,14 @@ entities@^4.2.0:
resolved "https://registry.npmjs.org/entities/-/entities-4.4.0.tgz"
integrity sha512-oYp7156SP8LkeGD0GF85ad1X9Ai79WtRsZ2gxJqtBuzH+98YUV6jkHEKlZkMbcrjJjIVJNIDP/3WL9wQkoPbWA==
env-cmd@^10.1.0:
version "10.1.0"
resolved "https://registry.npmjs.org/env-cmd/-/env-cmd-10.1.0.tgz"
integrity sha512-mMdWTT9XKN7yNth/6N6g2GuKuJTsKMDHlQFUDacb/heQRRWOTIZ42t1rMHnQu4jYxU1ajdTeJM+9eEETlqToMA==
dependencies:
commander "^4.0.0"
cross-spawn "^7.0.0"
envinfo@^7.7.3:
version "7.8.1"
resolved "https://registry.npmjs.org/envinfo/-/envinfo-7.8.1.tgz"
@@ -4178,6 +4207,11 @@ regenerator-transform@^0.15.1:
dependencies:
"@babel/runtime" "^7.8.4"
regexp-to-ast@0.5.0:
version "0.5.0"
resolved "https://registry.npmjs.org/regexp-to-ast/-/regexp-to-ast-0.5.0.tgz"
integrity sha512-tlbJqcMHnPKI9zSrystikWKwHkBqu2a/Sgw01h3zFjvYrMxEDYHzzoMZnUrbIfpTFEsoRnnviOXNCzFiSc54Qw==
regexpu-core@^5.3.1:
version "5.3.2"
resolved "https://registry.npmjs.org/regexpu-core/-/regexpu-core-5.3.2.tgz"