diff --git a/app/controllers/domain/posts_controller.rb b/app/controllers/domain/posts_controller.rb
index f226df78..5ae44a41 100644
--- a/app/controllers/domain/posts_controller.rb
+++ b/app/controllers/domain/posts_controller.rb
@@ -10,6 +10,8 @@ class Domain::PostsController < DomainController
before_action :set_post_group!, only: %i[posts_in_group]
class PostsIndexViewConfig < T::ImmutableStruct
+ include T::Struct::ActsAsComparable
+
const :show_domain_filters, T::Boolean
const :show_creator_links, T::Boolean
const :index_type_header, String
diff --git a/app/controllers/domain_controller.rb b/app/controllers/domain_controller.rb
index d333b5d5..dd416784 100644
--- a/app/controllers/domain_controller.rb
+++ b/app/controllers/domain_controller.rb
@@ -5,6 +5,8 @@ class DomainController < ApplicationController
abstract!
class DomainParamConfig < T::ImmutableStruct
+ include T::Struct::ActsAsComparable
+
const :post_id_param, Symbol
const :user_id_param, Symbol
const :post_group_id_param, Symbol
diff --git a/app/helpers/domain/users_helper.rb b/app/helpers/domain/users_helper.rb
index 5575c841..17ab5e3a 100644
--- a/app/helpers/domain/users_helper.rb
+++ b/app/helpers/domain/users_helper.rb
@@ -91,6 +91,8 @@ module Domain::UsersHelper
end
class StatRow < T::ImmutableStruct
+ include T::Struct::ActsAsComparable
+
const :name, String
const :value,
T.nilable(
diff --git a/app/jobs/domain/fa/job/base.rb b/app/jobs/domain/fa/job/base.rb
index c46e44fb..369df97d 100644
--- a/app/jobs/domain/fa/job/base.rb
+++ b/app/jobs/domain/fa/job/base.rb
@@ -12,6 +12,11 @@ class Domain::Fa::Job::Base < Scraper::JobBase
protected
+ sig { returns(T::Boolean) }
+ def skip_enqueue_found_links?
+ !!arguments[0][:skip_enqueue_found_links]
+ end
+
sig { params(build_post: T::Boolean).returns(Domain::Post::FaPost) }
def post_from_args!(build_post: false)
args = arguments[0]
@@ -394,4 +399,106 @@ class Domain::Fa::Job::Base < Scraper::JobBase
user.association(:avatar).reload
end
end
+
+ FoundLink = Scraper::LinkFinder::FoundLink
+
+ sig do
+ params(log_entry: HttpLogEntry, suppress_jobs: T::Array[T.untyped]).void
+ end
+ def enqueue_jobs_from_found_links(log_entry, suppress_jobs: [])
+ return if skip_enqueue_found_links?
+
+ start_time = Time.now
+ unless PERMITTED_CONTENT_TYPES.any? { |ct|
+ ct.match(log_entry.content_type)
+ }
+ raise("unsupported content type: #{log_entry.content_type}")
+ end
+ document = log_entry.response&.contents || return
+ link_finder = Scraper::LinkFinder.new(T.must(log_entry.uri_host), document)
+ link_finder.logger.level = :error
+ links = link_finder.find_links
+ job_defs = []
+
+ url_names =
+ links.filter_map do |link|
+ link.is_a?(FoundLink::FaUser) ? link.url_name : nil
+ end
+ url_name_to_fa_user =
+ T.let(
+ Domain::User::FaUser.where(url_name: url_names).index_by(&:url_name),
+ T::Hash[String, Domain::User::FaUser],
+ )
+
+ fa_ids =
+ links.filter_map do |link|
+ link.is_a?(FoundLink::FaPost) ? link.fa_id : nil
+ end
+ fa_id_to_fa_post =
+ T.cast(
+ Domain::Post::FaPost.where(fa_id: fa_ids).index_by(&:fa_id),
+ T::Hash[Integer, Domain::Post::FaPost],
+ )
+
+ links
+ .filter_map do |link|
+ if link.is_a?(FoundLink::FaUser) || link.is_a?(FoundLink::FaPost)
+ link
+ else
+ nil
+ end
+ end
+ .each do |link|
+ case link
+ when FoundLink::FaUser
+ url_name = link.url_name
+ user =
+ url_name_to_fa_user[url_name] ||
+ Domain::User::FaUser.create!(url_name:) do |user|
+ user.name ||= url_name
+ end
+ enqueue_user_scan(user)
+ when FoundLink::FaPost
+ fa_id = link.fa_id
+ post =
+ fa_id_to_fa_post[fa_id] ||
+ Domain::Post::FaPost.build(fa_id:) do |post|
+ post.first_seen_entry_id = log_entry.id
+ end
+ if post.new_record?
+ post.save!
+ defer_job(Domain::Fa::Job::ScanPostJob, { post: })
+ end
+ end
+ end
+
+ job_defs.uniq!
+ job_defs.reject! do |job_def|
+ suppress_jobs.any? do |suppress|
+ suppress == job_def.slice(*suppress.keys)
+ end
+ end
+ job_defs.each do |job_def|
+ job_class = job_def[:job]
+ params = job_def[:params]
+ desc = job_def[:desc]
+ logger.debug(
+ [
+ "link finder -",
+ job_class.name.split("::").last.to_s.ljust(22).bold.light_black,
+ desc,
+ ].join(" "),
+ )
+
+ defer_job(job_class, params.merge({ caused_by_entry: log_entry }))
+ end
+ duration_ms = (1000 * (Time.now - start_time)).to_i.to_s
+ logger.info(
+ "link finder - enqueue #{job_defs.size.to_s.light_white.bold} jobs (#{duration_ms.bold} ms)",
+ )
+ rescue StandardError => e
+ logger.error(
+ "link finder - error enqueuing jobs: #{e.class.name} - #{e.message}",
+ )
+ end
end
diff --git a/app/jobs/domain/fa/job/browse_page_job.rb b/app/jobs/domain/fa/job/browse_page_job.rb
index 0155b6c2..c44615d6 100644
--- a/app/jobs/domain/fa/job/browse_page_job.rb
+++ b/app/jobs/domain/fa/job/browse_page_job.rb
@@ -57,6 +57,8 @@ class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
)
end
+ enqueue_jobs_from_found_links(response.log_entry)
+
page = Domain::Fa::Parser::Page.new(response.body)
listing_page_stats =
update_and_enqueue_posts_from_listings_page(
diff --git a/app/jobs/domain/fa/job/user_gallery_job.rb b/app/jobs/domain/fa/job/user_gallery_job.rb
index 06f94c91..a596720a 100644
--- a/app/jobs/domain/fa/job/user_gallery_job.rb
+++ b/app/jobs/domain/fa/job/user_gallery_job.rb
@@ -93,15 +93,8 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
page_url =
"https://www.furaffinity.net#{folder_href}#{page_number}?perpage=72"
response = http_client.get(page_url)
- log_entry = response.log_entry
-
fatal_error("failed to scan folder page") if response.status_code != 200
- enqueue_jobs_from_found_links(
- log_entry,
- suppress_jobs: [{ job: self.class, url_name: user.url_name }],
- )
-
if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
user,
response,
diff --git a/app/jobs/domain/inkbunny/job/update_posts_job.rb b/app/jobs/domain/inkbunny/job/update_posts_job.rb
index f98c1946..17e535f5 100644
--- a/app/jobs/domain/inkbunny/job/update_posts_job.rb
+++ b/app/jobs/domain/inkbunny/job/update_posts_job.rb
@@ -2,13 +2,13 @@
class Domain::Inkbunny::Job::UpdatePostsJob < Domain::Inkbunny::Job::Base
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
- ib_post_ids = args[:ib_post_ids]
+ ib_post_ids = T.cast(args[:ib_post_ids], T.nilable(T::Array[Integer]))
missing_pool_post_ib_ids = T::Set[Integer].new
pools_to_update = T::Set[Domain::PostGroup::InkbunnyPool].new
- if ib_post_ids.empty?
- logger.warn "empty ib_post_ids"
+ if ib_post_ids.blank? || ib_post_ids.empty?
+ logger.error("empty ib_post_ids")
return
end
diff --git a/app/jobs/domain/static_file_job_helper.rb b/app/jobs/domain/static_file_job_helper.rb
index 594355ec..d46ebbbc 100644
--- a/app/jobs/domain/static_file_job_helper.rb
+++ b/app/jobs/domain/static_file_job_helper.rb
@@ -35,7 +35,8 @@ module Domain::StaticFileJobHelper
begin
response = http_client.get(file_url_str)
- rescue Scraper::HttpClient::InvalidURLError => e
+ rescue Scraper::HttpClient::InvalidURLError,
+ Curl::Err::HostResolutionError => e
post_file.state_terminal_error!
post_file.error_message = e.message
logger.error(
diff --git a/app/jobs/scraper/job_base.rb b/app/jobs/scraper/job_base.rb
index a5500f9d..d707b946 100644
--- a/app/jobs/scraper/job_base.rb
+++ b/app/jobs/scraper/job_base.rb
@@ -174,139 +174,6 @@ class Scraper::JobBase < ApplicationJob
PERMITTED_CONTENT_TYPES =
T.let([%r{text/html}, %r{application/json}], T::Array[Regexp])
- sig do
- params(log_entry: HttpLogEntry, suppress_jobs: T::Array[T.untyped]).void
- end
- def enqueue_jobs_from_found_links(log_entry, suppress_jobs: [])
- return unless ENV["enqueue_jobs_from_found_links"] == "1"
-
- start_time = Time.now
- unless PERMITTED_CONTENT_TYPES.any? { |ct|
- ct.match(log_entry.content_type)
- }
- raise("unsupported content type: #{log_entry.content_type}")
- end
- document = log_entry.response&.contents || return
- link_finder = Scraper::LinkFinder.new(T.must(log_entry.uri_host), document)
- link_finder.logger.level = :error
- links = link_finder.find_links
- job_defs = []
-
- filter_mapper =
- proc do |list, type, key|
- list.filter { |item| item[:type] == type }.map { |item| item[key] }
- end
-
- attr_to_mapper =
- proc do |model_class, attr_sym, in_values|
- model_class
- .where({ attr_sym => in_values })
- .map { |model| [model.send(attr_sym), model] }
- .to_h
- end
-
- url_name_to_fa_user =
- attr_to_mapper.call(
- Domain::Fa::User,
- :url_name,
- filter_mapper.call(links, :fa_user, :url_name),
- )
-
- fa_id_to_fa_post =
- attr_to_mapper.call(
- Domain::Fa::Post,
- :fa_id,
- filter_mapper.call(links, :fa_post, :fa_id),
- )
-
- name_to_twitter_user =
- attr_to_mapper.call(
- Domain::Twitter::User,
- :name,
- filter_mapper.call(links, :twitter_user, :name),
- )
-
- links.each do |link|
- link_type = link[:type]
- case link_type
- when :fa_user
- url_name = link[:url_name]
- user = url_name_to_fa_user[url_name]
- params = (user ? { user: user } : { url_name: url_name })
-
- job_def =
- ({ params: params, desc: "fa user " + (user&.name || url_name).bold })
-
- if !user || user.due_for_page_scan?
- job_defs << job_def.merge(job: Domain::Fa::Job::UserPageJob)
- elsif !user || user.due_for_gallery_scan?
- job_defs << job_def.merge(job: Domain::Fa::Job::UserGalleryJob)
- end
- when :fa_post
- fa_id = link[:fa_id]
- post = fa_id_to_fa_post[fa_id]
- params = (post ? { post: post } : { fa_id: fa_id })
-
- job_def = ({ params: params, desc: "fa post #{fa_id.to_s.bold}" })
-
- if !post || (post.state == "ok" && !post.scanned?)
- job_defs << job_def.merge(job: Domain::Fa::Job::ScanPostJob)
- end
-
- if post && post.state == "ok" && post.scanned? && !post.have_file?
- job_defs << job_def.merge(job: Domain::Fa::Job::ScanFileJob)
- end
- when :twitter_user
- name = link[:name]
- user = name_to_twitter_user[name]
- params = (user ? { user: user } : { name: name })
-
- job_def =
- (
- {
- params: params,
- desc: "twitter user " + (user&.name || name).bold,
- }
- )
-
- if !user || user.due_for_timeline_tweets_scan?
- job_defs << job_def.merge(
- job: Domain::Twitter::Job::UserTimelineTweetsJob,
- )
- end
- else
- logger.warn(
- "unknown link type #{link_type.to_s.bold}: #{link.inspect.bold}",
- )
- end
- end
-
- job_defs.uniq!
- job_defs.reject! do |job_def|
- suppress_jobs.any? do |suppress|
- suppress == job_def.slice(*suppress.keys)
- end
- end
- job_defs.each do |job_def|
- job_class = job_def[:job]
- params = job_def[:params]
- desc = job_def[:desc]
- logger.debug(
- [
- "link finder -",
- job_class.name.split("::").last.to_s.ljust(22).bold.light_black,
- desc,
- ].join(" "),
- )
-
- defer_job(job_class, params.merge({ caused_by_entry: log_entry }))
- end
- duration_ms = (1000 * (Time.now - start_time)).to_i.to_s
- logger.info(
- "link finder - enqueue #{job_defs.size.to_s.light_white.bold} jobs (#{duration_ms.bold} ms)",
- )
- end
-
# Delay a little bit on Net::ReadTimeout or Errno::ECONNREFUSED
around_perform do |job, block|
block.call
diff --git a/app/lib/scraper/fa_http_client_config.rb b/app/lib/scraper/fa_http_client_config.rb
index fe7e20be..9da068b1 100644
--- a/app/lib/scraper/fa_http_client_config.rb
+++ b/app/lib/scraper/fa_http_client_config.rb
@@ -31,7 +31,7 @@ class Scraper::FaHttpClientConfig < Scraper::HttpClientConfig
def ratelimit
# number represents minimum delay in seconds between requests to the same domain
- [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1.5]]
+ [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1]]
end
def allowed_domains
diff --git a/app/lib/scraper/link_finder.rb b/app/lib/scraper/link_finder.rb
index a88ccf75..d5786576 100644
--- a/app/lib/scraper/link_finder.rb
+++ b/app/lib/scraper/link_finder.rb
@@ -22,7 +22,26 @@ class Scraper::LinkFinder
T::Array[Regexp],
)
- sig { returns(T::Array[T.untyped]) }
+ module FoundLink
+ class FaUser < T::ImmutableStruct
+ include T::Struct::ActsAsComparable
+ const :url_name, String
+ end
+
+ class FaPost < T::ImmutableStruct
+ include T::Struct::ActsAsComparable
+ const :fa_id, Integer
+ end
+
+ class TwitterUser < T::ImmutableStruct
+ include T::Struct::ActsAsComparable
+ const :name, String
+ end
+
+ Type = T.type_alias { T.any(FaUser, FaPost, TwitterUser) }
+ end
+
+ sig { returns(T::Array[FoundLink::Type]) }
def find_links
from_text_uris =
RAW_REGEXES
@@ -64,7 +83,7 @@ class Scraper::LinkFinder
process_twitter(uri)
end
end
- .reject(&:nil?)
+ .compact
.uniq
end
@@ -74,22 +93,18 @@ class Scraper::LinkFinder
FA_POST_PREFIXES = %w[/view/ /full/]
- sig do
- params(uri: Addressable::URI).returns(T.nilable(T::Hash[Symbol, T.untyped]))
- end
+ sig { params(uri: Addressable::URI).returns(T.nilable(FoundLink::Type)) }
def process_fa(uri)
if prefixed_with?(FA_USER_PREFIXES, uri.path)
url_name = path_parts(uri.path)[1]&.downcase
- { type: :fa_user, url_name: url_name } if url_name
+ FoundLink::FaUser.new(url_name: url_name) if url_name
elsif prefixed_with?(FA_POST_PREFIXES, uri.path)
fa_id = path_parts(uri.path)[1]&.to_i
- { type: :fa_post, fa_id: fa_id } if fa_id
+ FoundLink::FaPost.new(fa_id: fa_id) if fa_id
end
end
- sig do
- params(uri: Addressable::URI).returns(T.nilable(T::Hash[Symbol, T.untyped]))
- end
+ sig { params(uri: Addressable::URI).returns(T.nilable(FoundLink::Type)) }
def process_e621(uri)
nil
end
@@ -111,15 +126,13 @@ class Scraper::LinkFinder
support.twitter.com
]
- sig do
- params(uri: Addressable::URI).returns(T.nilable(T::Hash[Symbol, T.untyped]))
- end
+ sig { params(uri: Addressable::URI).returns(T.nilable(FoundLink::Type)) }
def process_twitter(uri)
return if TWITTER_IGNORE_HOSTS.include?(uri.host)
return if TWITTER_IGNORE_FIRST_PATH_PART.include?(path_parts(uri.path)[0])
name = path_parts(uri.path)[0]
- { type: :twitter_user, name: name } if name
+ FoundLink::TwitterUser.new(name: name) if name
end
sig { params(prefixes: T::Array[String], path: String).returns(T::Boolean) }
diff --git a/spec/helpers/perform_job_helpers.rb b/spec/helpers/perform_job_helpers.rb
index 06ccab10..ac2123cb 100644
--- a/spec/helpers/perform_job_helpers.rb
+++ b/spec/helpers/perform_job_helpers.rb
@@ -12,7 +12,10 @@ module PerformJobHelpers
).returns(T.untyped)
end
def perform_now(params, should_raise: false)
- ret = described_class.perform_now(params)
+ ret =
+ described_class.perform_now(
+ { skip_enqueue_found_links: true }.merge(params),
+ )
bt_printer =
Kernel.proc do
diff --git a/spec/jobs/domain/fa/job/user_page_job_spec.rb b/spec/jobs/domain/fa/job/user_page_job_spec.rb
index a959c69d..613a2b1a 100644
--- a/spec/jobs/domain/fa/job/user_page_job_spec.rb
+++ b/spec/jobs/domain/fa/job/user_page_job_spec.rb
@@ -64,4 +64,31 @@ describe Domain::Fa::Job::UserPageJob do
expect(user.num_favorites).to eq(0)
end
end
+
+ context "user with page that links to unseen users" do
+ let(:client_mock_config) do
+ [
+ {
+ uri: "https://www.furaffinity.net/user/angelpawqt/",
+ status_code: 200,
+ content_type: "text/html",
+ contents:
+ SpecUtil.read_fixture_file(
+ "domain/fa/user_page/user_page_angelpawqt.html",
+ ),
+ },
+ ]
+ end
+
+ it "enqueues jobs for the unseen users" do
+ perform_now({ url_name: "angelpawqt", skip_enqueue_found_links: false })
+ expect(
+ SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob),
+ ).to include(
+ hash_including(
+ user: Domain::User::FaUser.find_by(url_name: "8bitstarshon1"),
+ ),
+ )
+ end
+ end
end
diff --git a/spec/lib/scraper/fa_http_client_config_spec.rb b/spec/lib/scraper/fa_http_client_config_spec.rb
index f27fd4ba..29ae78fe 100644
--- a/spec/lib/scraper/fa_http_client_config_spec.rb
+++ b/spec/lib/scraper/fa_http_client_config_spec.rb
@@ -109,7 +109,7 @@ RSpec.describe Scraper::FaHttpClientConfig do
it "returns the configured rate limits" do
config = described_class.new
expect(config.ratelimit).to eq(
- [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1.5]],
+ [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1]],
)
end
end
diff --git a/spec/scraper/link_finder_spec.rb b/spec/scraper/link_finder_spec.rb
index 5bb8805e..080004f1 100644
--- a/spec/scraper/link_finder_spec.rb
+++ b/spec/scraper/link_finder_spec.rb
@@ -1,11 +1,84 @@
# typed: false
+require "rails_helper"
+
describe Scraper::LinkFinder do
def find_links(origin_host, document)
sort(Scraper::LinkFinder.new(origin_host, document).find_links)
end
def sort(links)
- links.sort_by { |l| l.to_s }
+ links.uniq.sort_by { |l| l.to_s }
+ end
+
+ it "finds links on FA user pages" do
+ document =
+ File.read(
+ "test/fixtures/files/domain/fa/user_page/user_page_angelpawqt.html",
+ )
+ links = find_links("www.furaffinity.net", document)
+ expect(links).to include(
+ Scraper::LinkFinder::FoundLink::FaPost.new(fa_id: 54_212_636),
+ )
+ expect(links).to include(
+ Scraper::LinkFinder::FoundLink::FaUser.new(url_name: "8bitstarshon1"),
+ )
+ expect(links).to include(
+ Scraper::LinkFinder::FoundLink::TwitterUser.new(name: "furaffinity"),
+ )
+
+ # { type: :fa_post, fa_id: 44_848_118 },
+ # { type: :fa_post, fa_id: 50_619_594 },
+ # { type: :fa_post, fa_id: 51_321_948 },
+ # { type: :fa_post, fa_id: 51_379_769 },
+ # { type: :fa_post, fa_id: 52_782_367 },
+ # { type: :fa_post, fa_id: 54_212_636 },
+ # { type: :fa_post, fa_id: 54_717_098 },
+ # { type: :fa_post, fa_id: 57_492_887 },
+ # { type: :fa_post, fa_id: 57_567_803 },
+ # { type: :fa_post, fa_id: 57_736_082 },
+ # { type: :fa_post, fa_id: 57_744_156 },
+ # { type: :fa_post, fa_id: 57_889_649 },
+ # { type: :fa_post, fa_id: 57_898_215 },
+ # { type: :fa_post, fa_id: 58_080_112 },
+ # { type: :fa_post, fa_id: 58_196_925 },
+ # { type: :fa_post, fa_id: 58_233_811 },
+ # { type: :fa_post, fa_id: 59_515_926 },
+ # { type: :fa_user, url_name: "8bitstarshon1" },
+ # { type: :fa_user, url_name: "angelpawqt" },
+ # { type: :fa_user, url_name: "arilace" },
+ # { type: :fa_user, url_name: "chamomilearts" },
+ # { type: :fa_user, url_name: "cheshirkas" },
+ # { type: :fa_user, url_name: "cinnabunnyart" },
+ # { type: :fa_user, url_name: "cloudsen" },
+ # { type: :fa_user, url_name: "eyeswings" },
+ # { type: :fa_user, url_name: "fender" },
+ # { type: :fa_user, url_name: "feztiz" },
+ # { type: :fa_user, url_name: "grau" },
+ # { type: :fa_user, url_name: "huibiyyy" },
+ # { type: :fa_user, url_name: "hyongatayro" },
+ # { type: :fa_user, url_name: "i-luv-tentacles" },
+ # { type: :fa_user, url_name: "innoinatamm" },
+ # { type: :fa_user, url_name: "likara" },
+ # { type: :fa_user, url_name: "limoncella" },
+ # { type: :fa_user, url_name: "nacpanylis" },
+ # { type: :fa_user, url_name: "oseledetts" },
+ # { type: :fa_user, url_name: "pinxi" },
+ # { type: :fa_user, url_name: "plazmafox" },
+ # { type: :fa_user, url_name: "rosebyki1" },
+ # { type: :fa_user, url_name: "rosebyki2" },
+ # { type: :fa_user, url_name: "samarthanie" },
+ # { type: :fa_user, url_name: "sekaitoroto" },
+ # { type: :fa_user, url_name: "snakeki" },
+ # { type: :fa_user, url_name: "tailung" },
+ # { type: :fa_user, url_name: "tentastic" },
+ # { type: :fa_user, url_name: "toks" },
+ # { type: :fa_user, url_name: "vestel" },
+ # { type: :fa_user, url_name: "yabewenyx" },
+ # { type: :fa_user, url_name: "yoyikori" },
+ # { type: :fa_user, url_name: "zzreg" },
+ # { type: :fa_user, url_name: "~fikanori" },
+ # { type: :twitter_user, name: "furaffinity" },
+ # )
end
it "finds links on pages from furaffinity" do
@@ -13,14 +86,16 @@ describe Scraper::LinkFinder do
Hello, world! This is my document. Foobar
visit me at www.furaffinity.net/user/baz or twitter.com/foobartw
HTML
- assert_equal sort(
- [
- { type: :fa_user, url_name: "foobar" },
- { type: :fa_user, url_name: "baz" },
- { type: :twitter_user, name: "foobartw" }
- ]
- ),
- find_links("www.furaffinity.net", document)
+ links = find_links("www.furaffinity.net", document)
+ expect(links).to include(
+ Scraper::LinkFinder::FoundLink::FaUser.new(url_name: "foobar"),
+ )
+ expect(links).to include(
+ Scraper::LinkFinder::FoundLink::FaUser.new(url_name: "baz"),
+ )
+ expect(links).to include(
+ Scraper::LinkFinder::FoundLink::TwitterUser.new(name: "foobartw"),
+ )
end
it "ignores raw links ending in an ellipsis" do
@@ -38,7 +113,9 @@ describe Scraper::LinkFinder do
document = <<-HTML
www.furaffinity.net/user/Some-User
HTML
- assert_equal sort([{ type: :fa_user, url_name: "some-user" }]),
- find_links("www.furaffinity.net", document)
+ links = find_links("www.furaffinity.net", document)
+ expect(links).to include(
+ Scraper::LinkFinder::FoundLink::FaUser.new(url_name: "some-user"),
+ )
end
end
diff --git a/test/fixtures/files/domain/fa/user_page/user_page_angelpawqt.html b/test/fixtures/files/domain/fa/user_page/user_page_angelpawqt.html
new file mode 100644
index 00000000..451372a0
--- /dev/null
+++ b/test/fixtures/files/domain/fa/user_page/user_page_angelpawqt.html
@@ -0,0 +1,1236 @@
+
+
+
+
+
+
+
+ Userpage of angelpawqt -- Fur Affinity [dot] net
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ~angelpawqt
+
+
+
+
+ Registered: Aug 27, 2024 02:15
+
+
+
+
+
+ +Watch
+
+
+ +Block
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



+
Welcome to my page
+
+
+


+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This user has no submissions.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Views: 242
+ Submissions: 0
+ Favs: 1
+
+ Comments Earned: 15
+ Comments Made: 12
+ Journals: 0
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Accepting Commissions
+ No
+
+
+
+
Character Species
+ Cow
+
+
+
Favorite Music
+ classic rock
+
+
+
Favorite TV Shows & Movies
+ The nightmare before christmas
+
+
+
Favorite Games
+ dbd and minecraft
+
+
+
Favorite Gaming Platforms
+ Playstation and pc
+
+
+
Favorite Animals
+ Cows and foxes
+
+
+
+
+
Favorite Foods & Drinks
+ enchiladas
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
This website uses cookies to enhance your browsing experience.
Learn More
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
EyesWings