enqueue from links
@@ -10,6 +10,8 @@ class Domain::PostsController < DomainController
   before_action :set_post_group!, only: %i[posts_in_group]

   class PostsIndexViewConfig < T::ImmutableStruct
     include T::Struct::ActsAsComparable

     const :show_domain_filters, T::Boolean
     const :show_creator_links, T::Boolean
     const :index_type_header, String
@@ -5,6 +5,8 @@ class DomainController < ApplicationController
   abstract!

   class DomainParamConfig < T::ImmutableStruct
     include T::Struct::ActsAsComparable

     const :post_id_param, Symbol
     const :user_id_param, Symbol
     const :post_group_id_param, Symbol
@@ -91,6 +91,8 @@ module Domain::UsersHelper
   end

   class StatRow < T::ImmutableStruct
     include T::Struct::ActsAsComparable

     const :name, String
     const :value,
           T.nilable(
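The same `T::ImmutableStruct` plus `ActsAsComparable` pattern recurs in all three hunks above. A minimal sketch of the semantics this buys, assuming sorbet-runtime's documented behavior (the `StatRow` shape is abbreviated here):

    require "sorbet-runtime"

    class StatRow < T::ImmutableStruct
      include T::Struct::ActsAsComparable
      const :name, String
      const :value, T.nilable(Integer)
    end

    a = StatRow.new(name: "posts", value: 42)
    b = StatRow.new(name: "posts", value: 42)
    a == b    # => true: ActsAsComparable compares prop values, not identity
    a.frozen? # => true: T::ImmutableStruct freezes instances after initialize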
@@ -12,6 +12,11 @@ class Domain::Fa::Job::Base < Scraper::JobBase

   protected

+  sig { returns(T::Boolean) }
+  def skip_enqueue_found_links?
+    !!arguments[0][:skip_enqueue_found_links]
+  end
+
   sig { params(build_post: T::Boolean).returns(Domain::Post::FaPost) }
   def post_from_args!(build_post: false)
     args = arguments[0]
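The `!!` in the new `skip_enqueue_found_links?` helper does real work: ActiveJob's `arguments[0]` is the params hash the job was enqueued with, and the flag key may be absent. A small illustrative sketch in plain Ruby, not the job class itself:

    def skip?(args)
      !!args[:skip_enqueue_found_links]
    end

    skip?({})                                  # => false (missing key -> nil -> false)
    skip?({ skip_enqueue_found_links: true })  # => true, satisfying the T::Boolean sig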
@@ -394,4 +399,106 @@ class Domain::Fa::Job::Base < Scraper::JobBase
       user.association(:avatar).reload
     end
   end
+
+  FoundLink = Scraper::LinkFinder::FoundLink
+
+  sig do
+    params(log_entry: HttpLogEntry, suppress_jobs: T::Array[T.untyped]).void
+  end
+  def enqueue_jobs_from_found_links(log_entry, suppress_jobs: [])
+    return if skip_enqueue_found_links?
+
+    start_time = Time.now
+    unless PERMITTED_CONTENT_TYPES.any? { |ct|
+             ct.match(log_entry.content_type)
+           }
+      raise("unsupported content type: #{log_entry.content_type}")
+    end
+    document = log_entry.response&.contents || return
+    link_finder = Scraper::LinkFinder.new(T.must(log_entry.uri_host), document)
+    link_finder.logger.level = :error
+    links = link_finder.find_links
+    job_defs = []
+
+    url_names =
+      links.filter_map do |link|
+        link.is_a?(FoundLink::FaUser) ? link.url_name : nil
+      end
+    url_name_to_fa_user =
+      T.let(
+        Domain::User::FaUser.where(url_name: url_names).index_by(&:url_name),
+        T::Hash[String, Domain::User::FaUser],
+      )
+
+    fa_ids =
+      links.filter_map do |link|
+        link.is_a?(FoundLink::FaPost) ? link.fa_id : nil
+      end
+    fa_id_to_fa_post =
+      T.cast(
+        Domain::Post::FaPost.where(fa_id: fa_ids).index_by(&:fa_id),
+        T::Hash[Integer, Domain::Post::FaPost],
+      )
+
+    links
+      .select do |link|
+        link.is_a?(FoundLink::FaUser) || link.is_a?(FoundLink::FaPost)
+      end
+      .each do |link|
+        case link
+        when FoundLink::FaUser
+          url_name = link.url_name
+          user =
+            url_name_to_fa_user[url_name] ||
+              Domain::User::FaUser.create!(url_name:) do |user|
+                user.name ||= url_name
+              end
+          enqueue_user_scan(user)
+        when FoundLink::FaPost
+          fa_id = link.fa_id
+          post =
+            fa_id_to_fa_post[fa_id] ||
+              Domain::Post::FaPost.build(fa_id:) do |post|
+                post.first_seen_entry_id = log_entry.id
+              end
+          if post.new_record?
+            post.save!
+            defer_job(Domain::Fa::Job::ScanPostJob, { post: })
+          end
+        end
+      end
+
+    job_defs.uniq!
+    job_defs.reject! do |job_def|
+      suppress_jobs.any? do |suppress|
+        suppress == job_def.slice(*suppress.keys)
+      end
+    end
+    job_defs.each do |job_def|
+      job_class = job_def[:job]
+      params = job_def[:params]
+      desc = job_def[:desc]
+      logger.debug(
+        [
+          "link finder -",
+          job_class.name.split("::").last.to_s.ljust(22).bold.light_black,
+          desc,
+        ].join(" "),
+      )
+
+      defer_job(job_class, params.merge({ caused_by_entry: log_entry }))
+    end
+    duration_ms = (1000 * (Time.now - start_time)).to_i.to_s
+    logger.info(
+      "link finder - enqueue #{job_defs.size.to_s.light_white.bold} jobs (#{duration_ms.bold} ms)",
+    )
+  rescue StandardError => e
+    logger.error(
+      "link finder - error enqueuing jobs: #{e.class.name} - #{e.message}",
+    )
+  end
 end
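Two details of the new method are worth spelling out. The `where(...).index_by(...)` lookups batch the user and post fetches into two queries before the loop, instead of one query per link. And the `suppress_jobs` filter works by subset comparison: a suppression hash matches a job definition when the job definition, restricted to the suppression's keys, equals it. A quick sketch with hypothetical values:

    suppress = { job: "UserGalleryJob", url_name: "someuser" }
    job_def  = { job: "UserGalleryJob", url_name: "someuser", desc: "fa user someuser" }

    suppress == job_def.slice(*suppress.keys)                # => true, so this job_def is rejected
    suppress == { job: "UserPageJob" }.slice(*suppress.keys) # => false, so that one is kept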
@@ -57,6 +57,8 @@ class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
       )
     end

+    enqueue_jobs_from_found_links(response.log_entry)
+
     page = Domain::Fa::Parser::Page.new(response.body)
     listing_page_stats =
       update_and_enqueue_posts_from_listings_page(
@@ -93,15 +93,8 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
     page_url =
       "https://www.furaffinity.net#{folder_href}#{page_number}?perpage=72"
     response = http_client.get(page_url)
     log_entry = response.log_entry

     fatal_error("failed to scan folder page") if response.status_code != 200

+    enqueue_jobs_from_found_links(
+      log_entry,
+      suppress_jobs: [{ job: self.class, url_name: user.url_name }],
+    )
+
     if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
       user,
       response,
@@ -2,13 +2,13 @@
 class Domain::Inkbunny::Job::UpdatePostsJob < Domain::Inkbunny::Job::Base
   sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
   def perform(args)
-    ib_post_ids = args[:ib_post_ids]
+    ib_post_ids = T.cast(args[:ib_post_ids], T.nilable(T::Array[Integer]))

     missing_pool_post_ib_ids = T::Set[Integer].new
     pools_to_update = T::Set[Domain::PostGroup::InkbunnyPool].new

-    if ib_post_ids.empty?
-      logger.warn "empty ib_post_ids"
+    if ib_post_ids.blank? || ib_post_ids.empty?
+      logger.error("empty ib_post_ids")
       return
     end
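For reference, ActiveSupport's `#blank?` already treats both `nil` and an empty array as blank, so the combined guard above is belt-and-braces:

    require "active_support/core_ext/object/blank"

    nil.blank? # => true
    [].blank?  # => true
    [1].blank? # => false, so the guard only fires for nil or empty input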
@@ -35,7 +35,8 @@ module Domain::StaticFileJobHelper
|
||||
|
||||
begin
|
||||
response = http_client.get(file_url_str)
|
||||
rescue Scraper::HttpClient::InvalidURLError => e
|
||||
rescue Scraper::HttpClient::InvalidURLError,
|
||||
Curl::Err::HostResolutionError => e
|
||||
post_file.state_terminal_error!
|
||||
post_file.error_message = e.message
|
||||
logger.error(
|
||||
|
||||
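A single `rescue` clause can name several exception classes, which is what the widened handler above relies on. A minimal standalone sketch (the `fetch` and `handle_terminal_error` helpers are hypothetical stand-ins):

    begin
      response = fetch(file_url_str) # hypothetical stand-in for http_client.get
    rescue Scraper::HttpClient::InvalidURLError,
           Curl::Err::HostResolutionError => e
      # both classes land here; e is bound to whichever was raised
      handle_terminal_error(e) # hypothetical
    end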
@@ -174,139 +174,6 @@ class Scraper::JobBase < ApplicationJob
   PERMITTED_CONTENT_TYPES =
     T.let([%r{text/html}, %r{application/json}], T::Array[Regexp])

-  sig do
-    params(log_entry: HttpLogEntry, suppress_jobs: T::Array[T.untyped]).void
-  end
-  def enqueue_jobs_from_found_links(log_entry, suppress_jobs: [])
-    return unless ENV["enqueue_jobs_from_found_links"] == "1"
-
-    start_time = Time.now
-    unless PERMITTED_CONTENT_TYPES.any? { |ct|
-             ct.match(log_entry.content_type)
-           }
-      raise("unsupported content type: #{log_entry.content_type}")
-    end
-    document = log_entry.response&.contents || return
-    link_finder = Scraper::LinkFinder.new(T.must(log_entry.uri_host), document)
-    link_finder.logger.level = :error
-    links = link_finder.find_links
-    job_defs = []
-
-    filter_mapper =
-      proc do |list, type, key|
-        list.filter { |item| item[:type] == type }.map { |item| item[key] }
-      end
-
-    attr_to_mapper =
-      proc do |model_class, attr_sym, in_values|
-        model_class
-          .where({ attr_sym => in_values })
-          .map { |model| [model.send(attr_sym), model] }
-          .to_h
-      end
-
-    url_name_to_fa_user =
-      attr_to_mapper.call(
-        Domain::Fa::User,
-        :url_name,
-        filter_mapper.call(links, :fa_user, :url_name),
-      )
-
-    fa_id_to_fa_post =
-      attr_to_mapper.call(
-        Domain::Fa::Post,
-        :fa_id,
-        filter_mapper.call(links, :fa_post, :fa_id),
-      )
-
-    name_to_twitter_user =
-      attr_to_mapper.call(
-        Domain::Twitter::User,
-        :name,
-        filter_mapper.call(links, :twitter_user, :name),
-      )
-
-    links.each do |link|
-      link_type = link[:type]
-      case link_type
-      when :fa_user
-        url_name = link[:url_name]
-        user = url_name_to_fa_user[url_name]
-        params = (user ? { user: user } : { url_name: url_name })
-
-        job_def =
-          ({ params: params, desc: "fa user " + (user&.name || url_name).bold })
-
-        if !user || user.due_for_page_scan?
-          job_defs << job_def.merge(job: Domain::Fa::Job::UserPageJob)
-        elsif !user || user.due_for_gallery_scan?
-          job_defs << job_def.merge(job: Domain::Fa::Job::UserGalleryJob)
-        end
-      when :fa_post
-        fa_id = link[:fa_id]
-        post = fa_id_to_fa_post[fa_id]
-        params = (post ? { post: post } : { fa_id: fa_id })
-
-        job_def = ({ params: params, desc: "fa post #{fa_id.to_s.bold}" })
-
-        if !post || (post.state == "ok" && !post.scanned?)
-          job_defs << job_def.merge(job: Domain::Fa::Job::ScanPostJob)
-        end
-
-        if post && post.state == "ok" && post.scanned? && !post.have_file?
-          job_defs << job_def.merge(job: Domain::Fa::Job::ScanFileJob)
-        end
-      when :twitter_user
-        name = link[:name]
-        user = name_to_twitter_user[name]
-        params = (user ? { user: user } : { name: name })
-
-        job_def =
-          (
-            {
-              params: params,
-              desc: "twitter user " + (user&.name || name).bold,
-            }
-          )
-
-        if !user || user.due_for_timeline_tweets_scan?
-          job_defs << job_def.merge(
-            job: Domain::Twitter::Job::UserTimelineTweetsJob,
-          )
-        end
-      else
-        logger.warn(
-          "unknown link type #{link_type.to_s.bold}: #{link.inspect.bold}",
-        )
-      end
-    end
-
-    job_defs.uniq!
-    job_defs.reject! do |job_def|
-      suppress_jobs.any? do |suppress|
-        suppress == job_def.slice(*suppress.keys)
-      end
-    end
-    job_defs.each do |job_def|
-      job_class = job_def[:job]
-      params = job_def[:params]
-      desc = job_def[:desc]
-      logger.debug(
-        [
-          "link finder -",
-          job_class.name.split("::").last.to_s.ljust(22).bold.light_black,
-          desc,
-        ].join(" "),
-      )
-
-      defer_job(job_class, params.merge({ caused_by_entry: log_entry }))
-    end
-    duration_ms = (1000 * (Time.now - start_time)).to_i.to_s
-    logger.info(
-      "link finder - enqueue #{job_defs.size.to_s.light_white.bold} jobs (#{duration_ms.bold} ms)",
-    )
-  end
-
   # Delay a little bit on Net::ReadTimeout or Errno::ECONNREFUSED
   around_perform do |job, block|
     block.call
@@ -31,7 +31,7 @@ class Scraper::FaHttpClientConfig < Scraper::HttpClientConfig

   def ratelimit
     # minimum delay in seconds between requests to the same domain
-    [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1.5]]
+    [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1]]
   end

   def allowed_domains
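The matching logic for these patterns is not part of this diff; a hedged sketch of how a wildcard table like this could be consulted, using Ruby's `File.fnmatch` as the glob matcher:

    RATELIMIT = [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1]]

    def min_delay_for(host)
      _pattern, delay = RATELIMIT.find { |pattern, _| File.fnmatch(pattern, host) }
      delay == :none ? 0 : delay
    end

    min_delay_for("d.furaffinity.net")   # => 0 (no delay for the CDN hosts)
    min_delay_for("www.furaffinity.net") # => 1, i.e. one second between requests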
@@ -22,7 +22,26 @@ class Scraper::LinkFinder
       T::Array[Regexp],
     )

-  sig { returns(T::Array[T.untyped]) }
+  module FoundLink
+    class FaUser < T::ImmutableStruct
+      include T::Struct::ActsAsComparable
+      const :url_name, String
+    end
+
+    class FaPost < T::ImmutableStruct
+      include T::Struct::ActsAsComparable
+      const :fa_id, Integer
+    end
+
+    class TwitterUser < T::ImmutableStruct
+      include T::Struct::ActsAsComparable
+      const :name, String
+    end
+
+    Type = T.type_alias { T.any(FaUser, FaPost, TwitterUser) }
+  end
+
+  sig { returns(T::Array[FoundLink::Type]) }
   def find_links
     from_text_uris =
       RAW_REGEXES
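Replacing the untyped hashes with a closed union of structs lets consumers pattern-match exhaustively. A sketch of a consumer, using Sorbet's `T.absurd` idiom (the `describe_link` helper is hypothetical):

    sig { params(link: Scraper::LinkFinder::FoundLink::Type).returns(String) }
    def describe_link(link)
      case link
      when Scraper::LinkFinder::FoundLink::FaUser then "fa user #{link.url_name}"
      when Scraper::LinkFinder::FoundLink::FaPost then "fa post #{link.fa_id}"
      when Scraper::LinkFinder::FoundLink::TwitterUser then "twitter user #{link.name}"
      else T.absurd(link) # static error if a new variant is added to Type
      end
    end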
@@ -64,7 +83,7 @@ class Scraper::LinkFinder
             process_twitter(uri)
           end
         end
-        .reject(&:nil?)
+        .compact
         .uniq
   end
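`reject(&:nil?)` and `compact` are equivalent on arrays; the new spelling is just the idiomatic one:

    [1, nil, 2].reject(&:nil?) # => [1, 2]
    [1, nil, 2].compact        # => [1, 2], same result in one named method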
@@ -74,22 +93,18 @@ class Scraper::LinkFinder

   FA_POST_PREFIXES = %w[/view/ /full/]

-  sig do
-    params(uri: Addressable::URI).returns(T.nilable(T::Hash[Symbol, T.untyped]))
-  end
+  sig { params(uri: Addressable::URI).returns(T.nilable(FoundLink::Type)) }
   def process_fa(uri)
     if prefixed_with?(FA_USER_PREFIXES, uri.path)
       url_name = path_parts(uri.path)[1]&.downcase
-      { type: :fa_user, url_name: url_name } if url_name
+      FoundLink::FaUser.new(url_name: url_name) if url_name
     elsif prefixed_with?(FA_POST_PREFIXES, uri.path)
       fa_id = path_parts(uri.path)[1]&.to_i
-      { type: :fa_post, fa_id: fa_id } if fa_id
+      FoundLink::FaPost.new(fa_id: fa_id) if fa_id
     end
   end

-  sig do
-    params(uri: Addressable::URI).returns(T.nilable(T::Hash[Symbol, T.untyped]))
-  end
+  sig { params(uri: Addressable::URI).returns(T.nilable(FoundLink::Type)) }
   def process_e621(uri)
     nil
   end
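The prefix checks and `path_parts` indexing above amount to this kind of parse (a sketch; `path_parts` is assumed to split the path and drop empty segments, its definition is not in this diff):

    require "addressable/uri"

    uri = Addressable::URI.parse("https://www.furaffinity.net/view/54212636/")
    parts = uri.path.split("/").reject(&:empty?) # assumed path_parts behavior
    parts         # => ["view", "54212636"]
    parts[1].to_i # => 54212636, the fa_id that FoundLink::FaPost carries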
@@ -111,15 +126,13 @@ class Scraper::LinkFinder
     support.twitter.com
   ]

-  sig do
-    params(uri: Addressable::URI).returns(T.nilable(T::Hash[Symbol, T.untyped]))
-  end
+  sig { params(uri: Addressable::URI).returns(T.nilable(FoundLink::Type)) }
   def process_twitter(uri)
     return if TWITTER_IGNORE_HOSTS.include?(uri.host)
     return if TWITTER_IGNORE_FIRST_PATH_PART.include?(path_parts(uri.path)[0])

     name = path_parts(uri.path)[0]
-    { type: :twitter_user, name: name } if name
+    FoundLink::TwitterUser.new(name: name) if name
   end

   sig { params(prefixes: T::Array[String], path: String).returns(T::Boolean) }
@@ -12,7 +12,10 @@ module PerformJobHelpers
       ).returns(T.untyped)
     end
     def perform_now(params, should_raise: false)
-      ret = described_class.perform_now(params)
+      ret =
+        described_class.perform_now(
+          { skip_enqueue_found_links: true }.merge(params),
+        )

       bt_printer =
         Kernel.proc do
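Because the caller's params are merged last, an individual spec can opt back in to link enqueueing by passing the flag explicitly, which the UserPageJob spec below does:

    { skip_enqueue_found_links: true }.merge({ url_name: "x" })
    # => { skip_enqueue_found_links: true, url_name: "x" }

    { skip_enqueue_found_links: true }.merge({ skip_enqueue_found_links: false })
    # => { skip_enqueue_found_links: false }, the caller's value wins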
@@ -64,4 +64,31 @@ describe Domain::Fa::Job::UserPageJob do
       expect(user.num_favorites).to eq(0)
     end
   end
+
+  context "user with page that links to unseen users" do
+    let(:client_mock_config) do
+      [
+        {
+          uri: "https://www.furaffinity.net/user/angelpawqt/",
+          status_code: 200,
+          content_type: "text/html",
+          contents:
+            SpecUtil.read_fixture_file(
+              "domain/fa/user_page/user_page_angelpawqt.html",
+            ),
+        },
+      ]
+    end
+
+    it "enqueues jobs for the unseen users" do
+      perform_now({ url_name: "angelpawqt", skip_enqueue_found_links: false })
+      expect(
+        SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob),
+      ).to include(
+        hash_including(
+          user: Domain::User::FaUser.find_by(url_name: "8bitstarshon1"),
+        ),
+      )
+    end
+  end
 end
@@ -109,7 +109,7 @@ RSpec.describe Scraper::FaHttpClientConfig do
   it "returns the configured rate limits" do
     config = described_class.new
     expect(config.ratelimit).to eq(
-      [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1.5]],
+      [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1]],
     )
   end
 end
@@ -1,11 +1,84 @@
 # typed: false
 require "rails_helper"

 describe Scraper::LinkFinder do
   def find_links(origin_host, document)
     sort(Scraper::LinkFinder.new(origin_host, document).find_links)
   end

   def sort(links)
-    links.sort_by { |l| l.to_s }
+    links.uniq.sort_by { |l| l.to_s }
   end
+
+  it "finds links on FA user pages" do
+    document =
+      File.read(
+        "test/fixtures/files/domain/fa/user_page/user_page_angelpawqt.html",
+      )
+    links = find_links("www.furaffinity.net", document)
+    expect(links).to include(
+      Scraper::LinkFinder::FoundLink::FaPost.new(fa_id: 54_212_636),
+    )
+    expect(links).to include(
+      Scraper::LinkFinder::FoundLink::FaUser.new(url_name: "8bitstarshon1"),
+    )
+    expect(links).to include(
+      Scraper::LinkFinder::FoundLink::TwitterUser.new(name: "furaffinity"),
+    )
+
+    # { type: :fa_post, fa_id: 44_848_118 },
+    # { type: :fa_post, fa_id: 50_619_594 },
+    # { type: :fa_post, fa_id: 51_321_948 },
+    # { type: :fa_post, fa_id: 51_379_769 },
+    # { type: :fa_post, fa_id: 52_782_367 },
+    # { type: :fa_post, fa_id: 54_212_636 },
+    # { type: :fa_post, fa_id: 54_717_098 },
+    # { type: :fa_post, fa_id: 57_492_887 },
+    # { type: :fa_post, fa_id: 57_567_803 },
+    # { type: :fa_post, fa_id: 57_736_082 },
+    # { type: :fa_post, fa_id: 57_744_156 },
+    # { type: :fa_post, fa_id: 57_889_649 },
+    # { type: :fa_post, fa_id: 57_898_215 },
+    # { type: :fa_post, fa_id: 58_080_112 },
+    # { type: :fa_post, fa_id: 58_196_925 },
+    # { type: :fa_post, fa_id: 58_233_811 },
+    # { type: :fa_post, fa_id: 59_515_926 },
+    # { type: :fa_user, url_name: "8bitstarshon1" },
+    # { type: :fa_user, url_name: "angelpawqt" },
+    # { type: :fa_user, url_name: "arilace" },
+    # { type: :fa_user, url_name: "chamomilearts" },
+    # { type: :fa_user, url_name: "cheshirkas" },
+    # { type: :fa_user, url_name: "cinnabunnyart" },
+    # { type: :fa_user, url_name: "cloudsen" },
+    # { type: :fa_user, url_name: "eyeswings" },
+    # { type: :fa_user, url_name: "fender" },
+    # { type: :fa_user, url_name: "feztiz" },
+    # { type: :fa_user, url_name: "grau" },
+    # { type: :fa_user, url_name: "huibiyyy" },
+    # { type: :fa_user, url_name: "hyongatayro" },
+    # { type: :fa_user, url_name: "i-luv-tentacles" },
+    # { type: :fa_user, url_name: "innoinatamm" },
+    # { type: :fa_user, url_name: "likara" },
+    # { type: :fa_user, url_name: "limoncella" },
+    # { type: :fa_user, url_name: "nacpanylis" },
+    # { type: :fa_user, url_name: "oseledetts" },
+    # { type: :fa_user, url_name: "pinxi" },
+    # { type: :fa_user, url_name: "plazmafox" },
+    # { type: :fa_user, url_name: "rosebyki1" },
+    # { type: :fa_user, url_name: "rosebyki2" },
+    # { type: :fa_user, url_name: "samarthanie" },
+    # { type: :fa_user, url_name: "sekaitoroto" },
+    # { type: :fa_user, url_name: "snakeki" },
+    # { type: :fa_user, url_name: "tailung" },
+    # { type: :fa_user, url_name: "tentastic" },
+    # { type: :fa_user, url_name: "toks" },
+    # { type: :fa_user, url_name: "vestel" },
+    # { type: :fa_user, url_name: "yabewenyx" },
+    # { type: :fa_user, url_name: "yoyikori" },
+    # { type: :fa_user, url_name: "zzreg" },
+    # { type: :fa_user, url_name: "~fikanori" },
+    # { type: :twitter_user, name: "furaffinity" },
+    # )
+  end
it "finds links on pages from furaffinity" do
|
||||
@@ -13,14 +86,16 @@ describe Scraper::LinkFinder do
|
||||
Hello, world! This is my document. <a href="/user/foobar">Foobar</a>
|
||||
visit me at www.furaffinity.net/user/baz or twitter.com/foobartw
|
||||
HTML
|
||||
assert_equal sort(
|
||||
[
|
||||
{ type: :fa_user, url_name: "foobar" },
|
||||
{ type: :fa_user, url_name: "baz" },
|
||||
{ type: :twitter_user, name: "foobartw" }
|
||||
]
|
||||
),
|
||||
find_links("www.furaffinity.net", document)
|
||||
links = find_links("www.furaffinity.net", document)
|
||||
expect(links).to include(
|
||||
Scraper::LinkFinder::FoundLink::FaUser.new(url_name: "foobar"),
|
||||
)
|
||||
expect(links).to include(
|
||||
Scraper::LinkFinder::FoundLink::FaUser.new(url_name: "baz"),
|
||||
)
|
||||
expect(links).to include(
|
||||
Scraper::LinkFinder::FoundLink::TwitterUser.new(name: "foobartw"),
|
||||
)
|
||||
end
|
||||
|
||||
it "ignores raw links ending in an ellipsis" do
|
||||
@@ -38,7 +113,9 @@ describe Scraper::LinkFinder do
|
||||
document = <<-HTML
|
||||
www.furaffinity.net/user/Some-User
|
||||
HTML
|
||||
assert_equal sort([{ type: :fa_user, url_name: "some-user" }]),
|
||||
find_links("www.furaffinity.net", document)
|
||||
links = find_links("www.furaffinity.net", document)
|
||||
expect(links).to include(
|
||||
Scraper::LinkFinder::FoundLink::FaUser.new(url_name: "some-user"),
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
test/fixtures/files/domain/fa/user_page/user_page_angelpawqt.html (vendored, new file, 1236 lines; diff suppressed because it is too large)