scaffold out recommendation user script

This commit is contained in:
Dylan Knutson
2023-04-01 16:33:07 +09:00
parent 2fe46ae778
commit b33028eb6e
16 changed files with 2218 additions and 26 deletions

View File

@@ -105,4 +105,8 @@ gem "good_job"
gem "neighbor"
gem "disco"
gem "faiss"
group :production do
gem "rails_semantic_logger"
end
# gem "pg_party"

View File

@@ -235,6 +235,10 @@ GEM
nokogiri (>= 1.6)
rails-html-sanitizer (1.5.0)
loofah (~> 2.19, >= 2.19.1)
rails_semantic_logger (4.12.0)
rack
railties (>= 5.1)
semantic_logger (~> 4.13)
railties (7.0.4.2)
actionpack (= 7.0.4.2)
activesupport (= 7.0.4.2)
@@ -275,6 +279,8 @@ GEM
rexml (~> 3.2, >= 3.2.5)
rubyzip (>= 1.2.2, < 3.0)
websocket (~> 1.0)
semantic_logger (4.13.0)
concurrent-ruby (~> 1.0)
sprockets (4.2.0)
concurrent-ruby (~> 1.0)
rack (>= 2.2.4, < 4)
@@ -348,6 +354,7 @@ DEPENDENCIES
puma (~> 5.0)
rack-mini-profiler
rails (~> 7.0.4, >= 7.0.4.2)
rails_semantic_logger
rb-bsdiff!
ripcord
rspec-rails

View File

@@ -14,15 +14,17 @@ Rake.application.rake_require "e621"
Rake.application.rake_require "twitter"
# Points ActiveRecord's logger at standard output for the current rake run.
# The logger is assigned exactly once, via the $stdout global.
task :set_ar_stdout => :environment do
  ActiveRecord::Base.logger = Logger.new($stdout)
end
# Sends Rails logging to standard output using a compact "SEVERITY: message"
# line format, silences the ActiveRecord and ActiveJob loggers, and makes
# GoodJob share the Rails logger.
task :set_logger_stdout => :environment do
  Rails.logger = Logger.new($stdout)
  # The timestamp and progname arguments are deliberately left out of each line.
  Rails.logger.formatter = proc do |severity, _datetime, _progname, msg|
    "#{severity}: #{msg}\n"
  end
  ActiveRecord::Base.logger = nil
  ActiveJob::Base.logger = nil
  GoodJob.logger = Rails.logger
end
task :good_job_cron => :environment do
@@ -48,8 +50,8 @@ task :good_job do
"manual:4",
"static_file:3",
"+static_file,fa_post:1",
"+fa_user_page,fa_user_gallery,fa_post:2",
"fa_user_follows:1",
"+fa_user_page,fa_user_gallery,fa_post:3",
"fa_user_follows:2",
proxy == "serverhost-1" ? nil : "twitter_timeline_tweets:1",
].reject(&:nil?).join(";"),
}

View File

@@ -1,2 +1,21 @@
# Base controller. Every action requires a known API token in the request
# params, unless the request is accepted by VpnOnlyRouteConstraint.
class ApplicationController < ActionController::Base
  before_action :validate_api_token

  # Maps each accepted API token to the name of the user it belongs to.
  # Frozen so the shared table cannot be mutated at runtime.
  # NOTE(review): these tokens are secrets committed to source control;
  # consider moving them to encrypted credentials / ENV and rotating them.
  API_TOKENS = {
    "a4eb03ac-b33c-439c-9b51-a834d1c5cf48" => "dymk",
    "56cc81fe-8c00-4436-8981-4580eab00e66" => "targus",
    "a36f0d68-5262-4b62-9e2d-dfe648d70f35" => "vilk",
    "9c38727f-f11d-41de-b775-0effd86d520c" => "xjal",
    "e38c568f-a24d-4f26-87f0-dfcd898a359d" => "fyacin",
  }.freeze

  private

  # before_action callback, kept private so it is never exposed as an action.
  # Lets the request through when params[:api_token] is a key of API_TOKENS or
  # when VpnOnlyRouteConstraint matches the request; otherwise renders a 403
  # JSON error, which halts the callback chain.
  def validate_api_token
    return if API_TOKENS.key?(request.params[:api_token])
    return if VpnOnlyRouteConstraint.new.matches?(request)

    render status: :forbidden, json: { error: "not authenticated" }
  end
end

View File

@@ -151,8 +151,102 @@ class Domain::Fa::ApiController < ApplicationController
}
end
# Renders JSON with two keys:
#   all:          users similar to params[:url_name]
#   not_followed: nil when params[:exclude_url_name] is absent; otherwise either
#                 the list computed with that user passed to
#                 similar_users_by_followed, or an { error:, error_type: } hash
# Responds 404 when the user is unknown and 500 when user.disco is nil.
def similar_users
  url_name = params[:url_name]
  exclude_url_name = params[:exclude_url_name]

  user = Domain::Fa::User.find_by(url_name: url_name)
  unless user
    return render(
      status: 404,
      json: {
        error: "user '#{url_name}' not found",
        error_type: "user_not_found",
      },
    )
  end

  if user.disco.nil?
    return render(
      status: 500,
      json: {
        error: "user '#{url_name}' has not had recommendations computed yet",
        error_type: "recs_not_computed",
      },
    )
  end

  all_similar_users = similar_users_list(user.similar_users_by_followed)

  # Stays nil when no exclude_url_name was supplied.
  not_followed_similar_users =
    if exclude_url_name
      exclude_user = Domain::Fa::User.find_by(url_name: exclude_url_name)
      if exclude_user.nil?
        # TODO - enqueue a manual UserFollowsJob for this user and have client
        # re-try the request later
        {
          error: "user '#{exclude_url_name}' not found",
          error_type: "exclude_user_not_found",
        }
      elsif exclude_user.scanned_follows_at.nil?
        {
          error: "user '#{exclude_url_name}' followers list hasn't been scanned",
          error_type: "exclude_user_not_scanned",
        }
      else
        similar_users_list(user.similar_users_by_followed(exclude_user))
      end
    end

  render json: {
    all: all_similar_users,
    not_followed: not_followed_similar_users,
  }
end
private
# Converts the first `limit` users of `list` into hashes with the keys
# name, url_name, profile_thumb_url and url.
def similar_users_list(list, limit: 10)
  list.limit(limit).map do |similar|
    thumb_url = profile_thumb_url_for(similar)
    {
      name: similar.name,
      url_name: similar.url_name,
      profile_thumb_url: thumb_url,
      url: "https://www.furaffinity.net/user/#{similar.url_name}/",
    }
  end
end

# Best-effort avatar lookup for `user`. Parses the stored user page when one is
# found; otherwise enqueues a UserPageJob if the page was never scanned.
# Any error is logged and nil is returned.
def profile_thumb_url_for(user)
  # TODO - go re-scan all user pages and extract user thumbnail
  page_response = get_best_user_page_http_log_entry_for(user.url_name)
  if page_response
    parser = Domain::Fa::Parser::Page.new(page_response.contents, require_logged_in: false)
    return parser.user_page.profile_thumb_url
  end

  if user.scanned_page_at.nil?
    Domain::Fa::Job::UserPageJob.set({
      priority: -20,
    }).perform_later({
      user: user,
    })
  end
  nil
rescue => e
  logger.error("error getting profile_thumb_url: #{e.message}")
  nil
end
# Returns the response of the most recently created HttpLogEntry for the
# user's page on www.furaffinity.net, or nil when no entry yields one.
def get_best_user_page_http_log_entry_for(url_name)
  # older versions don't end in a trailing slash, so that form is tried second
  candidate_paths = ["/user/#{url_name}/", "/user/#{url_name}"]

  candidate_paths.each do |uri_path|
    newest_entry = HttpLogEntry.where(
      uri_scheme: "https",
      uri_host: "www.furaffinity.net",
      uri_path: uri_path,
    ).order(created_at: :desc).first
    found = newest_entry&.response
    return found if found
  end

  nil
end
def defer_post_scan(post, fa_id)
if !post || !post.scanned?
defer_manual(Domain::Fa::Job::ScanPostJob, {

View File

@@ -0,0 +1,20 @@
# Serves browser user scripts stored under Rails.root/user_scripts.
# No API token is required for the `get` action.
class UserScriptsController < ApplicationController
  skip_before_action :validate_api_token, only: [:get]

  # Sends the script named by params[:script]. Only explicitly listed script
  # names are served; anything else gets a plain-text 404.
  def get
    expires_in 1.hour, public: true
    response.cache_control[:public] = false
    response.cache_control[:private] = true

    case params[:script]
    when "furecs.user.js"
      send_file(
        Rails.root.join("user_scripts/furecs.user.js"),
        # The file is JavaScript, not JSON.
        type: "text/javascript",
      )
    else
      # `render text:` is not a supported option on this Rails version;
      # `plain:` renders the string as text/plain.
      render status: :not_found, plain: "not found"
    end
  end
end

View File

@@ -37,6 +37,16 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
@artist_type ||= elem_after_text_match(main_about.children, /Type/).try(:text).try(:strip)
end
# Memoized "src" attribute of the first avatar <img> matched by the selector
# for the current @page_version; other versions go to unimplemented_version!.
def profile_thumb_url
  return @profile_thumb_url if @profile_thumb_url

  @profile_thumb_url =
    case @page_version
    when VERSION_0
      @elem.css(".addpad.alt1 a img.avatar").first.try(:[], "src")
    when VERSION_2
      @elem.css("userpage-nav-avatar a.current img").first.try(:[], "src")
    else
      unimplemented_version!
    end
end
def registered_since
@registered_since ||= case @page_version
when VERSION_0, VERSION_1

View File

@@ -0,0 +1,6 @@
# Route constraint that matches only requests whose `ip` is one of a fixed
# set of addresses.
class VpnOnlyRouteConstraint
  # curtus IP on vpn, plus the IPv4 loopback address
  PERMITTED_IPS = ["10.200.0.3", "127.0.0.1"].freeze

  # Returns true when request.ip is one of PERMITTED_IPS, false otherwise.
  def matches?(request)
    PERMITTED_IPS.include?(request.ip)
  end
end

View File

@@ -90,12 +90,12 @@ class Domain::Fa::User < ReduxApplicationRecord
name.delete("_").delete("!").downcase
end
# users similar to this one, based on who this user follows
# (delegates to similar_users_by in :for_follower mode, with no exclusion)
def similar_users_by_follower
similar_users_by(:for_follower, nil)
end
# users similar to this one, based on who follows this user
# (delegates to similar_users_by in :for_followed mode; the optional
# exclude_already_followed argument is passed through unchanged)
def similar_users_by_followed(exclude_already_followed = nil)
similar_users_by(:for_followed, exclude_already_followed)
end

View File

@@ -88,6 +88,22 @@ Rails.application.configure do
config.logger = ActiveSupport::TaggedLogging.new(logger)
end
# Per-request log tags, each computed from the incoming request.
config.log_tags = {
  # Value of the CF-Connecting-IP header when present, else request.remote_ip.
  ip: ->(request) { request.headers["HTTP_CF_CONNECTING_IP"] || request.remote_ip },
  # The api_token param as sent by the client, or a placeholder when absent.
  api_token: ->(request) { request.params[:api_token] || "(nil api token)" },
  # Name that ApplicationController::API_TOKENS maps the token to.
  user_name: ->(request) {
    token = request.params[:api_token]
    if token
      ApplicationController::API_TOKENS[token] || "(nil user)"
    else
      "(nil api_token)"
    end
  },
}
# Do not dump schema after migrations.
config.active_record.dump_schema_after_migration = false
end

View File

@@ -1,33 +1,42 @@
Rails.application.routes.draw do
# Define your application routes per the DSL in https://guides.rubyonrails.org/routing.html
mount GoodJob::Engine => "jobs"
namespace :api do
namespace :fa do
post :enqueue_objects, to: "/domain/fa/api#enqueue_objects"
post :object_statuses, to: "/domain/fa/api#object_statuses"
end
namespace :twitter do
post :enqueue_objects, to: "/domain/twitter/api#enqueue_objects"
post :object_statuses, to: "/domain/twitter/api#object_statuses"
get :similar_users, to: "/domain/fa/api#similar_users"
end
end
namespace :domain do
namespace :fa do
resources :users
resources :posts, param: :fa_id, only: [:index, :show] do
post :scan_post, on: :member
get "us/:script", to: "user_scripts#get", constraints: { script: /.*/ }
scope constraints: VpnOnlyRouteConstraint.new do
mount GoodJob::Engine => "jobs"
namespace :api do
namespace :fa do
post :enqueue_objects, to: "/domain/fa/api#enqueue_objects"
post :object_statuses, to: "/domain/fa/api#object_statuses"
end
namespace :twitter do
post :enqueue_objects, to: "/domain/twitter/api#enqueue_objects"
post :object_statuses, to: "/domain/twitter/api#object_statuses"
end
end
end
# Defines the root path route ("/")
# root "articles#index"
resources :log_entries, only: [:index, :show] do
get :contents, on: :member
get :stats, on: :collection
namespace :domain do
namespace :fa do
resources :users
resources :posts, param: :fa_id, only: [:index, :show] do
post :scan_post, on: :member
end
end
end
get "filter/*filter", on: :collection, action: :index, constraints: { filter: /.*/ }
# Defines the root path route ("/")
# root "articles#index"
resources :log_entries, only: [:index, :show] do
get :contents, on: :member
get :stats, on: :collection
get "filter/*filter", on: :collection, action: :index, constraints: { filter: /.*/ }
end
end
end

View File

@@ -49,6 +49,13 @@ describe Domain::Fa::Parser::Page do
assert_equal "Jan 12th, 2006 07:52", up.registered_since
end
# Parses the "user_page_zaush_old_old.html" fixture and checks that the
# avatar URL is extracted from it.
it "user page old old version is correct" do
parser = get_parser "user_page_zaush_old_old.html"
assert parser.logged_in?
up = parser.user_page
# The expected value is protocol-relative (no scheme).
assert_equal "//a.facdn.net/1459994050/zaush.gif", up.profile_thumb_url
end
it "gallery_is_correct" do
parser = get_parser "gallery_page_miles_df.html"
assert parser.probably_listings_page?

View File

@@ -43,6 +43,7 @@ describe Domain::Fa::Parser::Page do
assert_equal 6, up.num_journals
assert_equal 1_236_200, up.num_favorites
assert_equal DateTime.new(2006, 1, 12, 7, 52), up.registered_since
assert_equal "//a.furaffinity.net/1556545516/miles-df.gif", up.profile_thumb_url
end
it "gallery_is_correct" do

File diff suppressed because one or more lines are too long

229
user_scripts/furecs.user.js Normal file
View File

@@ -0,0 +1,229 @@
// ==UserScript==
// @name FuRecs
// @namespace https://twitter.com/DeltaNoises
// @version 1.2
// @description FurAffinity User Recommender
// @author https://twitter.com/DeltaNoises
// @match https://www.furaffinity.net/user/*
// @icon https://www.google.com/s2/favicons?sz=64&domain=furaffinity.net
// @grant GM_xmlhttpRequest
// @grant GM.getValue
// @grant GM.setValue
// @connect refurrer.com
// @updateURL https://refurrer.com/us/furecs.user.js
// @downloadURL https://refurrer.com/us/furecs.user.js
// ==/UserScript==
"use strict";
const API_HOST = "https://refurrer.com";
/**
 * Extracts the url name (second path segment) from a FurAffinity
 * user/gallery/scraps/favorites/journals link.
 *
 * @param {string} href absolute URL
 * @returns {string|null} the url name, or null when the host is not
 *   www.furaffinity.net or the path is not one of the recognised pages
 */
function urlNameFromUserHref(href) {
  const { host, pathname } = new URL(href);
  const isUserPage = /^\/(user|gallery|scraps|favorites|journals)\/.+/.test(
    pathname
  );
  if (host != "www.furaffinity.net" || !isUserPage) {
    return null;
  }
  // pathname looks like "/<section>/<url name>/...", so index 2 is the name
  const segments = pathname.split("/");
  return segments[2];
}
/**
 * Adds a "Similar Users" section to a FurAffinity user page and fills it with
 * recommendations fetched from `${API_HOST}/api/fa/similar_users`.
 *
 * The API token is read from userscript storage under the key "apiToken".
 * While no token is stored, a form asking for one is shown instead.
 *
 * All text that comes from the API response, the page URL or the stored token
 * is HTML-escaped (or set via textContent) before it is put into the page.
 *
 * @returns {Promise<void>}
 */
async function fa() {
  if (!window.location.pathname.startsWith("/user/")) {
    return;
  }
  const urlName = urlNameFromUserHref(window.location.href);
  if (!urlName) {
    return;
  }

  // if the user is logged in, get their user account, so we can
  // also provide a list of recommendations that don't overlap with their
  // current watchlist
  let loggedInUrlName = null;
  const loggedInLink = document.querySelector(
    ".mobile-nav-content-container h2 a"
  );
  if (loggedInLink) {
    loggedInUrlName = urlNameFromUserHref(loggedInLink.href);
  }

  const HTML_ESCAPES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };

  // Makes an arbitrary value safe to interpolate into HTML text or a quoted
  // attribute value.
  function escapeHtml(value) {
    return String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  }

  // Inserts the section at the top of the left column and returns
  // [bodyElement, headerElement], or [] when the column is missing.
  function buildRecommendedContainer() {
    const leftColumn = document.querySelector(
      ".userpage-layout-left-col-content"
    );
    if (!leftColumn) {
      console.log("didn't find left column container, bailing");
      return [];
    }

    const section = document.createElement("section");
    section.classList = "userpage-left-column gallery_container";
    section.innerHTML = `
      <div class="userpage-section-left">
        <div class="section-header" style='display:flex;align-items:center'>
          <h2 style='flex-grow:1'>Similar Users</h2>
        </div>
        <div class="section-body">
        </div>
      </div>`;
    leftColumn.prepend(section);
    return [
      section.querySelector(".section-body"),
      section.querySelector(".section-header"),
    ];
  }

  const [container, header] = buildRecommendedContainer();
  if (!container) {
    return;
  }
  console.log(`getting recommended follows for ${urlName}`);

  // Assigned below once storage has been read; the token form and the clear
  // button update it afterwards.
  let apiToken = null;

  function setContainerError(error) {
    container.innerHTML = `Error: <pre>${escapeHtml(error)}</pre>`;
  }

  function addApiTokenSetForm() {
    const form = document.createElement("form");
    form.innerHTML = `
      <input style='height:32px;min-width:20em' required type=text placeholder="API Token" name="api_token" />
      <input class='button' type=submit value="Save" />
    `;
    header.appendChild(form);
    const input = form.querySelector("input[type=text]");
    form.onsubmit = async (event) => {
      // Without this the browser performs a regular GET form submission:
      // the page reloads and the token ends up in the page's query string.
      event.preventDefault();
      const token = input.value.trim();
      if (!token) {
        return;
      }
      apiToken = token;
      await GM.setValue("apiToken", apiToken);
      header.removeChild(form);
      addApiTokenClearButton();
      run();
    };
  }

  function addApiTokenClearButton() {
    const button = document.createElement("button");
    const apiTokenShort =
      apiToken.length > 10 ? apiToken.slice(0, 7) + "..." : apiToken;
    // textContent, so the stored token is never interpreted as markup
    button.textContent = `Clear Token (${apiTokenShort})`;
    header.appendChild(button);
    button.onclick = async () => {
      apiToken = null;
      await GM.setValue("apiToken", null);
      header.removeChild(button);
      addApiTokenSetForm();
    };
  }

  // Fetches recommendations for urlName and renders them, or an error.
  function run() {
    container.innerHTML = `<h2 class='aligncenter'>(Loading...)</h2>`;
    let url = `${API_HOST}/api/fa/similar_users`;
    url += `?url_name=${encodeURIComponent(urlName)}`;
    if (loggedInUrlName) {
      url += `&exclude_url_name=${encodeURIComponent(loggedInUrlName)}`;
    }
    url += `&api_token=${encodeURIComponent(apiToken)}`;

    GM_xmlhttpRequest({
      url,
      method: "GET",
      headers: {
        Accept: "application/json",
        "Content-Type": "application/json",
      },
      onload: (response) => {
        // Error responses are JSON for some statuses and not for others, so
        // try to parse every body instead of relying on a fixed status list.
        let json = null;
        try {
          json = JSON.parse(response.response);
        } catch (e) {
          json = null;
        }

        if (response.status == 200 && json) {
          populateRecommendations(json);
        } else if (json && json.error) {
          setContainerError(json.error);
        } else {
          setContainerError(response.response || `HTTP ${response.status}`);
        }
      },
      // Without these handlers a failed request would leave "(Loading...)"
      // on screen indefinitely.
      onerror: () => {
        setContainerError("request to the recommendation service failed");
      },
      ontimeout: () => {
        setContainerError("request to the recommendation service timed out");
      },
    });
  }

  function populateRecommendations(recommendations) {
    if (recommendations.not_followed) {
      container.innerHTML = buildTwoColumns(recommendations);
    } else {
      container.innerHTML = buildOneColumn(recommendations.all);
    }
  }

  // Renders `all` next to `not_followed`; the latter may be an error object.
  function buildTwoColumns({ all, not_followed }) {
    let content = ``;
    content += `<table>`;
    content += `  <tr>`;
    content += `    <th>Similar to ${escapeHtml(urlName)}</th>`;
    content += `    <th>Users you don't follow</th>`;
    content += `  </tr>`;
    content += `  <tr>`;
    content += `    <td>`;
    content += `      ` + buildOneColumn(all);
    content += `    </td>`;
    content += `    <td>`;
    if (not_followed.error != null) {
      content +=
        `Error getting recommended users ` +
        `you don't already follow: ${escapeHtml(not_followed.error)}`;
    } else {
      content += `      ` + buildOneColumn(not_followed);
    }
    content += `    </td>`;
    content += `  </tr>`;
    content += `</table>`;
    return content;
  }

  // Renders one ordered list of users; a missing list renders as empty.
  function buildOneColumn(userList) {
    let content = ``;
    content += '<ol class="user-submitted-links">';
    (userList || []).forEach(({ name, url, profile_thumb_url }) => {
      const safeName = escapeHtml(name);
      content += `<li>`;
      content += `<a
        href="${escapeHtml(url)}"
        style="display: flex; align-items: center; border: dotted 1px rgba(255,255,255,0.5); padding: 4px"
      >`;
      if (profile_thumb_url) {
        content += `<img
          alt="${safeName} thumbnail"
          title="${safeName}"
          src="${escapeHtml(profile_thumb_url)}"
          style="max-height: 40px; padding-right: 10px"
        />`;
      }
      content += `<span>${safeName}</span>`;
      content += `</a>`;
      content += "</li>";
    });
    content += "</ol>";
    return content;
  }

  apiToken = await GM.getValue("apiToken", null);
  if (apiToken == null) {
    container.innerHTML = `<h2 class='aligncenter'>Please set API token</h2>`;
    addApiTokenSetForm();
  } else {
    addApiTokenClearButton();
    run();
  }
}
// Entry point: only run on www.furaffinity.net, otherwise just log the host.
(async () => {
  const { hostname } = window.location;
  if (hostname != "www.furaffinity.net") {
    console.log("unhandled domain ", hostname);
    return;
  }
  await fa();
})();