4 Commits

Author SHA1 Message Date
Dylan Knutson
572c61cebb add proxies 2025-07-23 04:51:44 +00:00
Dylan Knutson
7134d20d71 ip spoofing check 2025-07-23 03:31:25 +00:00
Dylan Knutson
a3d1fd7b82 Update task task-30 2025-07-23 02:37:22 +00:00
Dylan Knutson
7095816b19 Update task task-16 2025-07-23 02:37:01 +00:00
13 changed files with 107 additions and 19 deletions

View File

@@ -64,6 +64,40 @@ services:
volumes:
- devcontainer-redux-grafana-data:/var/lib/grafana
# Gluetun container that connects to AirVPN over WireGuard and turns on its
# HTTP and Shadowsocks proxies, restricted to servers in the Netherlands.
airvpn-netherlands-proxy:
  image: qmcgaw/gluetun
  cap_add:
    - NET_ADMIN
  devices:
    - /dev/net/tun:/dev/net/tun
  environment:
    - HTTPPROXY=on
    - SHADOWSOCKS=on
    - HTTPPROXY_LOG=on
    - VPN_SERVICE_PROVIDER=airvpn
    - VPN_TYPE=wireguard
    # WireGuard credentials must not live in version control. Compose
    # substitutes these from the shell environment or an untracked .env file
    # and aborts with the given message when a variable is missing.
    # NOTE(review): the key pair that was previously committed here should be
    # treated as leaked and rotated in the AirVPN client area.
    - WIREGUARD_PRIVATE_KEY=${AIRVPN_WIREGUARD_PRIVATE_KEY:?must be set}
    - WIREGUARD_PRESHARED_KEY=${AIRVPN_WIREGUARD_PRESHARED_KEY:?must be set}
    # Explicit CIDR suffixes, matching the san-jose service, so the IPv6
    # address is not left to whatever default mask the image applies.
    - WIREGUARD_ADDRESSES=10.165.87.232/32,fd7d:76ee:e68f:a993:4d1b:a77a:b471:a606/128
    - SERVER_COUNTRIES=Netherlands
# Gluetun container that connects to AirVPN over WireGuard and turns on its
# HTTP and Shadowsocks proxies, restricted to the listed California cities.
airvpn-san-jose-proxy:
  image: qmcgaw/gluetun
  cap_add:
    - NET_ADMIN
  devices:
    - /dev/net/tun:/dev/net/tun
  environment:
    - HTTPPROXY=on
    - SHADOWSOCKS=on
    - HTTPPROXY_LOG=on
    - VPN_SERVICE_PROVIDER=airvpn
    - VPN_TYPE=wireguard
    # WireGuard credentials must not live in version control. Compose
    # substitutes these from the shell environment or an untracked .env file
    # and aborts with the given message when a variable is missing.
    # NOTE(review): the key pair that was previously committed here should be
    # treated as leaked and rotated in the AirVPN client area.
    - WIREGUARD_PRIVATE_KEY=${AIRVPN_WIREGUARD_PRIVATE_KEY:?must be set}
    - WIREGUARD_PRESHARED_KEY=${AIRVPN_WIREGUARD_PRESHARED_KEY:?must be set}
    - WIREGUARD_ADDRESSES=10.165.87.232/32,fd7d:76ee:e68f:a993:4d1b:a77a:b471:a606/128
    # No surrounding quotes: in list-form `environment` entries the quotes
    # would become part of the value and the city names would never match.
    # The space after the comma is dropped as well so each CSV element is an
    # exact city name (presumably harmless either way; verify against gluetun).
    - SERVER_CITIES=San Jose California,Fremont California
volumes:
postgres-17-data:
devcontainer-redux-gem-cache:

View File

@@ -49,11 +49,11 @@
}
.log-entry-table-header-cell {
@apply border-b border-slate-200 bg-slate-50 px-2 py-1 text-xs font-medium uppercase tracking-wider text-slate-500;
@apply bg-slate-50 py-1 text-xs font-medium uppercase tracking-wider text-slate-500;
}
.log-entry-table-row-cell {
@apply flex items-center border-b border-slate-200 px-2 py-1 text-sm group-hover:bg-slate-50;
@apply flex items-center py-1 text-sm;
}
.rich-text-content blockquote {

View File

@@ -370,4 +370,18 @@ module LogEntriesHelper
end
raw fragment
end
sig { params(performed_by: String).returns(String) }
# Translates a log entry's `performed_by` value into the two-letter code
# rendered in the log entries table. Any value without a dedicated code
# falls back to "??".
def performed_by_to_short_code(performed_by)
  short_codes = {
    "direct" => "DR",
    "airvpn-1-netherlands" => "NL",
    "airvpn-2-san-jose" => "SJ",
  }
  short_codes.fetch(performed_by, "??")
end
end

View File

@@ -20,6 +20,7 @@ class Scraper::CurlHttpPerformer
const :response_headers, T::Hash[String, String]
const :response_time_ms, Integer
const :body, String
const :performed_by, String
end
class Request < T::Struct
@@ -48,6 +49,21 @@ class Scraper::CurlHttpPerformer
def do_request_impl(request)
curl = get_curl
start_at = Time.now
proxy_url = ENV["HTTP_PROXY_URL"]
performed_by =
case proxy_url
when nil
"direct"
when /airvpn-netherlands-proxy:(\d+)/
"airvpn-1-netherlands"
when /airvpn-san-jose-proxy:(\d+)/
"airvpn-2-san-jose"
else
raise("Unknown proxy URL: #{proxy_url}")
end
curl.proxy_url = proxy_url
curl.timeout = 30
curl.url = request.uri.normalize.to_s
curl.follow_location = request.follow_redirects
@@ -100,10 +116,11 @@ class Scraper::CurlHttpPerformer
Response.new(
uri: request.uri,
response_code: response_code,
response_headers: response_headers,
response_time_ms: response_time_ms,
response_code:,
response_headers:,
response_time_ms:,
body: body_str,
performed_by:,
)
end

View File

@@ -179,6 +179,7 @@ class Scraper::HttpClient
response_headers = response.response_headers
response_time_ms = response.response_time_ms
response_body = response.body
performed_by = response.performed_by
@domain_last_requested_at[uri.host] = requested_at +
(response_time_ms.to_f / 1000)
@@ -230,7 +231,7 @@ class Scraper::HttpClient
response_time_ms: response_time_ms,
requested_at: requested_at,
caused_by_entry: caused_by_entry,
performed_by: "direct",
performed_by: performed_by,
},
)

View File

@@ -5,7 +5,16 @@ class HttpLogEntry < ReduxApplicationRecord
enum :verb, %i[get post], prefix: true
enum :performed_by,
%i[direct legacy proxy-1 dedipath-1 direct-gdl serverhost-1],
%i[
direct
legacy
proxy-1
dedipath-1
direct-gdl
serverhost-1
airvpn-1-netherlands
airvpn-2-san-jose
],
prefix: true
belongs_to :response,

View File

@@ -19,7 +19,7 @@
<% end %>
<div class='mx-auto mt-4 sm:mt-6 text-center '>
<h1 class='text-2xl'>HTTP Log Entries</h1>
<div class='mt-2 text-lg flex items-center justify-center gap-2'>
<div class='mt-2 text-lg flex items-center justify-center'>
<%= link_to stats_log_entries_path(seconds: 60), class: "inline-flex items-center gap-1 text-blue-600 hover:text-blue-800" do %>
<%= render partial: "shared/icons/chart_bars", locals: { class_name: "w-5 h-5" } %>
View Statistics
@@ -36,9 +36,9 @@
<% end %>
</div>
<%= render partial: "shared/pagination_controls", locals: { collection: @log_entries } %>
<div class="grid grid-cols-[auto_auto_auto_auto_1fr_auto_auto_auto] max-w-screen-lg mx-auto overflow-hidden border border-slate-200 bg-white shadow mb-4 rounded-lg">
<div class='contents'>
<div class="log-entry-table-header-cell text-center rounded-tl">ID</div>
<div class="grid grid-cols-[auto_auto_auto_auto_auto_1fr_auto_auto_auto] max-w-screen-lg mx-auto border border-slate-200 divide-y divide-slate-200 bg-white shadow mb-4 rounded-lg">
<div class="grid grid-cols-subgrid col-span-full px-2">
<div class="log-entry-table-header-cell text-center rounded-tl col-span-2">ID</div>
<div class="log-entry-table-header-cell text-right">Size</div>
<div class="log-entry-table-header-cell text-center">Time</div>
<div class="log-entry-table-header-cell text-center">Status</div>
@@ -48,7 +48,10 @@
<div class="log-entry-table-header-cell text-right rounded-tr">Resp</div>
</div>
<% @log_entries.each do |hle| %>
<div class="contents group">
<div class="grid grid-cols-subgrid col-span-full group divide-x [&>*]:pl-2 [&>*]:pr-2 divide-slate-200 hover:bg-slate-50">
<div class="log-entry-table-row-cell text-center text-slate-400">
<span class="text-sm font-medium" title="<%= hle.performed_by %>"><%= performed_by_to_short_code(hle.performed_by) %></span>
</div>
<div class="log-entry-table-row-cell justify-end">
<%= link_to hle.id, log_entry_path(hle.id), class: "text-blue-600 hover:text-blue-800 font-medium" %>
</div>
@@ -56,9 +59,9 @@
<%= HexUtil.humansize(hle.response_size) %>
</div>
<div class="log-entry-table-row-cell text-right">
<%= time_ago_in_words(hle.created_at, include_seconds: true) %> ago
<%= time_ago_in_words(hle.created_at, include_seconds: true) %>
</div>
<div class="log-entry-table-row-cell text-center">
<div class="log-entry-table-row-cell justify-center">
<span class="<%= hle.status_code == 200 ? 'bg-green-100 text-green-800' : 'bg-red-100 text-red-800' %> px-2 py-1 rounded-full text-xs font-medium">
<%= hle.status_code %>
</span>
@@ -92,7 +95,7 @@
</div>
<div class="log-entry-table-row-cell">
<span class="max-w-24 truncate inline-block" title="<%= hle.content_type %>">
<%= hle.content_type %>
<%= hle.content_type.split(";")[0] %>
</span>
</div>
<div class="justify-end log-entry-table-row-cell">

View File

@@ -1,9 +1,10 @@
---
id: task-16
title: Add followers and following to FA user page
status: To Do
status: Done
assignee: []
created_date: '2025-07-08'
updated_date: '2025-07-23'
labels: []
dependencies: []
---

View File

@@ -1,9 +1,10 @@
---
id: task-30
title: Create GlobalState for backfill job management
status: To Do
status: Done
assignee: []
created_date: '2025-07-08'
updated_date: '2025-07-23'
labels: []
dependencies: []
---

View File

@@ -1,7 +1,7 @@
---
id: task-32
title: Fix IP address for Cloudflare proxied requests
status: To Do
status: Done
assignee: []
created_date: '2025-07-08'
labels: []

View File

@@ -34,5 +34,7 @@ module ReduxScraper
# in config/environments, which are processed later.
#
config.time_zone = "Pacific Time (US & Canada)"
config.action_dispatch.ip_spoofing_check = true
end
end

View File

@@ -6,7 +6,12 @@ RSpec.describe Scraper::SofurryHttpClientConfig do
describe "#map_response" do
let(:base_response_attrs) do
{ response_code: 200, response_time_ms: 100, body: "body" }
{
response_code: 200,
response_time_ms: 100,
body: "body",
performed_by: "direct",
}
end
context "with a response from an API host" do

View File

@@ -55,6 +55,7 @@ class SpecUtil
response_headers: response_headers,
response_time_ms: response_time_ms,
body: response_body,
performed_by: "direct",
),
)
mock