Files
redux-scraper/app/models/http_log_entry_header.rb
2025-01-01 03:29:53 +00:00

57 lines
1.6 KiB
Ruby

# typed: true
# A set of HTTP headers identified by the SHA-256 digest of a normalized
# ("scrubbed") copy of those headers.
#
# Before the digest is computed, selected header values are replaced (fully or
# partially) with the literal "(scrubbed)" and the headers are sorted by key,
# so the digest does not depend on those values or on header order.
class HttpLogEntryHeader < ReduxApplicationRecord
  # Headers whose entire value is replaced with "(scrubbed)".
  FULLY_SCRUBBED_HEADERS = %w[
    etag
    content-length
    cache-control
    perf
    x-connection-hash
    x-transaction-id
    x-rate-limit-remaining
    x-rate-limit-reset
    x-response-time
  ].freeze

  # Headers in which only the HH:MM:SS portion of the value is replaced.
  DATETIME_SCRUBBED_HEADERS = %w[expires last-modified date].freeze

  # Matches a time of day such as "03:29:53".
  TIME_OF_DAY_PATTERN = /\d\d:\d\d:\d\d/

  validates_presence_of(:sha256)
  # Digest::SHA256.digest returns the raw 32-byte digest (not the hex form).
  validates :sha256, length: { is: 32 }

  # Builds an unsaved record from a scrubbed, key-sorted copy of +headers+.
  #
  # The given hash and the strings it contains are left untouched.
  #
  # @param headers [Hash] header name => header value
  # @return [HttpLogEntryHeader] a new, unsaved record
  # @raise [RuntimeError] if +headers+ is not a Hash
  def self.build(headers:)
    raise("must be a hash") unless headers.is_a?(Hash)

    # Shallow copy: keys can be reassigned freely, but the value strings are
    # still shared with the caller and must not be mutated in place.
    headers = headers.dup

    FULLY_SCRUBBED_HEADERS.each { |name| scrub_header(headers, name) }
    DATETIME_SCRUBBED_HEADERS.each { |name| scrub_datetime_header(headers, name) }

    if headers["cf-ray"]
      # Only the segment before the first "-" is replaced; the remaining
      # segments are kept as they are.
      ray = headers["cf-ray"].split("-")
      ray[0] = "(scrubbed)"
      headers["cf-ray"] = ray.join("-")
    end

    headers = headers.sort.to_h

    # NOTE(review): the digest is taken over Hash#to_s, whose output format
    # changed in Ruby 3.4 -- confirm stored digests stay comparable before
    # upgrading the Ruby version.
    sha256 = Digest::SHA256.digest(headers.to_s)
    HttpLogEntryHeader.new(sha256: sha256, headers: headers)
  end

  # Returns the persisted record whose digest matches +headers+, or a new
  # unsaved record when none exists.
  #
  # @param headers [Hash] header name => header value
  # @return [HttpLogEntryHeader]
  def self.find_or_build(headers:)
    record = build(headers: headers)
    find_by(sha256: record.sha256) || record
  end

  # Like {find_or_build}, but saves the record when it is not yet persisted.
  #
  # @param headers [Hash] header name => header value
  # @return [HttpLogEntryHeader] a persisted record
  # @raise whatever +save!+ raises when the record cannot be saved
  def self.find_or_create(headers:)
    record = find_or_build(headers: headers)
    record.save! unless record.persisted?
    record
  end

  # Replaces the value stored under +key+ with "(scrubbed)" when it is set.
  #
  # @param hash [Hash] headers, modified in place
  # @param key [String] header name
  def self.scrub_header(hash, key)
    hash[key] = "(scrubbed)" if hash[key]
  end

  # Replaces every HH:MM:SS occurrence in the value stored under +key+ with
  # "(scrubbed)" when the value is set.
  #
  # The result is stored as a new string instead of editing the existing one
  # in place: the value may be shared with the caller's original hash, and it
  # may be frozen.
  #
  # @param hash [Hash] headers, modified in place
  # @param key [String] header name
  def self.scrub_datetime_header(hash, key)
    value = hash[key]
    hash[key] = value.gsub(TIME_OF_DAY_PATTERN, "(scrubbed)") if value
  end

  # Returns the record for an empty header set, creating it on first use and
  # memoizing it on the class afterwards.
  #
  # @return [HttpLogEntryHeader]
  def self.empty
    @empty_model ||= find_or_create(headers: {})
  end
end