Files
redux-scraper/app/helpers/log_entries_helper.rb
2025-02-25 19:59:41 +00:00

138 lines
3.6 KiB
Ruby

# typed: strict
module LogEntriesHelper
extend T::Sig
extend T::Helpers
include HelpersInterface
abstract!
sig { params(content_type: String).returns(T::Boolean) }
# Reports whether +content_type+ belongs to any of the binary media families
# this helper knows about: renderable image, video, or audio types, or Flash.
#
# @param content_type [String] the content type string to classify
# @return [Boolean] true when any of the four media predicates matches
def is_send_data_content_type?(content_type)
  return true if is_renderable_image_type?(content_type)
  return true if is_renderable_video_type?(content_type)
  return true if is_renderable_audio_type?(content_type)

  is_flash_content_type?(content_type)
end
sig { params(uri_path: String).returns(T::Array[[String, String]]) }
# Breaks a slash-separated path into breadcrumb-style pairs.
#
# The path is split on "/". For every segment after the first one, a pair
# +[segment, cumulative_path]+ is produced, where +cumulative_path+ joins all
# segments up to and including that one. Every cumulative path except the
# last gets a trailing "/". The segment at index 0 (the empty string when the
# path starts with "/") never gets a pair of its own; it only contributes to
# the cumulative prefixes.
#
# @param uri_path [String] the path to decompose, e.g. "/a/b/c"
# @return [Array<Array(String, String)>] e.g. [["a", "/a/"], ["b", "/a/b/"], ["c", "/a/b/c"]]
def path_iterative_parts(uri_path)
  segments = uri_path.split("/")
  last_index = segments.length - 1

  # An empty or single-segment split yields an empty range, hence [].
  (1..last_index).map do |idx|
    cumulative = T.must(segments[0..idx]).join("/")
    cumulative += "/" unless idx == last_index
    [T.must(segments[idx]), cumulative]
  end
end
sig { params(content_type: String).returns(T.nilable(String)) }
# Maps an exact content type string to a file extension (without the dot).
#
# Matching is by exact string equality, so a value carrying parameters such
# as "; charset=..." is not recognised. Both "audio/mpeg" and "audio/mp3"
# map to "mp3".
#
# @param content_type [String] the content type to look up
# @return [String, nil] the extension, or nil when the type is not in the table
def ext_for_content_type(content_type)
  {
    "image/jpeg" => "jpeg",
    "image/jpg" => "jpg",
    "image/png" => "png",
    "image/gif" => "gif",
    "video/webm" => "webm",
    "audio/mpeg" => "mp3",
    "audio/mp3" => "mp3",
    "audio/wav" => "wav",
  }[
    content_type
  ]
end
sig { params(content_type: String).returns(T::Boolean) }
# Reports whether +content_type+ begins with one of the image types handled
# here: image/jpeg, image/jpg, image/png or image/gif. Because this is a
# prefix match, trailing parameters after the type are tolerated.
#
# @param content_type [String] the content type to test
# @return [Boolean]
def is_renderable_image_type?(content_type)
  content_type.start_with?("image/jpeg", "image/jpg", "image/png", "image/gif")
end
sig { params(content_type: String).returns(T::Boolean) }
# Reports whether +content_type+ begins with "application/json". This is a
# prefix match, so "application/json; charset=utf-8" also qualifies.
#
# @param content_type [String] the content type to test
# @return [Boolean]
def is_json_content_type?(content_type)
  content_type.start_with?("application/json")
end
sig { params(content_type: String).returns(T::Boolean) }
# Reports whether +content_type+ begins with one of the video types handled
# here: video/mp4 or video/webm (prefix match).
#
# @param content_type [String] the content type to test
# @return [Boolean]
def is_renderable_video_type?(content_type)
  content_type.start_with?("video/mp4", "video/webm")
end
sig { params(content_type: String).returns(T::Boolean) }
# Reports whether +content_type+ begins with one of the audio types handled
# here: audio/mpeg, audio/mp3, audio/wav or audio/ogg (prefix match).
#
# @param content_type [String] the content type to test
# @return [Boolean]
def is_renderable_audio_type?(content_type)
  content_type.start_with?("audio/mpeg", "audio/mp3", "audio/wav", "audio/ogg")
end
sig { params(content_type: String).returns(T::Boolean) }
# Reports whether +content_type+ contains "application/x-shockwave-flash".
# Unlike the prefix-based predicates, the pattern is unanchored, so the text
# may appear anywhere in the string.
#
# @param content_type [String] the content type to test
# @return [Boolean]
def is_flash_content_type?(content_type)
  %r{application/x-shockwave-flash}.match?(content_type)
end
sig { params(content_type: String).returns(T::Boolean) }
# Reports whether +content_type+ is a renderable video type or a renderable
# image type, as decided by the corresponding predicates.
#
# @param content_type [String] the content type to test
# @return [Boolean]
def is_thumbable_content_type?(content_type)
  return true if is_renderable_video_type?(content_type)

  is_renderable_image_type?(content_type)
end
sig { params(log_entry: HttpLogEntry).returns(T.nilable(String)) }
# Renders the document stored in +log_entry.response_bytes+ as sanitized HTML.
#
# The bytes are piped through the external `abiword` command, which is asked
# to produce HTML. The output is post-processed and then reduced to a short
# allow-list of elements, each permitted only a `style` attribute with a
# handful of CSS properties.
#
# @param log_entry [HttpLogEntry] entry whose response body is the document
# @return [String, nil] the sanitized markup passed through `raw`, or nil when
#   the body is blank or abiword exits unsuccessfully
def render_msword_content(log_entry)
  document_bytes = log_entry.response_bytes
  return nil if document_bytes.blank?

  # fd://0 and fd://1 point abiword at the pipes popen2 attaches to the
  # child, so the conversion needs no temporary files.
  converter_in, converter_out, waiter =
    Open3.popen2(
      "abiword",
      "--display=0",
      "--to=html",
      "--to-name=fd://1",
      "fd://0",
    )

  # Send the whole document in binary mode, then close the pipe to signal end
  # of input before collecting the output.
  converter_in.binmode
  converter_in.write(document_bytes)
  converter_in.close
  html = converter_out.read

  status = T.cast(waiter.value, Process::Status)
  return nil unless status.success?

  # Remove the literal text "Abiword HTML Document" from the output.
  html.gsub!(/Abiword HTML Document/, "")
  # NOTE(review): bbcode_to_html is not defined in this file; presumably a
  # String extension from a BBCode gem. Confirm what the `false` argument does.
  html = T.cast(T.unsafe(html).bbcode_to_html(false), String)
  # Drop self-closing <br/> tags.
  html.gsub!(%r{<br\s*/>}, "")

  # Every allowed element may carry a `style` attribute and nothing else.
  allowed_tags = %w[span div p i b strong em]
  sanitizer =
    Sanitize.new(
      elements: allowed_tags,
      attributes: allowed_tags.to_h { |tag| [tag, %w[style]] },
      css: {
        properties: %w[font-size color text-align margin-bottom],
      },
    )
  raw sanitizer.fragment(html)
ensure
  # Either stream may be nil if popen2 itself raised or was never reached.
  converter_in&.close
  converter_out&.close
end
end