fix posted date parsing for legacy scanned posts
This commit is contained in:
@@ -114,7 +114,13 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
|
||||
when VERSION_0, VERSION_1
|
||||
idx = elem_idx_after_text_match(info_children, /Posted/)
|
||||
child = info_children[idx..idx + 5].find { |ic| ic.name == "span" }
|
||||
child.try(:[], "title").try(:strip)
|
||||
date_str = child.try(:[], "title").try(:strip)
|
||||
if date_str
|
||||
DateTime.strptime(
|
||||
date_str.gsub(/(\d+)(st|nd|rd|th)/, '\1'),
|
||||
"%b %d, %Y %I:%M %p",
|
||||
)
|
||||
end
|
||||
when VERSION_2
|
||||
date_str = @elem.css(".popup_date").first["title"]
|
||||
# e.g. 'Feb 18, 2023 06:47 PM'
|
||||
@@ -246,7 +252,7 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
|
||||
case @page_version
|
||||
when VERSION_0
|
||||
elem_after_text_match(info_children, /Resolution/).try(:text).try(
|
||||
:strip
|
||||
:strip,
|
||||
)
|
||||
when VERSION_1
|
||||
idx = elem_idx_after_text_match(info_children, /Resolution/)
|
||||
|
||||
@@ -8,13 +8,13 @@ class Domain::Fa::PostFactorCalculator
|
||||
end
|
||||
|
||||
def fit
|
||||
limit = 100_000_000
|
||||
limit = 10_000_000
|
||||
dataset =
|
||||
measure(
|
||||
->(r) do
|
||||
proc do |r|
|
||||
r && "loaded #{r.length.to_s.bold} favs" ||
|
||||
"loading up to #{limit} favs"
|
||||
end
|
||||
"loading up to #{limit.to_s.bold} favs"
|
||||
end,
|
||||
) { Domain::Fa::Fav.all.limit(limit).pluck(:user_id, :post_id).to_a }
|
||||
|
||||
measure("convert to hash") do
|
||||
@@ -51,13 +51,13 @@ class Domain::Fa::PostFactorCalculator
|
||||
.each_slice(20_000) do |chunk|
|
||||
total += chunk.size
|
||||
measure(
|
||||
" -> wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)"
|
||||
" -> wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)",
|
||||
) do
|
||||
Domain::Fa::PostFactor.upsert_all(
|
||||
chunk,
|
||||
unique_by: :post_id,
|
||||
update_only: factors_col_name,
|
||||
returning: :id
|
||||
returning: :id,
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -115,20 +115,20 @@ describe Domain::Fa::Parser::Page do
|
||||
assert_equal(
|
||||
{
|
||||
href: "/gallery/feretta/folder/60236/Tale-of-Tails",
|
||||
title: "Tale of Tails"
|
||||
title: "Tale of Tails",
|
||||
},
|
||||
folders[1]
|
||||
folders[1],
|
||||
)
|
||||
assert_equal(
|
||||
{
|
||||
href: "/gallery/feretta/folder/60234/Illustrations",
|
||||
title: "Illustrations"
|
||||
title: "Illustrations",
|
||||
},
|
||||
folders[2]
|
||||
folders[2],
|
||||
)
|
||||
assert_equal(
|
||||
{ href: "/gallery/feretta/folder/229520/Marketing", title: "Marketing" },
|
||||
folders.last
|
||||
folders.last,
|
||||
)
|
||||
end
|
||||
|
||||
@@ -177,7 +177,7 @@ describe Domain::Fa::Parser::Page do
|
||||
assert_equal "Miles-DF", sub.artist
|
||||
assert_equal "//d.facdn.net/art/miles-df/1455889648/1455889648.miles-df_miles-df_stream.jpg",
|
||||
sub.full_res_img
|
||||
assert_equal "Feb 19th, 2016 08:47 AM", sub.posted_date
|
||||
assert_equal DateTime.parse("Feb 19, 2016 08:47 AM"), sub.posted_date
|
||||
assert_equal "All", sub.category
|
||||
assert_equal "All", sub.theme
|
||||
assert_equal "Unspecified / Any", sub.species
|
||||
@@ -201,7 +201,7 @@ describe Domain::Fa::Parser::Page do
|
||||
assert_equal 82, sub.num_views
|
||||
assert_equal "Male", sub.gender
|
||||
assert_equal "Dragon (Other)", sub.species
|
||||
assert_equal "Feb 24th, 2016 12:51 AM", sub.posted_date
|
||||
assert_equal DateTime.parse("Feb 24, 2016 12:51 AM"), sub.posted_date
|
||||
assert_equal %w[kobold frog hunting male frog mighty hunter],
|
||||
sub.keywords_array
|
||||
end
|
||||
@@ -219,7 +219,7 @@ describe Domain::Fa::Parser::Page do
|
||||
assert sub.description_html =~ /Forgot to put this up/
|
||||
assert_equal "//d.facdn.net/art/feretta/1479650817/1479650802.feretta_161004_space_vixen_hop_desktop.jpg",
|
||||
sub.full_res_img
|
||||
assert_equal "Nov 20th, 2016 09:06 AM", sub.posted_date
|
||||
assert_equal DateTime.parse("Nov 20, 2016 09:06 AM"), sub.posted_date
|
||||
assert_equal "Artwork (Digital)", sub.category
|
||||
assert_equal "Miscellaneous", sub.theme
|
||||
assert_equal "Canid - Vulpine", sub.species
|
||||
@@ -247,14 +247,14 @@ describe Domain::Fa::Parser::Page do
|
||||
assert_equal({ href: "/scraps/salkitten/", title: "Scraps" }, folders[0])
|
||||
assert_equal(
|
||||
{ href: "/gallery/salkitten/folder/51051/7up-Pup", title: "7up Pup" },
|
||||
folders[2]
|
||||
folders[2],
|
||||
)
|
||||
assert_equal(
|
||||
{
|
||||
href: "/gallery/salkitten/folder/93190/Animations",
|
||||
title: "Animations"
|
||||
title: "Animations",
|
||||
},
|
||||
folders.last
|
||||
folders.last,
|
||||
)
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user