Fix image extraction so it no longer returns "true" as alt text or images with an empty src
This commit is contained in:
+12
-4
@@ -472,10 +472,18 @@ function contentDispatch(funcName, args) {
|
||||
}));
|
||||
}
|
||||
/**
 * Collects every `<img>` element in the current document as a plain
 * `{ alt, src }` record.
 *
 * The source URL is resolved in priority order:
 *   1. the element's `src` property,
 *   2. the `data-src`, `data-lazy-src`, `data-original` attributes,
 *   3. the first URL listed in `srcset`.
 * Attribute fallbacks are returned as written in the markup.
 *
 * Alt text is trimmed, and placeholder values such as "true" or "img"
 * (compared case-insensitively) are replaced with an empty string.
 *
 * @returns {Array<{alt: string, src: string}>} One entry per image that has
 *   a non-empty source; images without any usable source are dropped.
 */
function extractImages() {
  // Built once per call instead of once per image.
  const placeholderAlts = new Set(["true", "false", "null", "undefined", "image", "img"]);
  const lazySrcAttributes = ["data-src", "data-lazy-src", "data-original"];

  // First URL of a srcset: skip leading whitespace/commas, read up to the next
  // whitespace, then drop trailing commas. Splitting on "," alone would break
  // URLs that legitimately contain commas.
  const firstSrcsetUrl = (srcset) => {
    const match = /^[\s,]*([^\s,]\S*)/.exec(srcset || "");
    return match ? match[1].replace(/,+$/, "") : "";
  };

  const resolveSrc = (img) => {
    if (img.src) {
      return img.src;
    }
    for (const name of lazySrcAttributes) {
      // Trim so whitespace-only attributes do not count as a source.
      const value = (img.getAttribute(name) || "").trim();
      if (value !== "") {
        return value;
      }
    }
    return firstSrcsetUrl(img.srcset);
  };

  const resolveAlt = (img) => {
    const alt = (img.alt || "").trim();
    return placeholderAlts.has(alt.toLowerCase()) ? "" : alt;
  };

  return Array.from(document.querySelectorAll("img"))
    .map((img) => ({ alt: resolveAlt(img), src: resolveSrc(img) }))
    .filter(({ src }) => src !== "");
}
|
||||
function extractText() {
|
||||
return document.body.innerText;
|
||||
|
||||
Reference in New Issue
Block a user