package com.chimbori.crux.articles;

import com.chimbori.crux.common.Log;
import com.chimbori.crux.common.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: classes.dex */
/**
 * Post-processes an element tree into a fresh {@link Document}: prunes nodes, rewrites tags,
 * drops short text and strips attributes, in the fixed order used by {@link #a(Element)}.
 *
 * <p>This class was recovered from obfuscated bytecode. The class name and the public entry
 * point {@code a(Element)} are kept as-is because callers elsewhere depend on them; private
 * members carry descriptive names taken from the log messages emitted by each pass.
 */
public class d {
    /** Matches inline CSS declarations that hide an element. */
    private static final Pattern HIDDEN_STYLE_PATTERN = Pattern.compile("display\\:none|visibility\\:hidden");

    /** Tags that are renamed to {@code <p>} so that their content is kept. */
    private static final Set<String> TAGS_TO_CONVERT_TO_PARAGRAPH =
            new HashSet<>(Arrays.asList("font", "table", "tbody", "tr", "td", "div", "ol", "ul", "li", "span"));

    /** Tags allowed to remain at any depth; every other element is removed. */
    private static final Set<String> TAGS_TO_RETAIN =
            new HashSet<>(Arrays.asList("p", "b", "i", "u", "strong", "em", "a", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote"));

    /** Tags whose elements are not subject to the minimum text length check. */
    private static final Set<String> TAGS_EXEMPT_FROM_MIN_LENGTH_CHECK =
            new HashSet<>(Arrays.asList("b", "i", "u", "strong", "em", "a", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote"));

    /** Attribute keys that survive the final attribute-stripping pass. */
    private static final Set<String> ATTRIBUTES_TO_RETAIN = new HashSet<>(Arrays.asList("href"));

    /** Tags allowed as direct children of the root element. */
    private static final Set<String> TAGS_TO_RETAIN_AT_TOP_LEVEL =
            new HashSet<>(Arrays.asList("p", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "li"));

    /** Text shorter than this many characters is treated as too short to keep. */
    private static final int MIN_LENGTH_FOR_PARAGRAPHS = 50;

    /** Separator text inserted where a line break used to be. */
    private static final String LINE_BREAK_REPLACEMENT = " • ";

    /**
     * Runs every clean-up pass on {@code element} (mutating it in place) and copies its remaining
     * child nodes into a new, otherwise empty document.
     *
     * @param element root of the tree to clean up; may be {@code null}
     * @return a new document holding clones of the surviving child nodes, or an empty document
     *         when {@code element} is {@code null}
     * @throws NumberFormatException if a scored element carries a non-integer {@code gravityScore}
     */
    public static Document a(Element element) {
        Log.i("postprocess");
        Document document = new Document("");
        if (element == null) {
            return document;
        }

        // The order of the passes matters: later passes assume the tree shape left by earlier ones.
        removeNodesWithNegativeScores(element);
        replaceLineBreaksWithSeparators(element);
        removeUnlikelyChildNodes(element);
        removeTagsButRetainContent(element);
        removeTagsNotLikelyToBeParagraphs(element);
        removeTopLevelTagsNotLikelyToBeParagraphs(element);
        removeShortParagraphs(element);
        removeDisallowedAttributes(element);

        for (Node child : element.childNodes()) {
            // Clone so that the source tree keeps ownership of its own nodes.
            document.appendChild(child.clone());
        }
        return document;
    }

    /**
     * Collapses consecutive {@code <br>} elements into one and replaces each remaining
     * {@code <br>} with a textual separator.
     */
    private static void replaceLineBreaksWithSeparators(Element element) {
        for (Element repeatedBreak : element.select("br + br")) {
            repeatedBreak.remove();
        }
        for (Element lineBreak : element.select("br")) {
            Node previous = lineBreak.previousSibling();
            if (previous != null) {
                previous.after(LINE_BREAK_REPLACEMENT);
            } else {
                // NOTE(review): with no previous sibling the separator goes to the END of the
                // parent rather than where the <br> was; behaviour kept as found, confirm intent.
                lineBreak.parent().append(LINE_BREAK_REPLACEMENT);
            }
            lineBreak.unwrap();
        }
    }

    /** Removes direct children of {@code element} whose tag is not allowed at the top level. */
    private static void removeTopLevelTagsNotLikelyToBeParagraphs(Element element) {
        for (Element child : element.children()) {
            if (!TAGS_TO_RETAIN_AT_TOP_LEVEL.contains(child.tagName())) {
                Log.printAndRemove(child, "removeTopLevelTagsNotLikelyToBeParagraphs");
            }
        }
    }

    /**
     * Recursively removes descendants whose tag is not in {@link #TAGS_TO_RETAIN}. Children of a
     * removed element are not visited.
     */
    private static void removeTagsNotLikelyToBeParagraphs(Element element) {
        for (Element child : element.children()) {
            if (!TAGS_TO_RETAIN.contains(child.tagName())) {
                Log.printAndRemove(child, "removeTagsNotLikelyToBeParagraphs");
            } else if (child.children().size() > 0) {
                removeTagsNotLikelyToBeParagraphs(child);
            }
        }
    }

    /**
     * Renames every descendant whose tag is in {@link #TAGS_TO_CONVERT_TO_PARAGRAPH} to
     * {@code <p>}, processing the deepest elements first.
     */
    private static void removeTagsButRetainContent(Element element) {
        for (Element child : element.children()) {
            removeTagsButRetainContent(child);
            if (TAGS_TO_CONVERT_TO_PARAGRAPH.contains(child.tagName())) {
                Log.i("removeTagsButRetainContent: [%s] %s", child.tagName(), child.outerHtml());
                child.tagName("p");
            }
        }
    }

    /**
     * Removes direct child nodes of {@code element} that have no usable text: nodes that are
     * neither text nor elements, nodes whose trimmed text is empty, non-exempt nodes shorter than
     * {@link #MIN_LENGTH_FOR_PARAGRAPHS}, and nodes whose text is more than twice as long as the
     * value {@code StringUtils.countLetters} reports for it.
     */
    private static void removeShortParagraphs(Element element) {
        // Walk backwards so that removing a node does not shift the indices still to be visited.
        for (int index = element.childNodeSize() - 1; index >= 0; index--) {
            Node child = element.childNode(index);
            String text = null;
            boolean isExemptFromMinTextLengthCheck = false;
            if (child instanceof TextNode) {
                text = ((TextNode) child).text().trim();
            } else if (child instanceof Element) {
                Element childElement = (Element) child;
                text = childElement.text().trim();
                isExemptFromMinTextLengthCheck = TAGS_EXEMPT_FROM_MIN_LENGTH_CHECK.contains(childElement.tagName());
            }

            Log.i("removeShortParagraphs: [%s] isExemptFromMinTextLengthCheck : %b", child, isExemptFromMinTextLengthCheck);

            boolean hasNoText = text == null || text.isEmpty();
            if (hasNoText
                    || (!isExemptFromMinTextLengthCheck && text.length() < MIN_LENGTH_FOR_PARAGRAPHS)
                    || text.length() > StringUtils.countLetters(text) * 2) {
                Log.printAndRemove(child, "removeShortParagraphs:");
            }
        }
    }

    /**
     * Recursively removes descendants flagged by {@link #isUnlikely(Element)}. Children of a
     * removed element are not visited.
     */
    private static void removeUnlikelyChildNodes(Element element) {
        for (Element child : element.children()) {
            if (isUnlikely(child)) {
                Log.printAndRemove(child, "removeUnlikelyChildNodes");
            } else if (child.children().size() > 0) {
                removeUnlikelyChildNodes(child);
            }
        }
    }

    /**
     * Removes selected elements whose {@code gravityScore} attribute is negative or whose text is
     * shorter than {@link #MIN_LENGTH_FOR_PARAGRAPHS}.
     *
     * @throws NumberFormatException if a selected element's {@code gravityScore} is not an integer
     */
    private static void removeNodesWithNegativeScores(Element element) {
        // `a.a` is a constant of the sibling class `a`, which is not visible in this file;
        // presumably the CSS selector for elements carrying a gravityScore attribute (TODO confirm).
        for (Element scored : element.select(a.a)) {
            // NOTE(review): parseInt is unguarded; a missing or malformed score aborts the run.
            int score = Integer.parseInt(scored.attr("gravityScore"));
            if (score < 0 || scored.text().length() < MIN_LENGTH_FOR_PARAGRAPHS) {
                Log.printAndRemove(scored, "removeNodesWithNegativeScores");
            }
        }
    }

    /**
     * Reports whether {@code element} should be discarded: its class mentions "caption"
     * (case-insensitively), or its style or class attribute matches
     * {@link #HIDDEN_STYLE_PATTERN}.
     */
    private static boolean isUnlikely(Element element) {
        // jsoup returns an empty string, never null, for an attribute that is not present.
        String style = element.attr("style");
        String className = element.attr("class");
        // Locale.ROOT keeps the comparison stable under locales with special casing rules.
        return className.toLowerCase(Locale.ROOT).contains("caption")
                || HIDDEN_STYLE_PATTERN.matcher(style).find()
                || HIDDEN_STYLE_PATTERN.matcher(className).find();
    }

    /**
     * Recursively strips every attribute whose key is not in {@link #ATTRIBUTES_TO_RETAIN} from
     * {@code element} and all of its descendants.
     */
    private static void removeDisallowedAttributes(Element element) {
        for (Element child : element.children()) {
            removeDisallowedAttributes(child);
        }

        // Collect the keys first so the attribute set is not modified while it is being iterated.
        List<String> keysToRemove = new ArrayList<>();
        for (Attribute attribute : element.attributes()) {
            if (!ATTRIBUTES_TO_RETAIN.contains(attribute.getKey())) {
                keysToRemove.add(attribute.getKey());
            }
        }
        for (String key : keysToRemove) {
            element.removeAttr(key);
        }
    }
}
