package com.madsvyat.simplerssreader.util.extractor;

import com.madsvyat.simplerssreader.App;
import com.madsvyat.simplerssreader.util.NetworkLoader;
import com.madsvyat.simplerssreader.util.extractor.ArticleExtractor;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import javax.inject.Inject;
import org.apache.commons.text.lookup.StringLookupFactory;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;

/* loaded from: classes.dex */
/**
 * {@link ArticleExtractor} that loads a page through the injected {@link NetworkLoader},
 * scores candidate container elements with a set of heuristics and returns the HTML of
 * the best-scoring element.
 *
 * <p>Scoring combines class/id/style hints ({@link #calcWeight(Element)}), the amount of
 * text directly owned by the element and the shape of its direct children
 * ({@link #weightChildNodes(Element)}).
 */
public class DefaultArticleExtractor implements ArticleExtractor {
    /** A candidate scoring above this value is accepted at once; later candidates are not weighed. */
    private static final int GOOD_ENOUGH_WEIGHT = 200;

    /** Loader used to fetch and parse the page; supplied through {@link #setNetworkLoader}. */
    private NetworkLoader networkLoader;

    /** Tag names of elements that are considered as possible article containers. */
    private static final Pattern TARGET_NODES = Pattern.compile("p|div|td|h1|h2|article|section");
    /** Class/id fragments that cost a candidate 20 points each time they match. */
    private static final Pattern UNLIKELY_NODES_PATTERN = Pattern.compile("com(bx|ment|munity)|dis(qus|cuss)|e(xtra|[-]?mail)|foot|header|menu|re(mark|ply)|rss|sh(are|outbox)|sponsora(d|ll|gegate|rchive|ttachment)|(pag(er|ination))|popup|print|login|si(debar|gn|ngle)");
    /** Class/id fragments that earn a candidate 40 points each time they match. */
    private static final Pattern POSITIVE = Pattern.compile("(^(body|content|h?entry|main|page|post|text|blog|story|haupt))|arti(cle|kel|cleContent)|instapaper_body");
    /** Class/id fragments that cost a candidate 50 points each time they match. */
    private static final Pattern NEGATIVE = Pattern.compile("nav($|igation)|user|com(ment|bx|mentlist)|(^com-)|contact|foot|footer|footnote|masthead|(me(dia|ta))|outbrain|promo|related|scroll|(sho(utbox|pping))|sidebar|sponsor|tags|tool|widget|player|disclaimer|toc|infobox|vcard|adv");
    /** Inline-style fragments that cost a candidate 50 points. */
    private static final Pattern NEGATIVE_STYLE = Pattern.compile("hidden|display: ?none|font-size: ?small");
    /** Exact match for the heading tags h1 through h6. */
    private static final Pattern HEADING_TAG = Pattern.compile("h[1-6]");

    /** Traversor that removes the {@code style} attribute from every element it visits. */
    private static final NodeTraversor NODE_TRAVERSOR = new NodeTraversor(new NodeVisitor() {
        @Override
        public void head(Node node, int depth) {
            if (node instanceof Element) {
                node.removeAttr("style");
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // Nothing to do when leaving a node.
        }
    });

    /**
     * Creates the extractor and asks the application component to inject its dependencies.
     */
    public DefaultArticleExtractor() {
        App.getAppComponent().inject(this);
    }

    /**
     * Scores an element from its {@code class}, {@code id} and inline {@code style}.
     *
     * @param element element to score
     * @return +40 per {@link #POSITIVE} hit, -20 per {@link #UNLIKELY_NODES_PATTERN} hit and
     *         -50 per {@link #NEGATIVE} hit (class and id are checked separately), plus -50
     *         when the style matches {@link #NEGATIVE_STYLE}
     */
    private int calcWeight(Element element) {
        String className = element.className();
        String id = element.id();
        int weight = 0;

        if (POSITIVE.matcher(className).find()) {
            weight += 40;
        }
        if (POSITIVE.matcher(id).find()) {
            weight += 40;
        }
        if (UNLIKELY_NODES_PATTERN.matcher(className).find()) {
            weight -= 20;
        }
        if (UNLIKELY_NODES_PATTERN.matcher(id).find()) {
            weight -= 20;
        }
        if (NEGATIVE.matcher(className).find()) {
            weight -= 50;
        }
        if (NEGATIVE.matcher(id).find()) {
            weight -= 50;
        }

        String style = element.attr("style");
        if (style != null && NEGATIVE_STYLE.matcher(style).find()) {
            weight -= 50;
        }
        return weight;
    }

    /**
     * Scores the own text of a child element.
     *
     * @param element the child element (not consulted by the current heuristic)
     * @param str     the child's own text
     * @return -30 when the text contains more than five occurrences in total of
     *         {@code &quot;}, {@code &lt;}, {@code &gt;} and {@code px}; otherwise the text
     *         length divided by 25, rounded
     */
    private int calcWeightForChild(Element element, String str) {
        int markupLikeTokens = count(str, "&quot;") + count(str, "&lt;") + count(str, "&gt;") + count(str, "px");
        if (markupLikeTokens > 5) {
            return -30;
        }
        double length = str.length();
        return (int) Math.round(length / 25.0d);
    }

    /**
     * Counts non-overlapping occurrences of {@code str2} in {@code str}.
     *
     * <p>Implemented iteratively so that long texts with many matches neither copy the
     * remaining string on every hit nor grow the call stack.
     *
     * @param str  text to search
     * @param str2 token to look for
     * @return number of non-overlapping occurrences; 0 when {@code str2} is empty
     */
    private int count(String str, String str2) {
        if (str2.isEmpty()) {
            // An empty token matches at every position and would never advance the scan.
            return 0;
        }
        int occurrences = 0;
        int from = str.indexOf(str2);
        while (from >= 0) {
            occurrences++;
            from = str.indexOf(str2, from + str2.length());
        }
        return occurrences;
    }

    /**
     * Collects the candidate elements of a document.
     *
     * @param document parsed page
     * @return every element under (and including) the body whose tag matches
     *         {@link #TARGET_NODES}, in document order; empty when there is no body
     */
    private Collection<Element> getNodes(Document document) {
        // Document order is kept on purpose: extractContentSync stops at the first candidate
        // above GOOD_ENOUGH_WEIGHT, so an unordered collection would make the result arbitrary.
        Collection<Element> candidates = new ArrayList<>(64);
        Element body = document.body();
        if (body == null) {
            return candidates;
        }
        Elements descendants = body.select("*");
        if (descendants == null) {
            return candidates;
        }
        for (Element descendant : descendants) {
            if (TARGET_NODES.matcher(descendant.tagName()).matches()) {
                candidates.add(descendant);
            }
        }
        return candidates;
    }

    /**
     * Computes the total weight of a candidate element.
     *
     * @param element candidate to weigh
     * @return attribute weight, plus a score derived from the length of the element's own
     *         text, plus the weight contributed by its direct children
     */
    private int getWeight(Element element) {
        int attributeWeight = calcWeight(element);
        double ownTextLength = element.ownText().length();
        // Expression kept exactly as it was so that rounding at .5 boundaries does not shift.
        int ownTextWeight = (int) Math.round((ownTextLength / 100.0d) * 10.0d);
        return attributeWeight + ownTextWeight + weightChildNodes(element);
    }

    /**
     * Strips elements that should not take part in scoring: scripts, noscript blocks, style
     * sheets, divs whose id matches the comment/answer pattern, elements with class
     * {@code adv}, and divs hidden through an inline {@code display:none}.
     *
     * @param document parsed page, modified in place
     */
    private void prepareDocument(Document document) {
        removeAllByTag(document, "script");
        removeAllByTag(document, "noscript");
        removeAllByTag(document, "style");
        for (Element unwanted : document.select("div[id~=com(ment|bx|mentlist)|^answ],.adv")) {
            unwanted.remove();
        }
        // Attribute selector is properly closed and tolerates "display: none" with a space,
        // the same spelling NEGATIVE_STYLE accepts.
        for (Element hidden : document.select("div[style~=display: ?none]")) {
            hidden.remove();
        }
    }

    /**
     * Removes every element with the given tag name from the document.
     *
     * @param document parsed page, modified in place
     * @param str      tag name to remove
     */
    private void removeAllByTag(Document document, String str) {
        for (Element match : document.getElementsByTag(str)) {
            match.remove();
        }
    }

    /**
     * Rewrites {@code img[src]} and {@code a[href]} below (and including) the given element
     * to absolute URLs.
     *
     * @param element root of the subtree to rewrite
     */
    private void replaceRelSrc(Element element) {
        makeAttributeAbsolute(element, "img", "src");
        makeAttributeAbsolute(element, "a", "href");
    }

    /**
     * Replaces one URL attribute on all elements selected by {@code query} with its absolute
     * form. The attribute is left untouched when no absolute URL can be produced, so that a
     * missing or unresolvable value is not overwritten with an empty string.
     */
    private void makeAttributeAbsolute(Element root, String query, String attribute) {
        for (Element target : root.select(query)) {
            String absolute = target.absUrl(attribute);
            if (absolute != null && !absolute.isEmpty()) {
                target.attr(attribute, absolute);
            }
        }
    }

    /**
     * Scores an element by looking at its direct children.
     *
     * <p>Children with fewer than 20 characters of own text are ignored. For the others:
     * more than 190 characters adds {@code max(50, length / 10)}; an {@code h1}/{@code h2}
     * adds 30; a {@code div}/{@code p} adds {@link #calcWeightForChild}. A {@code div}/{@code p}
     * child whose class is "caption" adds 30 once. When at least two {@code p} children have
     * more than 50 characters, every heading child (h1-h6) adds another 20.
     *
     * @param element parent whose children are inspected
     * @return accumulated weight
     */
    private int weightChildNodes(Element element) {
        int weight = 0;
        int longParagraphs = 0;
        boolean hasCaption = false;

        for (Element child : element.children()) {
            String ownText = child.ownText();
            int length = ownText.length();
            if (length < 20) {
                continue;
            }
            if (length > 190) {
                weight += Math.max(50, length / 10);
            }

            String tag = child.tagName();
            if (tag.equals("h1") || tag.equals("h2")) {
                weight += 30;
            } else if (tag.equals("div") || tag.equals("p")) {
                weight += calcWeightForChild(child, ownText);
                if (tag.equals("p") && length > 50) {
                    longParagraphs++;
                }
                // Locale.ROOT keeps the comparison independent of the device locale.
                if (child.className().toLowerCase(Locale.ROOT).equals("caption")) {
                    hasCaption = true;
                }
            }
        }

        if (hasCaption) {
            weight += 30;
        }
        if (longParagraphs >= 2) {
            for (Element child : element.children()) {
                if (HEADING_TAG.matcher(child.tagName()).matches()) {
                    weight += 20;
                }
            }
        }
        return weight;
    }

    /**
     * Not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void extractContentAsync(long j, String str, String str2, ArticleExtractor.OnContentExtractedListener onContentExtractedListener, ArticleExtractor.OnErrorListener onErrorListener) {
        throw new UnsupportedOperationException("async content extraction not implemented yet");
    }

    /**
     * Loads a page and returns the HTML of its best-scoring element.
     *
     * <p>After the best element is chosen, the first descendant whose text equals
     * {@code str2} is removed, image and link URLs are made absolute and all inline styles
     * are stripped.
     *
     * @param j    not used by this implementation
     * @param str  passed to {@link NetworkLoader#loadHTMLDocument}; presumably the page URL
     * @param str2 text of an element to drop from the result (presumably the article title,
     *             to be confirmed against callers); may be {@code null} to skip this step
     * @return the element's outer HTML, or {@code null} when the page could not be loaded or
     *         no candidate scored above zero
     */
    @Override
    public String extractContentSync(long j, String str, String str2) {
        Document document = this.networkLoader.loadHTMLDocument(str);
        if (document == null) {
            return null;
        }
        prepareDocument(document);

        Element best = findBestCandidate(document);
        if (best == null) {
            return null;
        }
        if (str2 != null) {
            removeFirstElementWithText(best, str2);
        }
        replaceRelSrc(best);
        NODE_TRAVERSOR.traverse(best);
        return best.toString();
    }

    /**
     * Returns the highest-weighted candidate seen, stopping early at the first one whose
     * weight exceeds {@link #GOOD_ENOUGH_WEIGHT}; {@code null} when no weight is positive.
     */
    private Element findBestCandidate(Document document) {
        Element best = null;
        int bestWeight = 0;
        for (Element candidate : getNodes(document)) {
            int weight = getWeight(candidate);
            if (weight > bestWeight) {
                best = candidate;
                bestWeight = weight;
                if (weight > GOOD_ENOUGH_WEIGHT) {
                    break;
                }
            }
        }
        return best;
    }

    /** Removes the first element at or below {@code root} whose text equals {@code text}. */
    private void removeFirstElementWithText(Element root, String text) {
        for (Element candidate : root.getAllElements()) {
            if (candidate.hasText() && candidate.text().equals(text)) {
                candidate.remove();
                return;
            }
        }
    }

    /**
     * Injection point for the loader used to fetch pages.
     *
     * @param networkLoader loader to use in {@link #extractContentSync}
     */
    @Inject
    public void setNetworkLoader(NetworkLoader networkLoader) {
        this.networkLoader = networkLoader;
    }
}
