package net.iyouqu.spider.parser;

import java.util.Iterator;
import net.iyouqu.spider.rating.Rating;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: assets/extractor.dex */
/**
 * Heuristic main-content extractor built on jsoup.
 *
 * <p>The pipeline implemented by {@link #getContent(Document, String[])} is:
 * strip boilerplate tags from the document, score every element through
 * {@link Rating}, follow the highest-scoring child at each level down to a
 * leaf, build a CSS selector for the container of that leaf, clean the
 * selected element and finally flatten it to text.
 */
public class BasicParser implements Parser {
    /** Tags removed from the whole document, in this order, before scoring. */
    private static final String[] DOCUMENT_NOISE_TAGS = {
        "script", "style", "select", "link", "input", "object", "textarea", "ul", "img", "a"
    };

    /** Regular expressions whose matches are deleted from the content HTML. */
    private static final String[] CONTENT_NOISE_PATTERNS = {
        "(正文)*",
        "((第[\\W]+章){1}([\\s])+([^\\x00-\\xff]*[\\w]*)+){1}",
        "([`~!@#$%^*()+=|{}'',\\[\\]\"])"
    };

    /**
     * Marker written where a line break belongs. It is the two-character
     * sequence backslash + 'n', not a newline character: the final
     * {@code Element.text()} call normalises whitespace.
     * NOTE(review): presumably the consumer turns this marker into a real
     * line break - confirm against the caller.
     */
    private static final String LINE_BREAK_MARKER = "\\n";

    /** Name of the attribute in which the accumulated score is stored. */
    private static final String SCORE_ATTR = "score";

    /**
     * Prepends selector steps for {@code element}'s container to {@code path}.
     *
     * <p>Every step ends with {@code ">"}. Building stops at the first step
     * that is expected to be unambiguous: an element with an id, a non-{@code p}
     * element whose class combination occurs at most once in the document, or
     * a class-less {@code body}.
     *
     * @param element  element whose location is being described; may be null
     * @param path     selector built so far; steps are inserted at index 0
     * @param document document used to check how often a class combination occurs
     * @return the same {@code path} instance
     */
    private StringBuilder checkPath(Element element, StringBuilder path, Document document) {
        if (element == null || element.parent() == null) {
            return path;
        }

        // A div that can be addressed directly becomes the last step itself.
        if ("div".equals(element.tagName())) {
            if (element.hasAttr("id")) {
                path.insert(0, element.tagName() + "#" + element.attr("id") + ">");
                return path;
            }
            String[] ownClasses = splitClassNames(element.attr("class"));
            if (ownClasses.length > 0 && countWithAllClasses(document, ownClasses) <= 1) {
                path.insert(0, element.tagName() + "." + joinWithDots(ownClasses) + ">");
                return path;
            }
        }

        Element parent = element.parent();
        String tagName = parent.tagName();
        if (parent.hasAttr("id")) {
            path.insert(0, tagName + "#" + parent.attr("id") + ">");
            return path;
        }

        // Classes on <p> are deliberately ignored; a blank class attribute is
        // treated like a missing one (an empty class name cannot be looked up).
        String[] parentClasses = splitClassNames(parent.attr("class"));
        if (!"p".equals(tagName) && parentClasses.length > 0) {
            path.insert(0, tagName + "." + joinWithDots(parentClasses) + ">");
            if (countWithAllClasses(document, parentClasses) <= 1) {
                return path;
            }
            return checkPath(parent, path, document);
        }

        path.insert(0, tagName + ">");
        if ("body".equals(tagName)) {
            return path;
        }
        return checkPath(parent, path, document);
    }

    /** Splits a class attribute value on whitespace; blank input gives an empty array. */
    private static String[] splitClassNames(String classAttr) {
        String trimmed = classAttr == null ? "" : classAttr.trim();
        return trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");
    }

    /** Joins class names with '.', e.g. {"a", "b"} becomes "a.b". */
    private static String joinWithDots(String[] classNames) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < classNames.length; i++) {
            if (i > 0) {
                joined.append('.');
            }
            joined.append(classNames[i]);
        }
        return joined.toString();
    }

    /** Counts the elements in the document that carry every one of the given (non-empty) class names. */
    private static int countWithAllClasses(Document document, String[] classNames) {
        int count = 0;
        for (Element candidate : document.getElementsByClass(classNames[0])) {
            boolean hasAll = true;
            for (int i = 1; i < classNames.length; i++) {
                if (!candidate.hasClass(classNames[i])) {
                    hasAll = false;
                    break;
                }
            }
            if (hasAll) {
                count++;
            }
        }
        return count;
    }

    /**
     * Scores {@code element} and all of its descendants.
     *
     * <p>A childless element is scored by {@code Rating.doRate}. Any other
     * element gets {@code Rating.doOwnTextRate} plus the scores of its
     * children, and that total is stored in its {@code score} attribute.
     * NOTE(review): this method does not write the attribute for childless
     * elements; presumably {@code Rating.doRate} does - confirm.
     *
     * @param element root of the subtree to score
     * @return the score of {@code element}
     */
    private int doScoreToElement(Element element) {
        Elements children = element.children();
        if (children.isEmpty()) {
            return Rating.doRate(element);
        }
        int total = Rating.doOwnTextRate(element);
        for (Element child : children) {
            total += doScoreToElement(child);
        }
        element.attr(SCORE_ATTR, String.valueOf(total));
        return total;
    }

    /**
     * Flattens the content element to plain text.
     *
     * <p>Escaped angle brackets in the element's HTML are unescaped and the
     * result is re-parsed into the element. Every {@code <br>} is replaced by
     * the line-break marker and every {@code <p>} gets the marker appended, so
     * that paragraph boundaries survive the conversion to text.
     *
     * @param element content element; its children are modified
     * @return the element's text with literal {@code &nbsp;} replaced by a space
     */
    @Override // net.iyouqu.spider.parser.Parser
    public String convertContent(Element element) {
        String html = element.html();
        // contains() also catches an entity at index 0.
        if (html.contains("&lt;") || html.contains("&gt;")) {
            html = html.replace("&lt;", "<").replace("&gt;", ">");
            element = element.html(html);
        }

        Elements lineBreaks = element.getElementsByTag("br");
        lineBreaks.after(LINE_BREAK_MARKER);
        lineBreaks.remove();

        // append() rather than after(): it also works when the content element
        // itself is a <p> that has been detached from its parent.
        element.getElementsByTag("p").append(LINE_BREAK_MARKER);

        return element.text().replace("&nbsp;", " ");
    }

    /**
     * Deletes noise patterns from the element's HTML, masks the given words
     * and writes the result back into the element.
     *
     * @param element content element whose inner HTML is rewritten
     * @param strArr  words to mask, see {@link #removeYellowWords(String, String[])}; may be null
     * @return the cleaned HTML, identical to what was written into {@code element}
     */
    @Override // net.iyouqu.spider.parser.Parser
    public String denioseContentForContentElement(Element element, String[] strArr) {
        String html = element.html();
        for (String pattern : CONTENT_NOISE_PATTERNS) {
            html = html.replaceAll(pattern, "");
        }
        String cleaned = removeYellowWords(html, strArr);
        element.html(cleaned);
        return cleaned;
    }

    /**
     * Removes every {@code div} and {@code span} found by
     * {@code getElementsByTag} on the content element.
     * NOTE(review): per the jsoup documentation that lookup includes the
     * element itself, so a div/span content element is detached from its
     * parent here - confirm this is acceptable for direct callers.
     *
     * @param element content element to clean
     */
    @Override // net.iyouqu.spider.parser.Parser
    public void denioseElementForContentElement(Element element) {
        element.getElementsByTag("div").remove();
        element.getElementsByTag("span").remove();
    }

    /**
     * Removes boilerplate from the document in place: the tags listed in
     * {@code DOCUMENT_NOISE_TAGS}, elements with {@code display="none"},
     * elements whose class starts with {@code foot} or equals {@code settings},
     * and elements whose style contains {@code display:none} or
     * {@code overflow: hidden}.
     *
     * @param document document to clean
     * @return the same document instance
     */
    @Override // net.iyouqu.spider.parser.Parser
    public Document denoiseElementForDoc(Document document) {
        for (String tag : DOCUMENT_NOISE_TAGS) {
            document.getElementsByTag(tag).remove();
        }
        document.getElementsByAttributeValue("display", "none").remove();
        document.getElementsByAttributeValueStarting("class", "foot").remove();
        document.getElementsByAttributeValue("class", "settings").remove();
        document.getElementsByAttributeValueContaining("style", "display:none").remove();
        document.getElementsByAttributeValueContaining("style", "overflow: hidden").remove();
        return document;
    }

    /**
     * Locates the element that most likely holds the main content.
     *
     * <p>The body is scored, the highest-scoring leaf is found, a selector for
     * its container is built and evaluated against the body. A trailing
     * {@code p} step and everything after the first {@code >p>} are dropped so
     * the selector addresses the container rather than a single paragraph.
     *
     * @param document document to search; its elements receive score attributes
     * @return the first match of the selector, or null when there is no body,
     *         no usable selector or no match
     */
    @Override // net.iyouqu.spider.parser.Parser
    public Element excavateContent(Document document) {
        Element body = document.body();
        if (body == null) {
            return null;
        }
        doScoreToElement(body);

        String selector = checkPath(getMaxScoreChild(body), new StringBuilder(), document).toString();
        int paragraphStep = selector.indexOf(">p>");
        if (paragraphStep >= 0) {
            selector = selector.substring(0, paragraphStep);
        }
        if (selector.endsWith(">")) {
            selector = selector.substring(0, selector.length() - 1);
        }
        if (selector.endsWith(">p")) {
            selector = selector.substring(0, selector.length() - 2);
        }
        // An empty query is rejected by jsoup; treat it as "nothing found".
        if (selector.isEmpty()) {
            return null;
        }
        return body.select(selector).first();
    }

    /**
     * Runs the whole extraction pipeline on {@code document}.
     *
     * @param document document to extract from; it is modified in place
     * @param strArr   words to mask in the result; may be null
     * @return the extracted text, or null when no content element was found
     */
    @Override // net.iyouqu.spider.parser.Parser
    public String getContent(Document document, String[] strArr) {
        denoiseElementForDoc(document);
        Element content = excavateContent(document);
        if (content == null) {
            return null;
        }
        denioseElementForContentElement(content);
        denioseContentForContentElement(content, strArr);
        return convertContent(content);
    }

    /**
     * Walks down from {@code element}, at each level moving to the child with
     * the highest positive {@code score} attribute (the first child when no
     * child has a positive score), until an element without children is reached.
     *
     * @param element starting element
     * @return the childless element the walk ends on; {@code element} itself
     *         when it has no children
     */
    public Element getMaxScoreChild(Element element) {
        Element current = element;
        while (true) {
            Elements children = current.children();
            if (children.isEmpty()) {
                return current;
            }
            Element best = children.first();
            int bestScore = 0;
            for (Element child : children) {
                int score = parseScore(child.attr(SCORE_ATTR));
                if (score > bestScore) {
                    best = child;
                    bestScore = score;
                }
            }
            current = best;
        }
    }

    /** Parses a score attribute value; a missing, empty or non-numeric value counts as 0. */
    private static int parseScore(String value) {
        if (value == null || value.isEmpty()) {
            return 0;
        }
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException ignored) {
            // The attribute may come from the page itself rather than from scoring.
            return 0;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Replaces every match of each given word with {@code "*"}.
     *
     * <p>Each word is applied with {@code String.replaceAll}, i.e. it is
     * interpreted as a regular expression. Null and empty entries are skipped.
     *
     * @param str    text to filter; returned unchanged when null
     * @param strArr words to mask; may be null or empty
     * @return the filtered text
     */
    public String removeYellowWords(String str, String[] strArr) {
        if (str == null || strArr == null) {
            return str;
        }
        for (String word : strArr) {
            if (word == null || word.isEmpty()) {
                continue;
            }
            str = str.replaceAll(word, "*");
        }
        return str;
    }
}
