package com.flamenk.dom;

import com.flamenk.TagConstants;
import com.flamenk.util.Consumer;
import com.flamenk.util.NodeUtil;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.Deque;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.lang.StringEscapeUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;

/* loaded from: input_file:com/flamenk/dom/HtmlDocument.class */
/**
 * Tree of {@link HtmlNode}s built from a parsed jsoup {@link Document}.
 *
 * <p>{@link #parse(Document)} walks the jsoup DOM once, creating one {@link HtmlNode} per kept
 * jsoup node, aggregating {@link HtmlNodeMetrics} from the leaves upwards and remembering the
 * node that the configured {@link HtmlNodeArticleRanker} ranks highest. The remaining public
 * methods are queries over the resulting tree and require a prior successful {@code parse}.
 */
public class HtmlDocument {
    /** Document title; absent until a document with a non-blank title has been parsed. */
    private Optional<String> mTitle = Optional.absent();
    private HtmlNode mRoot;
    private HtmlNode mMaxRankNode;
    private HtmlNodeArticleRanker mNodeRanker = SimpleHtmlNodeArticleRanker.getInstance();
    private TextTokenizer mTextTokenizer = SimpleTextTokenizer.getInstance();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/flamenk/dom/HtmlDocument$BuildTreeInfo.class */
    /**
     * Mutable result holder that is threaded through the recursive tree build.
     *
     * <p>A single instance is shared by the whole recursion, so {@link #nodes}, {@link #node} and
     * {@link #nodesBuilt} are overwritten by every {@code with(...)} call and must be read
     * immediately after the call that produced them. {@link #maxRank} and {@link #nodeMaxRank}
     * accumulate over the whole build.
     */
    public static class BuildTreeInfo {
        /** Nodes produced by the most recent {@code buildTrees} call. */
        Deque<HtmlNode> nodes;
        /** Node produced by the most recent {@code buildTree} call. */
        HtmlNode node;
        /** Number of nodes created by the most recent build call. */
        int nodesBuilt;
        /** Highest rank seen so far. */
        double maxRank;
        /** Node that produced {@link #maxRank}; {@code null} until the first node is ranked. */
        HtmlNode nodeMaxRank;

        private BuildTreeInfo() {
            this.maxRank = 0.0d;
        }

        /**
         * Records the result of building a list of sibling subtrees.
         *
         * @param deque the sibling nodes that were built
         * @param i the total number of nodes created for those siblings and their descendants
         * @return this instance
         */
        public BuildTreeInfo with(Deque<HtmlNode> deque, int i) {
            this.nodes = deque;
            this.nodesBuilt = i;
            return this;
        }

        /**
         * Records the result of building a single subtree.
         *
         * @param htmlNode the subtree root that was built
         * @param i the number of nodes created for that subtree, root included
         * @return this instance
         */
        public BuildTreeInfo with(HtmlNode htmlNode, int i) {
            this.node = htmlNode;
            this.nodesBuilt = i;
            return this;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Builds the node tree, title and max-rank node from the given jsoup document.
     *
     * @param document the parsed jsoup document; must not be {@code null}
     * @throws NullPointerException if {@code document} is {@code null}
     * @throws IllegalArgumentException if no direct child of {@code document} is named
     *     {@code TagConstants.HTML}
     */
    public void parse(Document document) {
        Preconditions.checkNotNull(document);
        String title = document.title();
        if (title == null || title.trim().isEmpty()) {
            this.mTitle = Optional.absent();
        } else {
            this.mTitle = Optional.of(title);
        }

        // When several top-level children match, the last one is used as the root.
        Node htmlRoot = null;
        for (Node child : document.childNodes()) {
            if (TagConstants.HTML.equals(child.nodeName())) {
                htmlRoot = child;
            }
        }
        if (htmlRoot == null) {
            // NOTE(review): the message mentions the "body" tag while the check above looks for
            // TagConstants.HTML - confirm which wording is intended before changing the text.
            throw new IllegalArgumentException("The resource at " + document.baseUri() + " is not a valid html document. Does not contain the body root tag.");
        }

        BuildTreeInfo result = buildTree(htmlRoot, null, 0, new BuildTreeInfo());
        this.mRoot = result.node;
        this.mMaxRankNode = result.nodeMaxRank;
    }

    /**
     * Builds one subtree per jsoup node in {@code list}, all sharing the same parent.
     *
     * @param list the jsoup siblings to convert, in document order
     * @param htmlNode the parent of the nodes to build
     * @param i the index to assign to the first sibling
     * @param buildTreeInfo the shared accumulator
     * @return {@code buildTreeInfo}, with {@code nodes} set to the built siblings and
     *     {@code nodesBuilt} set to the number of nodes created across all of their subtrees
     */
    private BuildTreeInfo buildTrees(List<Node> list, HtmlNode htmlNode, int i, BuildTreeInfo buildTreeInfo) {
        Deque<HtmlNode> built = new LinkedList<HtmlNode>();
        int nextIndex = i;
        for (Node jsoupChild : list) {
            // The shared accumulator is overwritten by each call, so read it straight away.
            BuildTreeInfo childInfo = buildTree(jsoupChild, htmlNode, nextIndex, buildTreeInfo);
            built.add(childInfo.node);
            nextIndex += childInfo.nodesBuilt;
        }
        return buildTreeInfo.with(built, nextIndex - i);
    }

    /**
     * Recursively converts a jsoup node and its kept descendants into {@link HtmlNode}s.
     *
     * <p>Text and textarea nodes become leaves. For every other node the children that are not
     * filtered out by {@link #skip(Node)} are built first, their metrics are folded into the new
     * node, and the new node is then offered as a max-rank candidate.
     *
     * @param node the jsoup node to convert
     * @param htmlNode the parent node, or {@code null} for the root
     * @param i the pre-order index of the node being built (the root gets 0, a node's first kept
     *     child gets {@code i + 1})
     * @param buildTreeInfo the shared accumulator
     * @return {@code buildTreeInfo}, with {@code node} set to the built node and
     *     {@code nodesBuilt} set to the size of its subtree
     */
    private BuildTreeInfo buildTree(Node node, HtmlNode htmlNode, int i, BuildTreeInfo buildTreeInfo) {
        if (TagConstants.TEXT.equals(node.nodeName())) {
            return buildTreeInfo.with(buildTextNode(node, htmlNode, i), 1);
        }
        if (TagConstants.TEXTAREA.equals(node.nodeName())) {
            // Textarea content is not descended into; the element is kept as a leaf.
            return buildTreeInfo.with(newHtmlNode(node, htmlNode, i), 1);
        }

        List<Node> keptChildren = new ArrayList<Node>(node.childNodes().size());
        for (Node jsoupChild : node.childNodes()) {
            if (!skip(jsoupChild)) {
                keptChildren.add(jsoupChild);
            }
        }

        HtmlNode current = newHtmlNode(node, htmlNode, i);
        BuildTreeInfo childrenInfo = buildTrees(keptChildren, current, i + 1, buildTreeInfo);
        // Snapshot the results now: childrenInfo is the shared accumulator itself.
        Deque<HtmlNode> children = childrenInfo.nodes;
        int descendantsBuilt = childrenInfo.nodesBuilt;

        HtmlNodeMetrics metrics = current.getMetrics();
        for (HtmlNode child : children) {
            metrics.add(child.getMetrics());
            int trailingTags = getNumTrailingTags(child.getJsoupNode());
            int divTokens = NodeUtil.hasName(child, TagConstants.DIV) ? 2 : 0;
            metrics.addNumTokens(trailingTags);
            metrics.addNumDivTokens(divTokens);
            calcAndSetChildMetrics(current, child, trailingTags, divTokens);
        }

        checkAndSetMaxRankNode(current, buildTreeInfo);
        return buildTreeInfo.with(current, descendantsBuilt + 1);
    }

    /**
     * Folds a child's metrics into its parent's block / no-block bookkeeping.
     *
     * <p>A {@code DIV} or {@code ARTICLE} child contributes a copy of its own metrics (plus the
     * given tag counts) as a separate child-block entry. Any other child is merged into the
     * parent's no-block metrics and hands its own child-block entries up to the parent.
     *
     * @param htmlNode the parent node being updated
     * @param htmlNode2 the child node
     * @param i the number of tag tokens to add for the child
     * @param i2 the number of div tokens to add for the child
     */
    private void calcAndSetChildMetrics(HtmlNode htmlNode, HtmlNode htmlNode2, int i, int i2) {
        boolean isBlock = NodeUtil.hasName(htmlNode2, TagConstants.DIV)
                || NodeUtil.hasName(htmlNode2, TagConstants.ARTICLE);
        if (isBlock) {
            HtmlNodeMetrics blockMetrics = HtmlNodeMetrics.newCopy(htmlNode2.getMetrics());
            blockMetrics.addNumDivTokens(i2);
            blockMetrics.addNumTokens(i);
            htmlNode.addChildBlockMetrics(blockMetrics);
            return;
        }
        HtmlNodeMetrics noBlockMetrics = htmlNode.getChildNoBlockMetrics();
        noBlockMetrics.add(htmlNode2.getChildNoBlockMetrics());
        noBlockMetrics.addNumDivTokens(i2);
        noBlockMetrics.addNumTokens(i);
        htmlNode.addAllChildBlockMetrics(htmlNode2.getMutableChildBlockMetrics());
    }

    /**
     * Builds the leaf node for a jsoup text node and fills in its token metrics.
     *
     * <p>The unescaped, trimmed text is tokenized with the configured {@link TextTokenizer}.
     * All tokens count as link tokens when the nearest {@code A}/{@code LI} ancestor is an
     * {@code A}, and as list-item tokens when it is an {@code LI}.
     *
     * @param node the jsoup text node
     * @param htmlNode the parent node, or {@code null} if there is none
     * @param i the index to assign to the new node
     * @return the new text node with its metrics populated
     */
    private HtmlNode buildTextNode(Node node, HtmlNode htmlNode, int i) {
        HtmlNode textNode = newHtmlNode(node, htmlNode, i);
        String text = StringEscapeUtils.unescapeHtml(node.outerHtml()).trim();
        int numTokens = 0;
        boolean insideLink = false;
        boolean insideListItem = false;

        if (!text.isEmpty()) {
            String[] tokens = this.mTextTokenizer.tokenize(text);
            numTokens = tokens.length;
            for (String token : tokens) {
                textNode.getMetrics().addTerm(token);
            }

            // The classification depends only on the ancestors, so it is computed once instead
            // of once per token. The walk ends at the root, which also guarantees termination
            // when no A or LI ancestor exists.
            Optional<HtmlNode> ancestor = textNode.getParent();
            while (ancestor.isPresent()) {
                HtmlNode current = ancestor.get();
                if (NodeUtil.hasName(current, TagConstants.A)) {
                    insideLink = true;
                    break;
                }
                if (NodeUtil.hasName(current, TagConstants.LI)) {
                    insideListItem = true;
                    break;
                }
                ancestor = current.getParent();
            }
        }

        HtmlNodeMetrics metrics = textNode.getMetrics();
        metrics.setNumTokens(numTokens);
        metrics.setNumTextTokens(numTokens);
        metrics.setNumDivTokens(0);
        metrics.setNumTextLinkTokens(0);
        metrics.setNumTextListItemTokens(0);
        if (insideLink) {
            metrics.setNumTextLinkTokens(numTokens);
        } else if (insideListItem) {
            metrics.setNumTextListItemTokens(numTokens);
        }
        textNode.getChildNoBlockMetrics().add(metrics);
        return textNode;
    }

    /**
     * Ranks {@code htmlNode} and records it as the best candidate if it is the first node ranked
     * or its rank is strictly greater than the best rank seen so far.
     */
    private void checkAndSetMaxRankNode(HtmlNode htmlNode, BuildTreeInfo buildTreeInfo) {
        double rank = this.mNodeRanker.rank(htmlNode, this.mTextTokenizer);
        if (buildTreeInfo.nodeMaxRank == null || rank > buildTreeInfo.maxRank) {
            buildTreeInfo.nodeMaxRank = htmlNode;
            buildTreeInfo.maxRank = rank;
        }
    }

    /** Creates an {@link HtmlNode}, using the parentless constructor when {@code htmlNode} is null. */
    private HtmlNode newHtmlNode(Node node, HtmlNode htmlNode, int i) {
        if (htmlNode == null) {
            return new HtmlNode(node, i, this);
        }
        return new HtmlNode(node, htmlNode, i, this);
    }

    /**
     * Returns the number of tag tokens attributed to {@code node} itself.
     *
     * @return 0 for nodes whose name starts with {@code '#'}; 2 when the node has more than one
     *     child or its outer HTML ends with its closing tag; 1 otherwise
     */
    private int getNumTrailingTags(Node node) {
        if (node.nodeName().startsWith("#")) {
            return 0;
        }
        if (node.childNodeSize() > 1) {
            return 2;
        }
        String closingTag = "</" + node.nodeName() + ">";
        // endsWith avoids the false positive of comparing lastIndexOf() == -1 against a length
        // difference of -1 (e.g. "<br>" versus "</br>").
        return node.outerHtml().endsWith(closingTag) ? 2 : 1;
    }

    /** Returns {@code true} for jsoup nodes that are left out of the built tree. */
    private boolean skip(Node node) {
        String name = node.nodeName();
        return TagConstants.SCRIPT.equals(name)
                || TagConstants.NOSCRIPT.equals(name)
                || TagConstants.DATA.equals(name)
                || TagConstants.COMMENT.equals(name)
                || TagConstants.DOCTYPE.equals(name)
                || TagConstants.HEAD.equals(name)
                || TagConstants.STYLE.equals(name);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Replaces the tokenizer used by subsequent {@link #parse(Document)} calls.
     *
     * @throws NullPointerException if {@code textTokenizer} is {@code null}
     */
    public void setTextTokenizer(TextTokenizer textTokenizer) {
        Preconditions.checkNotNull(textTokenizer);
        this.mTextTokenizer = textTokenizer;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Replaces the ranker used by subsequent {@link #parse(Document)} calls.
     *
     * @throws NullPointerException if {@code htmlNodeArticleRanker} is {@code null}
     */
    public void setNodeRanker(HtmlNodeArticleRanker htmlNodeArticleRanker) {
        Preconditions.checkNotNull(htmlNodeArticleRanker);
        this.mNodeRanker = htmlNodeArticleRanker;
    }

    /**
     * Returns the node that received the highest rank during parsing, if any.
     *
     * @throws IllegalStateException if no document has been parsed
     */
    public Optional<HtmlNode> getNodeWithMaxRank() {
        Preconditions.checkState(this.mRoot != null, "No document parsed.");
        return Optional.fromNullable(this.mMaxRankNode);
    }

    /**
     * Returns the root of the parsed tree.
     *
     * @throws IllegalStateException if no document has been parsed
     */
    public HtmlNode getRootNode() {
        Preconditions.checkState(this.mRoot != null, "No document parsed.");
        return this.mRoot;
    }

    /**
     * Returns the document title, or absent when the parsed document had a blank title or no
     * document has been parsed yet. Never returns {@code null}.
     */
    public Optional<String> getTitle() {
        return this.mTitle;
    }

    /**
     * Visits the tree depth-first in pre-order and passes every node inside
     * {@code htmlNodeRange} to {@code consumer}.
     *
     * @throws NullPointerException if either argument is {@code null}
     * @throws IllegalStateException if no document has been parsed
     */
    public void consumeNodesInRange(HtmlNodeRange htmlNodeRange, Consumer<HtmlNode> consumer) {
        Preconditions.checkNotNull(htmlNodeRange);
        Preconditions.checkNotNull(consumer);
        Preconditions.checkState(this.mRoot != null, "No document parsed.");
        Deque<HtmlNode> pending = new LinkedList<HtmlNode>();
        pending.push(this.mRoot);
        while (!pending.isEmpty()) {
            HtmlNode current = pending.pop();
            if (htmlNodeRange.isNodeInRange(current)) {
                consumer.consume(current);
            }
            pushChildrenReversed(pending, current);
        }
    }

    /**
     * Returns every node named {@code str} that lies inside {@code htmlNodeRange}, in
     * depth-first pre-order.
     *
     * @throws NullPointerException if either argument is {@code null}
     * @throws IllegalStateException if no document has been parsed
     */
    public List<HtmlNode> getAllNodesByName(String str, HtmlNodeRange htmlNodeRange) {
        Preconditions.checkNotNull(str);
        Preconditions.checkNotNull(htmlNodeRange);
        Preconditions.checkState(this.mRoot != null, "No document parsed.");
        List<HtmlNode> matches = new LinkedList<HtmlNode>();
        Deque<HtmlNode> pending = new LinkedList<HtmlNode>();
        pending.push(this.mRoot);
        while (!pending.isEmpty()) {
            HtmlNode current = pending.pop();
            if (isNamedNodeInRange(current, str, htmlNodeRange)) {
                matches.add(current);
            }
            pushChildrenReversed(pending, current);
        }
        return matches;
    }

    /**
     * Returns the first node, in depth-first pre-order, that is named {@code str} and lies
     * inside {@code htmlNodeRange}.
     *
     * @throws NullPointerException if either argument is {@code null}
     * @throws IllegalStateException if no document has been parsed
     */
    public Optional<HtmlNode> getFirstNodeByName(String str, HtmlNodeRange htmlNodeRange) {
        Preconditions.checkNotNull(str);
        Preconditions.checkNotNull(htmlNodeRange);
        Preconditions.checkState(this.mRoot != null, "No document parsed.");
        Deque<HtmlNode> pending = new LinkedList<HtmlNode>();
        pending.push(this.mRoot);
        while (!pending.isEmpty()) {
            HtmlNode current = pending.pop();
            if (isNamedNodeInRange(current, str, htmlNodeRange)) {
                return Optional.of(current);
            }
            pushChildrenReversed(pending, current);
        }
        return Optional.absent();
    }

    /**
     * Returns the last node, in depth-first pre-order, that is named {@code str} and lies
     * inside {@code htmlNodeRange}. The whole tree is visited.
     *
     * @throws NullPointerException if either argument is {@code null}
     * @throws IllegalStateException if no document has been parsed
     */
    public Optional<HtmlNode> getLastNodeByName(String str, HtmlNodeRange htmlNodeRange) {
        Preconditions.checkNotNull(str);
        Preconditions.checkNotNull(htmlNodeRange);
        Preconditions.checkState(this.mRoot != null, "No document parsed.");
        HtmlNode lastMatch = null;
        Deque<HtmlNode> pending = new LinkedList<HtmlNode>();
        pending.push(this.mRoot);
        while (!pending.isEmpty()) {
            HtmlNode current = pending.pop();
            if (isNamedNodeInRange(current, str, htmlNodeRange)) {
                lastMatch = current;
            }
            pushChildrenReversed(pending, current);
        }
        return Optional.fromNullable(lastMatch);
    }

    /** Returns whether {@code node} lies inside {@code range} and is named {@code name}. */
    private static boolean isNamedNodeInRange(HtmlNode node, String name, HtmlNodeRange range) {
        return range.isNodeInRange(node) && node.getName().equals(name);
    }

    /**
     * Pushes the children of {@code node} onto {@code stack} last-to-first, so that the first
     * child is the next one popped.
     */
    private static void pushChildrenReversed(Deque<HtmlNode> stack, HtmlNode node) {
        Iterator<HtmlNode> reversed = node.getChildren().descendingIterator();
        while (reversed.hasNext()) {
            stack.push(reversed.next());
        }
    }
}
