package org.xmlcml.svg2xml.analyzer;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import nu.xom.Builder;
import nu.xom.Element;
import nu.xom.Nodes;
import org.apache.log4j.Logger;
import org.xmlcml.cml.base.CMLUtil;
import org.xmlcml.euclid.EuclidConstants;
import org.xmlcml.graphics.svg.SVGG;
import org.xmlcml.html.HtmlDiv;
import org.xmlcml.html.HtmlElement;
import org.xmlcml.html.HtmlLi;
import org.xmlcml.html.HtmlUl;
import org.xmlcml.svg2xml.text.TextLineContainer;

/* loaded from: input_file:org/xmlcml/svg2xml/analyzer/HtmlEditor.class */
/**
 * Collects the {@link HtmlAnalyzer}s produced for the chunks of a PDF document
 * (indexed by {@link ChunkId}) and provides the post-processing passes that run
 * over them: duplicate marking, categorisation into figures / tables / merged
 * text, caption merging, figure and table analysis, and output.
 *
 * <p>Several passes depend on state set up by earlier ones:
 * {@link #outputHtmlElements()}, {@link #analyzeFigures()} and
 * {@link #analyzeTables()} read lists that are only created by
 * {@link #categorizeHtml()}.
 */
public class HtmlEditor {
    private static final Logger LOG = Logger.getLogger(HtmlEditor.class);
    /** Class attribute / chunk type given to the merged text div. */
    private static final String TEXT = "TEXT";
    private static final String FIGURE = "FIGURE";
    private static final String TABLE = "TABLE";
    /** Class attribute that marks an analyzer to be skipped by {@link #categorizeHtml()}. */
    private static final String OMIT = "omit";

    /** Lazily built, cached view of {@link #htmlAnalyzerByIdMap} ordered by chunk id. */
    private List<HtmlAnalyzer> htmlAnalyzerListSortedByChunkId;
    private PDFAnalyzer pdfAnalyzer;
    private Map<ChunkId, HtmlAnalyzer> htmlAnalyzerByIdMap;
    private List<HtmlAnalyzer> figureHtmlAnalyzerList;
    private List<HtmlAnalyzer> tableHtmlAnalyzerList;
    private List<HtmlAnalyzer> mergedHtmlAnalyzerList;
    private HtmlAnalyzer textDivAnalyzer;

    /**
     * @param pDFAnalyzer owning analyzer; its {@code pdfIndex} and
     *                    {@code outputDocumentDir} are read by later passes
     */
    public HtmlEditor(PDFAnalyzer pDFAnalyzer) {
        this.pdfAnalyzer = pDFAnalyzer;
    }

    /**
     * Visitor entry point: hands this editor to {@code htmlVisitor}.
     *
     * @param htmlVisitor visitor to call back
     */
    public void accept(HtmlVisitor htmlVisitor) {
        htmlVisitor.visit(this);
    }

    /**
     * Walks the analyzers in chunk-id order and sorts them by class attribute:
     * <ul>
     *   <li>no class attribute: added to the merged list and merged into a
     *       freshly created text div;</li>
     *   <li>{@code "omit"}: skipped;</li>
     *   <li>first token {@code "FIGURE"} / {@code "TABLE"}: chunk type set and
     *       added to the figure / table list;</li>
     *   <li>anything else: only logged.</li>
     * </ul>
     * The figure, table and merged lists are re-created on every call.
     */
    public void categorizeHtml() {
        LOG.debug("Merging HTML");
        HtmlDiv textDiv = new HtmlDiv();
        createTextDivAnalyzer(textDiv);
        this.figureHtmlAnalyzerList = new ArrayList<HtmlAnalyzer>();
        this.tableHtmlAnalyzerList = new ArrayList<HtmlAnalyzer>();
        this.mergedHtmlAnalyzerList = new ArrayList<HtmlAnalyzer>();
        HtmlAnalyzer previousMerged = null;
        // Use the lazy getter so this pass works even if the sorted list has not been built yet.
        for (HtmlAnalyzer current : getHtmlAnalyzerListSortedByChunkId()) {
            String id = current.getId();
            String classAttribute = current.getClassAttribute();
            String firstToken = firstToken(classAttribute);
            LOG.trace("Class " + classAttribute + EuclidConstants.S_SPACE + firstToken + EuclidConstants.S_SPACE + current.getAnalyzer());
            if (classAttribute == null) {
                this.mergedHtmlAnalyzerList.add(current);
                LOG.trace("merging " + id);
                merge(previousMerged, current, textDiv);
                previousMerged = current;
            } else if (OMIT.equals(classAttribute)) {
                LOG.trace("OMITTED " + id);
            } else if (FIGURE.equals(firstToken)) {
                current.setChunkType(firstToken);
                this.figureHtmlAnalyzerList.add(current);
                LOG.trace(classAttribute + " = " + id);
            } else if (TABLE.equals(firstToken)) {
                // NOTE(review): re-sets the id the analyzer already reports; kept in case
                // setId has side effects beyond storing the value - confirm.
                current.setId(id);
                current.setChunkType(firstToken);
                this.tableHtmlAnalyzerList.add(current);
                LOG.trace(classAttribute + " = " + id);
            } else {
                LOG.trace("untreated CLASS " + classAttribute);
            }
        }
    }

    /**
     * Returns the first whitespace-separated token of a class attribute.
     *
     * @param classAttribute attribute value, may be {@code null}
     * @return {@code null} for {@code null} input; the input itself when
     *         splitting yields no tokens (e.g. whitespace-only input, which
     *         would otherwise raise ArrayIndexOutOfBoundsException)
     */
    private static String firstToken(String classAttribute) {
        if (classAttribute == null) {
            return null;
        }
        String[] tokens = classAttribute.split(EuclidConstants.S_WHITEREGEX);
        return tokens.length == 0 ? classAttribute : tokens[0];
    }

    /** Creates the analyzer wrapping the merged text div and labels it as {@code TEXT}. */
    private void createTextDivAnalyzer(HtmlDiv htmlDiv) {
        this.textDivAnalyzer = new HtmlAnalyzer(htmlDiv, this);
        this.textDivAnalyzer.setClassAttribute(TEXT);
        this.textDivAnalyzer.setChunkType(TEXT);
        this.textDivAnalyzer.setSerial(1);
        this.textDivAnalyzer.setId("t.1.0");
    }

    /**
     * Adds {@code current} to {@code textDiv}. When the previous chunk ends with a
     * ragged line and the current one starts with one, line merging is attempted
     * first; if that reports success nothing else is appended. Otherwise an id
     * separator and a copy of the current HTML element are appended.
     *
     * @param previous previously merged analyzer, or {@code null} for the first one
     * @param current  analyzer to merge
     * @param textDiv  div accumulating the merged text
     */
    private void merge(HtmlAnalyzer previous, HtmlAnalyzer current, HtmlDiv textDiv) {
        TextLineContainer previousLines = previous == null ? null : previous.getTextLineContainer();
        TextLineContainer currentLines = current.getTextLineContainer();
        boolean raggedJoin = previousLines != null && currentLines != null
                && previousLines.endsWithRaggedLine() && currentLines.startsWithRaggedLine();
        if (raggedJoin && current.mergeLinesWithPrevious(previous, textDiv)) {
            return;
        }
        current.addIdSeparator(textDiv);
        Element copy;
        try {
            copy = HtmlElement.create(current.getHtmlElement());
        } catch (Exception e) {
            // Fall back to a plain XOM copy when the element cannot be re-created as HTML.
            LOG.debug("cannot create HTML: " + e);
            copy = (Element) current.getHtmlElement().copy();
        }
        textDiv.appendChild(copy);
    }

    /**
     * Marks every analyzer whose chunk id is in the PDF index's used-id set and
     * which has no class attribute yet with class {@code "omit"}.
     */
    public void removeDuplicates() {
        for (HtmlAnalyzer htmlAnalyzer : getHtmlAnalyzerListSortedByChunkId()) {
            ChunkId chunkId = new ChunkId(htmlAnalyzer.getId());
            if (!this.pdfAnalyzer.pdfIndex.usedIdSet.contains(chunkId)) {
                continue;
            }
            String classAttribute = htmlAnalyzer.getClassAttribute();
            LOG.trace(chunkId + EuclidConstants.S_SPACE + classAttribute);
            if (classAttribute == null) {
                LOG.trace("skip duplicate: " + chunkId + EuclidConstants.S_SPACE + classAttribute);
                htmlAnalyzer.setClassAttribute(OMIT);
            }
        }
    }

    /**
     * Writes figures, tables, merged chunks and finally the merged text div to
     * the PDF analyzer's output document directory.
     *
     * <p>Requires {@link #categorizeHtml()} to have run; it creates the lists
     * and the text div analyzer used here.
     */
    public void outputHtmlElements() {
        LOG.debug("figures HTML");
        outputAll(this.figureHtmlAnalyzerList);
        LOG.debug("tables HTML");
        outputAll(this.tableHtmlAnalyzerList);
        LOG.debug("merged HTML");
        outputAll(this.mergedHtmlAnalyzerList);
        LOG.debug("merged TEXT");
        this.textDivAnalyzer.outputElementAsHtml(this.pdfAnalyzer.outputDocumentDir);
    }

    /** Outputs each analyzer of {@code analyzers} to the output document directory. */
    private void outputAll(List<HtmlAnalyzer> analyzers) {
        for (HtmlAnalyzer analyzer : analyzers) {
            analyzer.outputElementAsHtml(this.pdfAnalyzer.outputDocumentDir);
        }
    }

    /**
     * Adds type/serial attributes to every analyzer and, for those reported as
     * {@code "FIGURE"} that do not already contain an image div, asks the
     * analyzer to add its image div to the previous analyzer.
     */
    public void mergeCaptions() {
        // Use the lazy getter so this pass works even if the sorted list has not been built yet.
        for (HtmlAnalyzer htmlAnalyzer : getHtmlAnalyzerListSortedByChunkId()) {
            if (!FIGURE.equals(htmlAnalyzer.addTypeSerialAttributes())) {
                continue;
            }
            LOG.trace("FIG FIX");
            if (htmlAnalyzer.containsDivImage()) {
                LOG.trace("***********IMG************");
            } else {
                htmlAnalyzer.addImageDivTo(htmlAnalyzer.getPreviousHtmlAnalyzer(this.htmlAnalyzerByIdMap));
            }
        }
    }

    /**
     * Runs the XPath {@code str} over each file and collects, without
     * duplicates, the node values fully matching {@code pattern} into one list.
     *
     * <p>Hits from all files are accumulated into a single list. Files that
     * cannot be parsed are logged and skipped.
     *
     * @param list    HTML/XML files to search
     * @param str     XPath expression evaluated against each root element
     * @param pattern pattern each node value must match entirely
     * @return list of unique matching values, or {@code null} if no file could be parsed
     */
    public HtmlUl searchHtml(List<File> list, String str, Pattern pattern) {
        Set<String> seenValues = new HashSet<String>();
        HtmlUl results = null;
        for (File file : list) {
            Element root;
            try {
                root = new Builder().build(file).getRootElement();
            } catch (Exception e) {
                LOG.error("Failed on html File: " + file, e);
                continue;
            }
            if (root == null) {
                continue;
            }
            // Create the result list once so matches from earlier files are kept.
            if (results == null) {
                results = new HtmlUl();
            }
            searchHtml(str, pattern, results, seenValues, root);
        }
        return results;
    }

    /**
     * Appends one {@code li} to {@code htmlUl} for each node selected by
     * {@code str} whose value matches {@code pattern} and is not yet in {@code set}.
     */
    private void searchHtml(String str, Pattern pattern, HtmlUl htmlUl, Set<String> set, Element element) {
        Nodes hits = element.query(str);
        for (int i = 0; i < hits.size(); i++) {
            String value = hits.get(i).getValue();
            if (!pattern.matcher(value).matches() || set.contains(value)) {
                continue;
            }
            LOG.trace(value);
            HtmlLi item = new HtmlLi();
            htmlUl.appendChild(item);
            item.setValue(value);
            set.add(value);
        }
    }

    /**
     * Returns the analyzers ordered by chunk id, building and caching the list
     * on first use. Building also sets each analyzer's id from its map key.
     *
     * <p>The list is cached: analyzers indexed after the first call are not
     * reflected in it.
     *
     * @return cached list sorted by chunk id; never {@code null}
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public List<HtmlAnalyzer> getHtmlAnalyzerListSortedByChunkId() {
        if (this.htmlAnalyzerListSortedByChunkId == null) {
            // Go through the getter so an un-initialised map yields an empty list, not an NPE.
            Map<ChunkId, HtmlAnalyzer> byId = getHtmlAnalyzerByIdMap();
            List<ChunkId> sortedIds = new ArrayList<ChunkId>(byId.keySet());
            Collections.sort(sortedIds);
            List<HtmlAnalyzer> sorted = new ArrayList<HtmlAnalyzer>(sortedIds.size());
            for (ChunkId chunkId : sortedIds) {
                HtmlAnalyzer htmlAnalyzer = byId.get(chunkId);
                htmlAnalyzer.setId(chunkId.toString());
                sorted.add(htmlAnalyzer);
            }
            this.htmlAnalyzerListSortedByChunkId = sorted;
        }
        return this.htmlAnalyzerListSortedByChunkId;
    }

    /**
     * @return the chunk-id to analyzer map, created empty on first access
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public Map<ChunkId, HtmlAnalyzer> getHtmlAnalyzerByIdMap() {
        ensureHtmlAnalyzerByIdMap();
        return this.htmlAnalyzerByIdMap;
    }

    /**
     * Calls {@code addLinks} on every analyzer in chunk-id order, passing the
     * preceding analyzer ({@code null} for the first).
     */
    public void createLinkedElementList() {
        HtmlAnalyzer previous = null;
        for (HtmlAnalyzer current : getHtmlAnalyzerListSortedByChunkId()) {
            current.addLinks(previous);
            previous = current;
        }
    }

    /**
     * @param chunkId id to look up
     * @return the value of the analyzer indexed under {@code chunkId}, or
     *         {@code null} if none is indexed
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public String getValueFromHtml(ChunkId chunkId) {
        HtmlAnalyzer htmlAnalyzer = getHtmlAnalyzerByIdMap().get(chunkId);
        return htmlAnalyzer == null ? null : htmlAnalyzer.getValue();
    }

    /**
     * @param chunkId id to look up
     * @return the analyzer indexed under {@code chunkId}, or {@code null}
     */
    protected HtmlAnalyzer getHtmlAnalyzer(ChunkId chunkId) {
        return getHtmlAnalyzerByIdMap().get(chunkId);
    }

    /** Creates the chunk-id to analyzer map if it does not exist yet. */
    protected void ensureHtmlAnalyzerByIdMap() {
        if (this.htmlAnalyzerByIdMap == null) {
            this.htmlAnalyzerByIdMap = new HashMap<ChunkId, HtmlAnalyzer>();
        }
    }

    /**
     * Not yet implemented.
     *
     * @throws UnsupportedOperationException always (a {@link RuntimeException})
     */
    public SVGG labelChunk() {
        throw new UnsupportedOperationException("NYI");
    }

    /**
     * Indexes {@code htmlAnalyzer} under {@code chunkId}, replacing any previous entry.
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public void indexHtmlBySvgId(HtmlAnalyzer htmlAnalyzer, ChunkId chunkId) {
        getHtmlAnalyzerByIdMap().put(chunkId, htmlAnalyzer);
    }

    /**
     * Adds a class attribute to the analyzer indexed under {@code chunkId} if it
     * has none; does nothing when no analyzer is indexed under that id.
     *
     * @param chunkId id of the chunk to label
     * @param str     label passed to {@code addClassAttributeIfMissing}
     * @param num     serial passed to {@code addClassAttributeIfMissing}
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public void labelChunk(ChunkId chunkId, String str, Integer num) {
        HtmlAnalyzer htmlAnalyzer = getHtmlAnalyzer(chunkId);
        if (htmlAnalyzer != null) {
            htmlAnalyzer.addClassAttributeIfMissing(str, num);
        }
    }

    /**
     * Not yet implemented.
     *
     * @throws UnsupportedOperationException always (a {@link RuntimeException})
     */
    public HtmlUl searchHtml(String str, Pattern pattern) {
        throw new UnsupportedOperationException("NYI");
    }

    /**
     * Wraps {@code htmlElement} in a new {@link HtmlAnalyzer} and indexes it
     * under {@code chunkId}.
     */
    public void addHtmlElement(HtmlElement htmlElement, ChunkId chunkId) {
        getHtmlAnalyzerByIdMap().put(chunkId, new HtmlAnalyzer(htmlElement, this));
    }

    /**
     * Runs a {@link FigureAnalyzerX} over every categorised figure. Figures whose
     * underlying analyzer is neither mixed nor text are logged and skipped.
     *
     * <p>Requires {@link #categorizeHtml()} to have run.
     */
    public void analyzeFigures() {
        for (HtmlAnalyzer htmlAnalyzer : this.figureHtmlAnalyzerList) {
            FigureAnalyzerX figureAnalyzer = createFigureAnalyzer(htmlAnalyzer);
            if (figureAnalyzer == null) {
                LOG.debug("No figure analyzer for " + htmlAnalyzer.getId());
                continue;
            }
            figureAnalyzer.analyze();
        }
    }

    /**
     * @return a figure analyzer built from the chunk's mixed or text analyzer,
     *         or {@code null} if the chunk's analyzer is of another kind
     */
    private FigureAnalyzerX createFigureAnalyzer(HtmlAnalyzer htmlAnalyzer) {
        AbstractPageAnalyzerX analyzer = htmlAnalyzer.getAnalyzer();
        if (analyzer instanceof MixedAnalyzer) {
            MixedAnalyzer mixed = (MixedAnalyzer) analyzer;
            return new FigureAnalyzerX(mixed.getTextAnalyzer(), mixed.getPathAnalyzer(), mixed.getImageAnalyzer());
        }
        if (analyzer instanceof TextAnalyzerX) {
            return new FigureAnalyzerX((TextAnalyzerX) analyzer, (PathAnalyzerX) null, (ImageAnalyzerX) null);
        }
        return null;
    }

    /**
     * Replaces the HTML element of every categorised table with the result of
     * table analysis, copying the old element's attributes onto the new one.
     * Tables for which no table analyzer can be created are left unchanged.
     *
     * <p>Requires {@link #categorizeHtml()} to have run.
     */
    public void analyzeTables() {
        for (HtmlAnalyzer htmlAnalyzer : this.tableHtmlAnalyzerList) {
            TableAnalyzerX tableAnalyzer = createTableAnalyzer(htmlAnalyzer);
            if (tableAnalyzer == null) {
                // createTableAnalyzer has already logged why the table cannot be processed.
                continue;
            }
            HtmlElement analyzed = tableAnalyzer.analyze1();
            HtmlElement original = htmlAnalyzer.getHtmlElement();
            if (original != null) {
                CMLUtil.copyAttributes(original, analyzed);
            }
            htmlAnalyzer.setHtmlElement(analyzed);
        }
    }

    /**
     * @return a table analyzer built from the chunk's mixed or text analyzer, or
     *         {@code null} when the chunk has no text, contains images, or its
     *         analyzer is of another kind
     */
    private TableAnalyzerX createTableAnalyzer(HtmlAnalyzer htmlAnalyzer) {
        AbstractPageAnalyzerX analyzer = htmlAnalyzer.getAnalyzer();
        if (analyzer instanceof MixedAnalyzer) {
            MixedAnalyzer mixedAnalyzer = (MixedAnalyzer) analyzer;
            LOG.trace("M " + mixedAnalyzer);
            TextAnalyzerX textAnalyzer = mixedAnalyzer.getTextAnalyzer();
            if (textAnalyzer == null) {
                LOG.error("Table has no text so cannot process");
                return null;
            }
            if (mixedAnalyzer.getImageAnalyzer() != null) {
                LOG.error("Cannot currently analyze images in Tables");
                return null;
            }
            return new TableAnalyzerX(textAnalyzer, mixedAnalyzer.getPathAnalyzer());
        }
        if (analyzer instanceof TextAnalyzerX) {
            return new TableAnalyzerX((TextAnalyzerX) analyzer, null);
        }
        return null;
    }
}
