package org.xmlcml.svg2xml.pdf;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.xmlcml.euclid.Int2Range;
import org.xmlcml.euclid.IntArray;
import org.xmlcml.euclid.IntMatrix;
import org.xmlcml.euclid.RealRangeArray;
import org.xmlcml.graphics.svg.SVGElement;
import org.xmlcml.graphics.svg.SVGG;
import org.xmlcml.graphics.svg.SVGImage;
import org.xmlcml.graphics.svg.SVGPath;
import org.xmlcml.graphics.svg.SVGText;
import org.xmlcml.graphics.svg.SVGUtil;
import org.xmlcml.svg2xml.container.AbstractContainer;
import org.xmlcml.svg2xml.container.ImageContainer;
import org.xmlcml.svg2xml.container.ScriptContainer;
import org.xmlcml.svg2xml.container.ShapeContainer;
import org.xmlcml.svg2xml.indexer.AbstractIndexer;
import org.xmlcml.svg2xml.indexer.AppendixIndexer;
import org.xmlcml.svg2xml.indexer.BibRefIndexer;
import org.xmlcml.svg2xml.indexer.ChapterIndexer;
import org.xmlcml.svg2xml.indexer.DOIIndexer;
import org.xmlcml.svg2xml.indexer.FigureIndexer;
import org.xmlcml.svg2xml.indexer.LicenceIndexer;
import org.xmlcml.svg2xml.indexer.MiscellaneousIndexer;
import org.xmlcml.svg2xml.indexer.SchemeIndexer;
import org.xmlcml.svg2xml.indexer.SummaryIndexer;
import org.xmlcml.svg2xml.indexer.TableIndexer;
import org.xmlcml.svg2xml.page.PageAnalyzer;
import org.xmlcml.svg2xml.text.ScriptLine;
import org.xmlcml.svg2xml.text.ScriptWord;
import org.xmlcml.svg2xml.util.SVG2XMLUtil;
import org.xmlcml.svg2xml.util.TextFlattener;

/* loaded from: input_file:org/xmlcml/svg2xml/pdf/PDFIndex.class */
/**
 * Collects the chunks of a PDF-derived SVG document into several lookup tables
 * (by text content, digit-flattened content, leading text before the first
 * integer, image content, path content and bounding box) and reports the
 * chunks that share a key as duplicates.
 *
 * <p>All maps are created lazily. The class keeps mutable state and performs no
 * synchronisation of its own.
 */
public class PDFIndex {
    /** Character class of the line-break characters replaced by a space before text is indexed. */
    private static final String WHITESPACE = "[\n\r\u0085\u2028\u2029]";
    private static final String DUPLICATE = "duplicate";
    private static final String BBOX = "bbox";
    public static final String CONTENT = "content";
    private static final String FIRST_INTEGER = "firstInteger";
    private static final String FLATTENED = "flattened";
    public static final String IMAGE = "image";
    public static final String PATH = "path";
    public static final String CHUNK_TYPE = "chunkType";
    public static final String ABSTRACT = "abstract";
    public static final String APPENDIX = "appendix";
    public static final String BIBREF = "bibRef";
    public static final String CHAPTER = "chapter";
    public static final String DOI_CITE = "doiCite";
    public static final String FIGURE = "figure";
    public static final String LICENCE = "licence";
    public static final String SCHEME = "scheme";
    public static final String SNIPPET = "snippet";
    public static final String TABLE = "table";

    // Duplicate counters: only ever reset or set in this class, never read here.
    private int duplicateBboxCount;
    private int duplicateContentCount;
    private int duplicateFlattenedCount;
    private int duplicateFirstIntegerCount;
    private int duplicateImageCount;
    private int duplicatePathCount;

    // Element-level indexes, created by ensureElementMultimaps().
    private Map<ChunkId, SVGElement> svgElementByIdMap;
    private Multimap<String, ChunkId> svgIdByContentMap;
    private Multimap<String, ChunkId> svgIdByFlattenedMap;
    private Multimap<String, ChunkId> svgIdByFirstIntegerMap;
    private Multimap<String, ChunkId> svgIdByImageContentMap;
    private Multimap<String, ChunkId> svgIdByPathIdMap;

    // Declared but never assigned or read in this class.
    private Multimap<String, ChunkId> svgIdByAppendixMap;
    private Multimap<String, ChunkId> svgIdByBibRefMap;
    private Multimap<String, ChunkId> svgIdByChapterMap;
    private Multimap<String, ChunkId> svgIdByFigureMap;
    private Multimap<String, ChunkId> svgIdBySchemeMap;
    private Multimap<String, ChunkId> svgIdByTableMap;

    // Container-level indexes, created by ensureContainerMaps().
    private Multimap<Double, AbstractContainer> scriptContainerByBoldFontSize;
    private Multimap<Double, AbstractContainer> pathContainerByPathString;
    private Multimap<Double, AbstractContainer> imageContainerByImageString;
    private Multimap<Int2Range, ChunkId> bboxMap;

    PDFAnalyzer pdfAnalyzer;

    // Results of the duplicate searches run by createIndexes().
    private List<List<ChunkId>> contentIdListList;
    private List<List<ChunkId>> flattenedIdListList;
    private List<List<ChunkId>> firstIntegerIdListList;
    private List<List<ChunkId>> imageIdListList;
    private List<List<ChunkId>> pathIdListList;
    private List<List<ChunkId>> bboxIdListList;

    /** Ids already reported in some duplicate group; they are excluded from later groups. */
    Set<ChunkId> usedIdSet;

    // Semantic indexers, created together by ensureSemanticAnalyzers().
    private AppendixIndexer appendixAnalyzer;
    private BibRefIndexer bibRefAnalyzer;
    private ChapterIndexer chapterAnalyzer;
    private DOIIndexer doiAnalyzer;
    FigureIndexer figureAnalyzer;
    private LicenceIndexer licenceAnalyzer;
    private SchemeIndexer schemeAnalyzer;
    private MiscellaneousIndexer snippetAnalyzer;
    private SummaryIndexer summaryAnalyzer;
    private TableIndexer tableAnalyzer;
    private List<AbstractIndexer> analyzerList;

    public static final Logger LOG = Logger.getLogger(PDFIndex.class);
    private static final PrintStream SYSOUT = System.out;

    /**
     * Creates an index bound to the given analyzer.
     *
     * @param pDFAnalyzer analyzer whose decimal-place setting is read when tracing word ranges
     */
    public PDFIndex(PDFAnalyzer pDFAnalyzer) {
        this.pdfAnalyzer = pDFAnalyzer;
    }

    /**
     * Finds the groups of chunk ids that share a key in {@code multimap}.
     *
     * <p>Ids already present in the used-id set are dropped from each group first;
     * every group that still holds more than one id is sorted, returned, and its
     * ids are added to the used-id set so later searches skip them.
     *
     * @param str      label used only in trace output
     * @param multimap index to search
     * @param <T>      key type of the index
     * @return the duplicate groups, possibly empty, never {@code null}
     */
    public <T> List<List<ChunkId>> findDuplicates(String str, Multimap<T, ChunkId> multimap) {
        // The used-id set is otherwise only created when text is indexed.
        ensureUsedIdSet();
        List<List<ChunkId>> duplicateGroups = new ArrayList<List<ChunkId>>();
        for (T key : multimap.keySet()) {
            List<ChunkId> ids = new ArrayList<ChunkId>(multimap.get(key));
            removeUsedIds(ids);
            Collections.sort(ids);
            if (ids.size() > 1) {
                // String.valueOf tolerates a null key, which HashMultimap permits.
                LOG.trace("DUPLICATES: " + str + " >" + SVG2XMLUtil.trim(String.valueOf(key), 15) + " ... < " + ids);
                duplicateGroups.add(ids);
                addUsedIdList(ids);
            }
        }
        LOG.trace("USED: " + this.usedIdSet);
        return duplicateGroups;
    }

    /** Records every id of {@code list} as used. */
    private void addUsedIdList(List<ChunkId> list) {
        ensureUsedIdSet();
        this.usedIdSet.addAll(list);
    }

    /** Removes from {@code list}, in place, every id that is already in the used-id set. */
    private void removeUsedIds(List<ChunkId> list) {
        ensureUsedIdSet();
        list.removeAll(getUsedIdSet());
    }

    /**
     * Creates every element-level map that does not exist yet, and the container
     * maps as well. Maps supplied earlier through a setter are left untouched; the
     * counter belonging to a map is reset to zero when that map is created here.
     */
    public void ensureElementMultimaps() {
        if (this.svgElementByIdMap == null) {
            this.svgElementByIdMap = new HashMap<ChunkId, SVGElement>();
        }
        if (this.svgIdByContentMap == null) {
            this.svgIdByContentMap = HashMultimap.create();
        }
        if (this.svgIdByFlattenedMap == null) {
            this.svgIdByFlattenedMap = HashMultimap.create();
        }
        if (this.svgIdByFirstIntegerMap == null) {
            this.svgIdByFirstIntegerMap = HashMultimap.create();
        }
        if (this.svgIdByImageContentMap == null) {
            this.svgIdByImageContentMap = HashMultimap.create();
            this.duplicateImageCount = 0;
        }
        if (this.svgIdByPathIdMap == null) {
            this.svgIdByPathIdMap = HashMultimap.create();
            this.duplicatePathCount = 0;
        }
        if (this.bboxMap == null) {
            this.bboxMap = HashMultimap.create();
            this.duplicateBboxCount = 0;
        }
        ensureContainerMaps();
    }

    /** Creates each container-level map that does not exist yet. */
    private void ensureContainerMaps() {
        if (this.scriptContainerByBoldFontSize == null) {
            this.scriptContainerByBoldFontSize = HashMultimap.create();
        }
        if (this.pathContainerByPathString == null) {
            this.pathContainerByPathString = HashMultimap.create();
        }
        if (this.imageContainerByImageString == null) {
            this.imageContainerByImageString = HashMultimap.create();
        }
    }

    /**
     * Runs the duplicate search over the content, flattened, image and path
     * indexes, in that order. Because ids found by an earlier search are marked as
     * used, the order determines which group an id is reported in.
     */
    public void createIndexes() {
        ensureElementMultimaps();
        this.contentIdListList = findDuplicates(CONTENT, this.svgIdByContentMap);
        markChunksAndNoteUsed(this.contentIdListList, CONTENT);
        printDuplicates(CONTENT, this.contentIdListList);

        this.flattenedIdListList = findDuplicates(FLATTENED, this.svgIdByFlattenedMap);
        markChunksAndNoteUsed(this.flattenedIdListList, FLATTENED);
        analyzeIntegers(this.flattenedIdListList);
        printDuplicates(FLATTENED, this.flattenedIdListList);

        this.imageIdListList = findDuplicates(IMAGE, this.svgIdByImageContentMap);
        markChunksAndNoteUsed(this.imageIdListList, IMAGE);
        printDuplicates(IMAGE, this.imageIdListList);

        this.pathIdListList = findDuplicates(PATH, this.svgIdByPathIdMap);
        markChunksAndNoteUsed(this.pathIdListList, PATH);
        printDuplicates(PATH, this.pathIdListList);
    }

    /**
     * Traces arithmetic-progression and constant columns of the integer matrix
     * built for each duplicate group.
     *
     * <p>Row extraction is not implemented: the inner loop only logs "NYI", so the
     * row list handed to {@code IntMatrix.createByRows} is always empty. Any
     * exception from the matrix code is logged and the next group is processed.
     */
    private void analyzeIntegers(List<List<ChunkId>> list) {
        for (List<ChunkId> group : list) {
            if (group.isEmpty()) {
                continue;
            }
            createTextFlattener(group.get(0));
            List<IntArray> rows = new ArrayList<IntArray>();
            for (int i = 0; i < group.size(); i++) {
                LOG.trace("NYI");
            }
            try {
                IntMatrix matrix = IntMatrix.createByRows(rows);
                LOG.trace("IM " + matrix);
                if (matrix != null) {
                    traceColumnPatterns(matrix);
                }
            } catch (Exception e) {
                LOG.error("IntMatrix bug" + rows, e);
            }
        }
    }

    /** Traces each column of {@code matrix} that is a step-1 progression or a constant. */
    private void traceColumnPatterns(IntMatrix matrix) {
        for (int col = 0; col < matrix.getCols(); col++) {
            IntArray column = matrix.extractColumnData(col);
            if (column.isArithmeticProgression(1)) {
                LOG.trace("PROG " + column);
            } else if (column.getConstant() != null) {
                LOG.trace("CONS " + column);
            }
        }
    }

    /**
     * Creates a fresh flattener and traces the integer pattern it builds for the
     * fixed placeholder "NYI"; {@code chunkId} is not used yet.
     */
    private TextFlattener createTextFlattener(ChunkId chunkId) {
        TextFlattener textFlattener = new TextFlattener();
        LOG.trace("Flattening pattern " + textFlattener.createIntegerPattern("NYI"));
        return textFlattener;
    }

    /** Placeholder: currently prints nothing. */
    private void printDuplicates(String str, List<List<ChunkId>> list) {
    }

    public void setContentMap(Multimap<String, ChunkId> multimap) {
        this.svgIdByContentMap = multimap;
    }

    public void setDuplicateImageCount(int i) {
        this.duplicateImageCount = i;
    }

    public void setDuplicatePathCount(int i) {
        this.duplicatePathCount = i;
    }

    public void setImageMap(Multimap<String, ChunkId> multimap) {
        this.svgIdByImageContentMap = multimap;
    }

    public void setPathMap(Multimap<String, ChunkId> multimap) {
        this.svgIdByPathIdMap = multimap;
    }

    /**
     * Adds one SVG group to the element, bounding-box, text, image and path indexes.
     *
     * @param svgg group to index; its id becomes the chunk id
     */
    void addToindexes(SVGG svgg) {
        ensureElementMultimaps();
        // Strings are immutable: the replaced text must be kept, otherwise the
        // line breaks stay in the indexed content.
        String value = svgg.getValue().replaceAll(WHITESPACE, " ");
        ChunkId chunkId = new ChunkId(svgg.getId());
        this.svgElementByIdMap.put(chunkId, svgg);
        indexByBoundingBox(svgg, chunkId);
        indexByTextContent(value, chunkId);
        indexByImageContent(svgg, chunkId);
        indexByPathContent(svgg, chunkId);
    }

    /**
     * Indexes a chunk by its text, by its flattened and first-integer forms, and
     * offers it to the semantic indexers. Null or blank text is ignored.
     *
     * @param str     text of the chunk
     * @param chunkId id of the chunk
     */
    public void indexByTextContent(String str, ChunkId chunkId) {
        if (str == null || str.trim().length() == 0) {
            return;
        }
        ensureElementMultimaps();
        this.svgIdByContentMap.put(str, chunkId);
        indexByFlattenedIntegerContent(str, chunkId);
        indexByFirstIntegerContent(str, chunkId);
        indexByContentAnalyzers(str, chunkId);
    }

    /**
     * Offers the chunk to the semantic indexers in a fixed order and stops at the
     * first one whose {@code indexAndLabelChunk} returns non-null. The figure and
     * table indexers are not part of this chain.
     */
    private void indexByContentAnalyzers(String str, ChunkId chunkId) {
        ensureSemanticAnalyzers();
        ensureUsedIdSet();
        if (this.bibRefAnalyzer.indexAndLabelChunk(str, chunkId) != null) {
            return;
        }
        if (this.appendixAnalyzer.indexAndLabelChunk(str, chunkId) != null) {
            return;
        }
        if (this.chapterAnalyzer.indexAndLabelChunk(str, chunkId) != null) {
            return;
        }
        if (this.schemeAnalyzer.indexAndLabelChunk(str, chunkId) != null) {
            return;
        }
        if (this.summaryAnalyzer.indexAndLabelChunk(str, chunkId) != null) {
            return;
        }
        if (this.licenceAnalyzer.indexAndLabelChunk(str, chunkId) != null) {
            return;
        }
        if (this.doiAnalyzer.indexAndLabelChunk(str, chunkId) != null) {
            return;
        }
        // Last resort; its result is not used.
        this.snippetAnalyzer.indexAndLabelChunk(str, chunkId);
    }

    /**
     * Creates all semantic indexers and the analyzer list on first use. The
     * figure indexer doubles as the "already initialised" marker.
     */
    private void ensureSemanticAnalyzers() {
        if (this.figureAnalyzer != null) {
            return;
        }
        this.analyzerList = new ArrayList<AbstractIndexer>();
        this.appendixAnalyzer = new AppendixIndexer(this);
        this.analyzerList.add(this.appendixAnalyzer);
        this.bibRefAnalyzer = new BibRefIndexer(this);
        this.analyzerList.add(this.bibRefAnalyzer);
        this.doiAnalyzer = new DOIIndexer(this);
        this.analyzerList.add(this.doiAnalyzer);
        this.chapterAnalyzer = new ChapterIndexer(this);
        this.analyzerList.add(this.chapterAnalyzer);
        this.figureAnalyzer = new FigureIndexer(this);
        this.analyzerList.add(this.figureAnalyzer);
        this.licenceAnalyzer = new LicenceIndexer(this);
        this.analyzerList.add(this.licenceAnalyzer);
        this.schemeAnalyzer = new SchemeIndexer(this);
        this.analyzerList.add(this.schemeAnalyzer);
        this.snippetAnalyzer = new MiscellaneousIndexer(this);
        this.analyzerList.add(this.snippetAnalyzer);
        this.summaryAnalyzer = new SummaryIndexer(this);
        this.analyzerList.add(this.summaryAnalyzer);
        this.tableAnalyzer = new TableIndexer(this);
        this.analyzerList.add(this.tableAnalyzer);
    }

    /** Indexes the chunk under the integer range built from the group's bounding box. */
    private void indexByBoundingBox(SVGG svgg, ChunkId chunkId) {
        this.bboxMap.put(new Int2Range(svgg.getBoundingBox()), chunkId);
    }

    /**
     * Indexes the chunk under the concatenated d-strings of all paths found in
     * the group; groups without paths are not indexed.
     */
    private void indexByPathContent(SVGG svgg, ChunkId chunkId) {
        List<SVGPath> paths = SVGPath.extractPaths(SVGUtil.getQuerySVGElements(svgg, SVGPath.ALL_PATH_XPATH));
        if (paths.isEmpty()) {
            return;
        }
        StringBuilder key = new StringBuilder();
        for (SVGPath path : paths) {
            key.append(path.getDString());
        }
        this.svgIdByPathIdMap.put(key.toString(), chunkId);
    }

    /**
     * Indexes the chunk under the concatenated image values of all images found
     * in the group; groups without images are not indexed.
     */
    private void indexByImageContent(SVGG svgg, ChunkId chunkId) {
        List<SVGImage> images = SVGImage.extractImages(SVGUtil.getQuerySVGElements(svgg, SVGImage.ALL_IMAGE_XPATH));
        if (images.isEmpty()) {
            return;
        }
        StringBuilder key = new StringBuilder();
        for (SVGImage image : images) {
            String imageValue = image.getImageValue();
            LOG.trace(SVG2XMLUtil.trim(imageValue, 50));
            key.append(imageValue);
        }
        this.svgIdByImageContentMap.put(key.toString(), chunkId);
    }

    /**
     * Indexes the chunk under the result of {@code TextFlattener.flattenDigitStrings}.
     *
     * @return the flattened key
     */
    private String indexByFlattenedIntegerContent(String str, ChunkId chunkId) {
        String flattened = TextFlattener.flattenDigitStrings(str);
        LOG.trace(chunkId + "> " + flattened);
        this.svgIdByFlattenedMap.put(flattened, chunkId);
        LOG.trace(">> " + this.svgIdByFlattenedMap);
        return flattened;
    }

    /**
     * Indexes the chunk under its leading text followed by "0" when the text,
     * split at integers, starts with a string that is followed by a number.
     *
     * @return the key used, or {@code null} when the text does not have that shape
     */
    private String indexByFirstIntegerContent(String str, ChunkId chunkId) {
        List<Object> tokens = TextFlattener.splitAtIntegers(str);
        boolean textThenNumber = tokens.size() >= 2
                && (tokens.get(0) instanceof String)
                && (tokens.get(1) instanceof Number);
        if (!textThenNumber) {
            return null;
        }
        String key = tokens.get(0) + "0";
        this.svgIdByFirstIntegerMap.put(key, chunkId);
        return key;
    }

    /**
     * Writes each listed chunk to {@code target/<str>/duplicate/<chunkId>.svg}.
     * A null or empty list writes nothing and creates no directory.
     *
     * @param list ids of the chunks to write
     * @param str  sub-directory name under {@code target/}
     * @throws RuntimeException wrapping any failure while writing
     */
    public void outputDuplicates(List<ChunkId> list, String str) {
        if (list == null || list.isEmpty()) {
            return;
        }
        File dir = new File("target/" + str + "/duplicate/");
        try {
            dir.mkdirs();
            for (ChunkId chunkId : list) {
                File svgFile = new File(dir, chunkId + ".svg");
                FileOutputStream out = new FileOutputStream(svgFile);
                try {
                    SVGUtil.debug(this.svgElementByIdMap.get(chunkId), out, 1);
                } finally {
                    // Always release the file handle, even when serialisation fails.
                    out.close();
                }
                LOG.trace("wrote: " + svgFile.getAbsolutePath());
            }
        } catch (Exception e) {
            throw new RuntimeException("Cannot write duplicate chunks to " + dir, e);
        }
    }

    public void setBoundingBoxMap(Multimap<Int2Range, ChunkId> multimap) {
        this.bboxMap = multimap;
    }

    /** Placeholder with no behaviour; the capitalised name is kept for existing callers. */
    public void AnalyzeDuplicates() {
    }

    /** Marks a single chunk id as used, creating the used-id set if necessary. */
    public void addUsedId(ChunkId chunkId) {
        ensureUsedIdSet();
        getUsedIdSet().add(chunkId);
    }

    /** Creates the used-id set on first use. */
    private void ensureUsedIdSet() {
        if (getUsedIdSet() == null) {
            setUsedIdSet(new HashSet<ChunkId>());
        }
    }

    /**
     * @return the live used-id set, or {@code null} if it has not been created yet
     */
    public Set<ChunkId> getUsedIdSet() {
        return this.usedIdSet;
    }

    public void setUsedIdSet(Set<ChunkId> set) {
        this.usedIdSet = set;
    }

    public TableIndexer getTableAnalyzer() {
        ensureSemanticAnalyzers();
        return this.tableAnalyzer;
    }

    public FigureIndexer getFigureAnalyzer() {
        ensureSemanticAnalyzers();
        return this.figureAnalyzer;
    }

    public List<AbstractIndexer> getAnalyzerList() {
        ensureSemanticAnalyzers();
        return this.analyzerList;
    }

    /**
     * Placeholder: no marking is performed. The used-id bookkeeping is done by
     * {@link #findDuplicates(String, Multimap)}.
     */
    private void markChunksAndNoteUsed(List<List<ChunkId>> list, String str) {
    }

    /**
     * Lets every script, shape and image container of the page add itself to
     * this index; other container types are only traced.
     *
     * @param pageAnalyzer source of the containers
     */
    public void addToindexes(PageAnalyzer pageAnalyzer) {
        for (AbstractContainer container : pageAnalyzer.getAbstractContainerList()) {
            if (container instanceof ScriptContainer) {
                ((ScriptContainer) container).addToIndexes(this);
            } else if (container instanceof ShapeContainer) {
                ((ShapeContainer) container).addToIndexes(this);
            } else if (container instanceof ImageContainer) {
                ((ImageContainer) container).addToIndexes(this);
            } else {
                LOG.trace("Cannot index " + container.getClass());
            }
        }
    }

    /**
     * Registers a script container under the given key.
     *
     * @param d               key; presumably the bold font size, going by the map name (confirm with callers)
     * @param scriptContainer container to register
     */
    public void addToBoldIndex(Double d, ScriptContainer scriptContainer) {
        ensureContainerMaps();
        LOG.trace("Adding: " + d + " " + scriptContainer);
        this.scriptContainerByBoldFontSize.put(d, scriptContainer);
    }

    /** Not yet implemented: only traces the request. */
    public void addToShapeIndex(String str, AbstractContainer abstractContainer) {
        LOG.trace("NYI Adding: " + str + " " + abstractContainer);
    }

    /** Not yet implemented: only traces the request. */
    public void addToImageIndex(String str, ImageContainer imageContainer) {
        LOG.trace("NYI Adding: " + str + " " + imageContainer);
    }

    public void analyzeContainers() {
        analyzeScriptContainerIndexes();
    }

    /**
     * Traces the containers of the bold index in ascending key order.
     *
     * <p>Entries stored under a {@code null} key are removed from the index first,
     * because the key set is a live view of the multimap.
     */
    private void analyzeScriptContainerIndexes() {
        // The index is only created on first add; an empty index must not fail.
        ensureContainerMaps();
        Set<Double> keys = this.scriptContainerByBoldFontSize.keySet();
        keys.remove(null);
        Double[] sortedKeys = keys.toArray(new Double[0]);
        Arrays.sort(sortedKeys);
        for (Double key : sortedKeys) {
            LOG.trace("************* " + key);
            for (AbstractContainer container : getListByKey(key)) {
                if (container instanceof ScriptContainer) {
                    Iterator<ScriptLine> lines = ((ScriptContainer) container).iterator();
                    while (lines.hasNext()) {
                        traceScriptLine(lines.next());
                    }
                }
                LOG.trace("------" + container.getRawValue());
            }
        }
    }

    /**
     * Traces the word ranges, characters and words of one script line. The word
     * range array obtained from the line is sorted, de-overlapped and formatted
     * through its own methods before being traced.
     */
    private void traceScriptLine(ScriptLine line) {
        RealRangeArray wordRanges = line.getWordRangeArray();
        wordRanges.sortAndRemoveOverlapping();
        wordRanges.format(this.pdfAnalyzer.getDecimalPlaces());
        LOG.trace("wordArray >>>>>>>> " + wordRanges);
        for (SVGText character : line.getSVGTextCharacters()) {
            LOG.trace(character.getValue() + "_" + character.getX() + " ");
        }
        Iterator<ScriptWord> words = line.getScriptWordList().iterator();
        while (words.hasNext()) {
            LOG.trace(" ~  " + words.next().getRawValue());
        }
    }

    /** Returns a snapshot copy of the containers registered under {@code d}. */
    private List<AbstractContainer> getListByKey(Double d) {
        Collection<AbstractContainer> containers = this.scriptContainerByBoldFontSize.get(d);
        return new ArrayList<AbstractContainer>(containers);
    }
}
