package org.xmlcml.svg2xml.analyzer;

import com.google.common.collect.Multimap;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import nu.xom.Nodes;
import nu.xom.Text;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.spi.Configurator;
import org.xmlcml.cml.base.CMLUtil;
import org.xmlcml.euclid.EuclidConstants;
import org.xmlcml.euclid.Real2;
import org.xmlcml.graphics.svg.SVGElement;
import org.xmlcml.graphics.svg.SVGG;
import org.xmlcml.graphics.svg.SVGLine;
import org.xmlcml.graphics.svg.SVGSVG;
import org.xmlcml.graphics.svg.SVGTitle;
import org.xmlcml.graphics.svg.SVGUtil;
import org.xmlcml.html.HtmlElement;
import org.xmlcml.html.HtmlMenuSystem;
import org.xmlcml.pdf2svg.PDF2SVGConverter;
import org.xmlcml.svg2xml.action.PageEditorX;
import org.xmlcml.svg2xml.action.SVGPlusConstantsX;
import org.xmlcml.svg2xml.action.SemanticDocumentActionX;
import org.xmlcml.svg2xml.tools.Chunk;
import org.xmlcml.svg2xml.util.NameComparator;

/* loaded from: input_file:org/xmlcml/svg2xml/analyzer/PDFAnalyzer.class */
public class PDFAnalyzer {
    /** Logger shared by all instances of this class. */
    private static final Logger LOG = Logger.getLogger(PDFAnalyzer.class);
    // NOTE(review): not referenced by name anywhere in this class - confirm whether it is still needed.
    private static final String HTTP = "http";
    // NOTE(review): presumably a prefix for chunk-related names; unused in this class - confirm against callers.
    public static final String Z_CHUNK = "z_";
    /** Same text that createPageRoot puts in front of the 1-based page number. */
    public static final String PAGE = "page";
    // Assigned through setInputTopDir.
    private File inputTopDir;
    // The PDF currently being analyzed; assigned by analyzePDFFile.
    private File inFile;
    // Simple file name of inFile (no directory part).
    private String inputName;
    // Document name used for the per-document directories and as the prefix of the page SVG file names.
    private String fileRoot;
    // svgTopDir/fileRoot: directory the page SVG files are read from (and written to by the converter).
    private File svgDocumentDir;
    // SVG file of the page most recently read.
    private File svgPageFile;
    // outputTopDir/fileRoot: directory the annotated page SVGs are written to.
    File outputDocumentDir;
    // Zero-based index of the page being processed; createPageRoot adds 1 when building file names.
    private int pageNumber;
    // When true, createAndAnalyzeSVGChunksOld skips pages whose SVG file exists.
    private boolean skipFile;
    // Supplied through the one-argument constructor.
    private DocumentListAnalyzer documentListAnalyzer;
    // Created lazily by ensurePDFIndex.
    PDFIndex pdfIndex;
    // One output SVG per processed page, appended in processing order; created lazily by ensureSVGOutList.
    private List<SVGSVG> svgOutList;
    // Created lazily by ensureGOutList; nothing in this class adds elements to it.
    private List<SVGG> gOutList;
    // Created by analyzePDF, or lazily by ensureHtmlEditor.
    HtmlEditor htmlEditor;
    // Default top-level directory for generated SVG; replaceable through setSVGTopDir.
    private File svgTopDir = new File("target/svg");
    // Default top-level directory for analysis output; replaceable through setOutputTopDir.
    private File outputTopDir = new File("target/output");

    /**
     * Creates an analyzer that uses the default SVG and output directories
     * and has no document-list analyzer attached.
     */
    public PDFAnalyzer() {
    }

    /**
     * Creates an analyzer attached to the given document-list analyzer.
     *
     * @param documentListAnalyzer the analyzer to remember; stored as given
     */
    public PDFAnalyzer(DocumentListAnalyzer documentListAnalyzer) {
        this.documentListAnalyzer = documentListAnalyzer;
    }

    /**
     * Sets the top-level input directory.
     *
     * @param file the directory to store; not validated here
     */
    public void setInputTopDir(File file) {
        this.inputTopDir = file;
    }

    /**
     * Sets the directory under which one SVG sub-directory per document is kept.
     *
     * @param file replacement for the default {@code target/svg}
     */
    public void setSVGTopDir(File file) {
        this.svgTopDir = file;
    }

    /**
     * Sets the directory under which one output sub-directory per document is kept.
     *
     * @param file replacement for the default {@code target/output}
     */
    public void setOutputTopDir(File file) {
        this.outputTopDir = file;
    }

    /**
     * Sets the document root name. Note that analyzePDFFile overwrites this
     * value with one derived from the PDF file name.
     *
     * @param str the root name to store
     */
    public void setFileRoot(String str) {
        this.fileRoot = str;
    }

    /**
     * Sets the skip flag consulted by createAndAnalyzeSVGChunksOld.
     *
     * @param z new value of the flag
     */
    public void setSkipFile(boolean z) {
        this.skipFile = z;
    }

    /**
     * Dispatches a command-line style argument to the matching analysis routine.
     *
     * <ul>
     *   <li>an argument starting with {@code http://} or {@code https://} is treated as a URL;</li>
     *   <li>any other argument ending in {@code .pdf} is treated as a single PDF file;</li>
     *   <li>everything else is treated as a directory of PDFs or a listing file.</li>
     * </ul>
     *
     * The URL test comes first so that a URL is recognised whether or not it ends
     * in {@code .pdf}; previously such a URL was handed to the listing-file
     * reader, which silently ignored it.
     *
     * @param str the file name, directory name, listing-file name or URL
     * @throws RuntimeException if {@code str} is null, or for URLs (not yet implemented)
     */
    private void analyzePDFs(String str) {
        if (str == null) {
            throw new RuntimeException("file/s must not be null");
        }
        if (str.startsWith("http://") || str.startsWith("https://")) {
            analyzePDFURL(str);
        } else if (str.endsWith(".pdf")) {
            analyzePDFFile(new File(str));
        } else {
            readFilenamesAndAnalyzePDFs(new File(str));
        }
    }

    /**
     * Analyzes every PDF named by {@code file}.
     *
     * <p>If {@code file} is a directory, each entry whose name ends in
     * {@code .pdf} is analyzed. Otherwise {@code file} is read as a listing:
     * one PDF name per line, resolved against the listing's parent directory;
     * lines starting with the hash marker or not ending in {@code .pdf} are
     * ignored.
     *
     * @param file a directory or a listing file; a non-existent file is logged and ignored
     * @throws RuntimeException if the listing file cannot be read
     */
    private void readFilenamesAndAnalyzePDFs(File file) {
        if (!file.exists()) {
            // Still a no-op, but leave a trace so a mistyped path is not silently dropped.
            LOG.warn("Input does not exist, nothing to analyze: " + file);
            return;
        }
        if (file.isDirectory()) {
            File[] listFiles = file.listFiles(new FilenameFilter() {
                @Override // java.io.FilenameFilter
                public boolean accept(File file2, String str) {
                    return str.endsWith(".pdf");
                }
            });
            // listFiles() yields null on an I/O error; treat that like an empty directory.
            if (listFiles == null || listFiles.length <= 0) {
                return;
            }
            for (File file2 : listFiles) {
                createAnalyzerAndAnalyzePDF(file2);
            }
            return;
        }
        File parentFile = file.getParentFile();
        try {
            BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
            try {
                String readLine;
                while ((readLine = bufferedReader.readLine()) != null) {
                    if (!readLine.startsWith(EuclidConstants.S_HASH) && readLine.endsWith(".pdf")) {
                        readAndAnalyzeFile(parentFile, readLine);
                    }
                }
            } finally {
                // The reader was previously never closed, leaking a file handle per listing.
                bufferedReader.close();
            }
        } catch (Exception e) {
            throw new RuntimeException("Cannot read listing file: " + file, e);
        }
    }

    /**
     * Resolves one listing entry against its directory and analyzes it.
     *
     * @param file directory the entry is relative to
     * @param str  file name taken from the listing
     */
    private void readAndAnalyzeFile(File file, String str) {
        File pdf = new File(file, str);
        if (!pdf.exists()) {
            // A missing entry is reported but does not stop the rest of the listing.
            LOG.error("PDF file does not exist: " + pdf);
            return;
        }
        createAnalyzerAndAnalyzePDF(pdf);
    }

    /**
     * Analyzes one PDF with a fresh analyzer so that per-document state
     * (index, page lists, HTML editor) does not carry over between files.
     *
     * <p>The fresh analyzer inherits this instance's configuration (top-level
     * directories, skip flag, document-list analyzer); with default settings
     * this is identical to using a plain {@code new PDFAnalyzer()}.
     *
     * <p>Any failure is logged, with its stack trace, and not rethrown, so one
     * bad file does not abort a batch.
     *
     * @param file the PDF to analyze
     */
    private void createAnalyzerAndAnalyzePDF(File file) {
        try {
            PDFAnalyzer analyzer = new PDFAnalyzer(this.documentListAnalyzer);
            analyzer.inputTopDir = this.inputTopDir;
            analyzer.svgTopDir = this.svgTopDir;
            analyzer.outputTopDir = this.outputTopDir;
            analyzer.skipFile = this.skipFile;
            analyzer.analyzePDFFile(file);
        } catch (Exception e) {
            // Pass the exception as the throwable argument so the stack trace is kept.
            LOG.error("Cannot read file: " + file + " (" + e + EuclidConstants.S_RBRAK, e);
        }
    }

    /**
     * Placeholder for analyzing a PDF addressed by URL.
     *
     * @param str the URL (currently unused)
     * @throws RuntimeException always; URL input is not implemented
     */
    private void analyzePDFURL(String str) {
        throw new RuntimeException("URL not yet implemented");
    }

    /**
     * Runs the complete analysis for a single PDF file: converts and analyzes
     * the pages, copies the original PDF into the document's output directory
     * and builds the HTML menu for that directory.
     *
     * <p>The document root is the file name without a trailing {@code .pdf}.
     * A name without that suffix is used unchanged; previously its last four
     * characters were cut off regardless (or an exception was thrown for names
     * shorter than four characters).
     *
     * @param file the PDF to analyze
     * @throws IllegalArgumentException if {@code file} is null
     * @throws RuntimeException if any stage of the analysis fails
     */
    public void analyzePDFFile(File file) {
        if (file == null) {
            throw new IllegalArgumentException("PDF file must not be null");
        }
        final String pdfSuffix = ".pdf";
        this.inFile = file;
        this.inputName = file.getName();
        this.fileRoot = this.inputName.endsWith(pdfSuffix)
                ? this.inputName.substring(0, this.inputName.length() - pdfSuffix.length())
                : this.inputName;
        this.svgDocumentDir = new File(this.svgTopDir, this.fileRoot);
        File documentOutputDir = new File(this.outputTopDir, this.fileRoot);
        this.outputDocumentDir = documentOutputDir;
        analyzePDF();
        copyOriginalPDF(file, documentOutputDir);
        extractEntities(documentOutputDir);
        createHtmlMenuSystem(documentOutputDir);
    }

    /**
     * Empty stub; called by analyzePDFFile with the document's output directory.
     *
     * @param file the document's output directory (currently unused)
     */
    private void extractEntities(File file) {
    }

    /**
     * Copies the source PDF into the given directory under the name
     * {@code 00_<inputName>}.
     *
     * <p>Both streams are now closed whether or not the copy succeeds, and the
     * target directory is created if it is missing (it may not exist yet when
     * the document produced no pages).
     *
     * @param file  the PDF to copy
     * @param file2 the directory to copy it into
     * @throws RuntimeException if the directory cannot be created or the copy fails
     */
    private void copyOriginalPDF(File file, File file2) {
        if (!file2.isDirectory() && !file2.mkdirs()) {
            throw new RuntimeException("Cannot create directory: " + file2);
        }
        File target = new File(file2, "00_" + this.inputName);
        try {
            FileInputStream in = new FileInputStream(file);
            try {
                FileOutputStream out = new FileOutputStream(target);
                try {
                    IOUtils.copy(in, out);
                } finally {
                    out.close();
                }
            } finally {
                in.close();
            }
        } catch (Exception e) {
            throw new RuntimeException("Cannot copy " + file + " to " + target, e);
        }
    }

    /**
     * Collects the entries of a directory whose path ends with the HTML suffix.
     *
     * @param file the directory to scan
     * @return the matching files in listing order; empty if there are none or
     *         if {@code file} cannot be listed (not a directory, or I/O error)
     */
    private List<File> analyzeHtml(File file) {
        List<File> htmlFiles = new ArrayList<File>();
        File[] children = file.listFiles();
        if (children == null) {
            // listFiles() returns null rather than an empty array when listing fails.
            return htmlFiles;
        }
        for (File child : children) {
            if (child.toString().endsWith(SVGPlusConstantsX.DOT_HTML)) {
                htmlFiles.add(child);
            }
        }
        return htmlFiles;
    }

    /**
     * Analyzes the current document: makes sure the page SVGs exist, chunks and
     * indexes every page, writes the annotated page SVGs and finally runs the
     * HTML post-processing steps in a fixed order.
     *
     * <p>The number of pages is taken from the number of {@code .svg} files in
     * the document's SVG directory. Only {@code .svg} entries are counted
     * because pages are addressed as {@code <root>-page<N>.svg}; counting every
     * directory entry would overstate the page count whenever the directory
     * holds anything else.
     *
     * @throws RuntimeException if the SVG directory cannot be listed or a stage fails
     */
    public void analyzePDF() {
        createSVGfromPDF();
        File[] listFiles = this.svgDocumentDir.listFiles(new FilenameFilter() {
            @Override // java.io.FilenameFilter
            public boolean accept(File dir, String name) {
                return name.endsWith(".svg");
            }
        });
        LOG.debug("listing Files in: " + this.svgDocumentDir);
        if (listFiles == null) {
            throw new RuntimeException("No files in " + this.svgDocumentDir);
        }
        // A fresh editor per run; the chunk analysis below feeds it.
        this.htmlEditor = new HtmlEditor(this);
        ensurePDFIndex();
        this.pdfIndex.ensureElementMultimaps();
        for (int i = 0; i < listFiles.length; i++) {
            // Console progress marker, one per page.
            System.out.print(i + EuclidConstants.S_TILDE);
            createAndAnalyzeSVGChunks(i);
        }
        System.out.println();
        LOG.trace("IDS: " + this.pdfIndex.getUsedIdSet());
        this.pdfIndex.createIndexes();
        this.pdfIndex.AnalyzeDuplicates();
        writeSvgPages(listFiles);
        // HTML post-processing; the order of these calls is kept as in the original.
        this.htmlEditor.getHtmlAnalyzerListSortedByChunkId();
        this.htmlEditor.removeDuplicates();
        this.htmlEditor.createLinkedElementList();
        this.htmlEditor.mergeCaptions();
        this.htmlEditor.categorizeHtml();
        this.htmlEditor.analyzeTables();
        this.htmlEditor.analyzeFigures();
        this.htmlEditor.outputHtmlElements();
    }

    /**
     * Annotates and writes one output SVG per entry of {@code fileArr}.
     * Only the array length is used; the pages themselves come from the
     * output list filled during chunk analysis.
     *
     * @param fileArr array whose length gives the number of pages to write
     */
    private void writeSvgPages(File[] fileArr) {
        int pageIndex = 0;
        while (pageIndex < fileArr.length) {
            SVGSVG page = this.svgOutList.get(pageIndex);
            annotatePage(page);
            writeSVGPage(pageIndex, page);
            pageIndex++;
        }
    }

    /**
     * Marks every identified group on the page whose chunk id is in the index's
     * used-id set by appending a wide, semi-transparent line between the first
     * two corners of the group's bounding box.
     *
     * @param svgsvg the output page to annotate in place
     */
    private void annotatePage(SVGSVG svgsvg) {
        for (SVGG group : SVGG.extractGs(SVGUtil.getQuerySVGElements(svgsvg, ".//svg:g[@id]"))) {
            ChunkId id = new ChunkId(group.getId());
            boolean used = this.pdfIndex.getUsedIdSet().contains(id);
            LOG.trace("ID written " + id + EuclidConstants.S_SPACE + used);
            if (!used) {
                continue;
            }
            Real2[] boxCorners = group.getBoundingBox().getCorners();
            SVGLine marker = new SVGLine(boxCorners[0], boxCorners[1]);
            marker.setOpacity(0.3d);
            marker.setWidth(5.0d);
            marker.setFill("green");
            group.appendChild(marker);
        }
    }

    /**
     * Converts the input PDF into page SVG files in the document's SVG
     * directory, unless that directory already contains files, in which case
     * the conversion is skipped.
     *
     * <p>The original null-checked the SVG directory and then dereferenced it
     * unconditionally; both missing preconditions are now reported explicitly.
     *
     * @throws RuntimeException if the input file is unset or does not exist
     * @throws IllegalStateException if the SVG document directory is unset
     */
    public void createSVGfromPDF() {
        LOG.trace("createSVG");
        if (this.inFile == null || !this.inFile.exists()) {
            throw new RuntimeException("no input file: " + this.inFile);
        }
        if (this.svgDocumentDir == null) {
            throw new IllegalStateException("SVG document directory has not been set");
        }
        // listFiles() is null when the directory does not exist, so a non-null,
        // non-empty result already implies an existing, populated directory.
        File[] existingFiles = this.svgDocumentDir.listFiles();
        if (existingFiles != null && existingFiles.length != 0) {
            LOG.debug("Skipping SVG");
            return;
        }
        this.svgDocumentDir.mkdirs();
        LOG.debug("running " + this.inFile.toString() + " to " + this.svgDocumentDir.toString());
        PDF2SVGConverter converter = new PDF2SVGConverter();
        converter.run(PDF2SVGConverter.OUTDIR, this.svgDocumentDir.toString(), this.inFile.toString());
    }

    /**
     * Reads one page SVG, splits it into whitespace-delimited chunks, analyzes
     * each chunk and collects the results in a new output page.
     *
     * NOTE(review): judging by its name and by createAndAnalyzeSVGChunks, this
     * looks like the superseded variant; nothing in this class calls it - confirm
     * before removing. Unlike the newer variant it honours the skip flag.
     *
     * @param i zero-based page index
     */
    private void createAndAnalyzeSVGChunksOld(int i) {
        ensurePDFIndex();
        ensureHtmlEditor();
        this.pageNumber = i;
        // Page files are named <fileRoot>-page<N>.svg with N starting at 1.
        this.svgPageFile = new File(this.svgDocumentDir, this.fileRoot + "-" + createPageRoot(i) + ".svg");
        if (this.svgPageFile.exists() && this.skipFile) {
            LOG.debug("Skipping: " + this.svgPageFile);
            return;
        }
        LOG.debug("reading SVG " + this.svgPageFile);
        SVGSVG svgsvg = (SVGSVG) SVGElement.readAndCreateSVG(this.svgPageFile);
        LOG.trace("read and created SVG " + this.svgPageFile);
        processNonUnicodeCharactersInTitles(svgsvg);
        LOG.trace("processed nonUnicode");
        SemanticDocumentActionX createSemanticDocumentActionWithSVGPage = SemanticDocumentActionX.createSemanticDocumentActionWithSVGPage(svgsvg);
        LOG.trace("created documentAction");
        List<Chunk> chunkCreateWhitespaceChunkList = WhitespaceChunkerAnalyzerX.chunkCreateWhitespaceChunkList(createSemanticDocumentActionWithSVGPage);
        LOG.trace("made chunks - takes time");
        WhitespaceChunkerAnalyzerX.drawBoxes(chunkCreateWhitespaceChunkList, "red", "yellow", Double.valueOf(0.5d));
        LOG.trace("draw Boxes");
        // Select the groups three levels down that carry edge='YMIN'; each is analyzed as one chunk.
        List<SVGElement> generateElementList = SVGG.generateElementList(svgsvg, "svg:g/svg:g/svg:g[@edge='YMIN']");
        LOG.trace("read gList");
        SVGSVG createSVGOut = createSVGOut(i);
        this.svgOutList.add(createSVGOut);
        for (int i2 = 0; i2 < generateElementList.size(); i2++) {
            SVGG copyChunkAnalyzeMakeId = copyChunkAnalyzeMakeId(i, (SVGG) generateElementList.get(i2), i2);
            ensureGOutList();
            createSVGOut.appendChild(copyChunkAnalyzeMakeId);
            this.pdfIndex.addToindexes(copyChunkAnalyzeMakeId);
        }
        LOG.debug("read SVG " + this.svgPageFile);
    }

    /**
     * Reads the SVG file of the current page, replaces title-encoded characters,
     * runs the whitespace chunker over the page and returns the chunk groups.
     *
     * @return the groups matched by {@code svg:g/svg:g/svg:g[@edge='YMIN']}
     */
    private List<SVGElement> createSVGGListX() {
        ensurePDFIndex();
        ensureHtmlEditor();
        String pageFileName = this.fileRoot + "-" + createPageRoot(this.pageNumber) + ".svg";
        this.svgPageFile = new File(this.svgDocumentDir, pageFileName);
        LOG.debug("reading SVG " + this.svgPageFile);
        SVGSVG page = (SVGSVG) SVGElement.readAndCreateSVG(this.svgPageFile);
        processNonUnicodeCharactersInTitles(page);
        SemanticDocumentActionX documentAction =
                SemanticDocumentActionX.createSemanticDocumentActionWithSVGPage(page);
        List<Chunk> whitespaceChunks =
                WhitespaceChunkerAnalyzerX.chunkCreateWhitespaceChunkList(documentAction);
        WhitespaceChunkerAnalyzerX.drawBoxes(whitespaceChunks, "red", "yellow", Double.valueOf(0.5d));
        return SVGG.generateElementList(page, "svg:g/svg:g/svg:g[@edge='YMIN']");
    }

    /**
     * Processes one page: obtains its chunk groups, analyzes each of them, and
     * appends the analyzed copies to a new output page that is added to the
     * output list and to the index.
     *
     * @param i zero-based page index
     */
    private void createAndAnalyzeSVGChunks(int i) {
        this.pageNumber = i;
        List<SVGElement> chunkElements = createSVGGListX();
        SVGSVG pageOut = createSVGOut(i);
        this.svgOutList.add(pageOut);
        int chunkIndex = 0;
        while (chunkIndex < chunkElements.size()) {
            SVGG source = (SVGG) chunkElements.get(chunkIndex);
            SVGG analyzed = copyChunkAnalyzeMakeId(i, source, chunkIndex);
            ensureGOutList();
            pageOut.appendChild(analyzed);
            this.pdfIndex.addToindexes(analyzed);
            chunkIndex++;
        }
        LOG.debug("read SVG " + this.svgPageFile);
    }

    /**
     * Returns the HTML editor, creating it on first use.
     *
     * @return the editor bound to this analyzer; never null
     */
    HtmlEditor ensureHtmlEditor() {
        if (this.htmlEditor != null) {
            return this.htmlEditor;
        }
        this.htmlEditor = new HtmlEditor(this);
        return this.htmlEditor;
    }

    /**
     * Creates the list of output groups on first use. The list is now created
     * with its element type instead of as a raw {@code ArrayList}.
     */
    private void ensureGOutList() {
        if (this.gOutList == null) {
            this.gOutList = new ArrayList<SVGG>();
        }
    }

    /**
     * Builds the page part of a file name: the page prefix followed by the
     * 1-based page number.
     *
     * @param i zero-based page index
     * @return for example {@code page1} for index 0
     */
    private String createPageRoot(int i) {
        int oneBasedPage = i + 1;
        return PAGE + oneBasedPage;
    }

    /**
     * Creates an empty 600 x 800 output page with id {@code p.<i>} and makes
     * sure the list of output pages exists. The page is not added to that list
     * here.
     *
     * @param i zero-based page index, used in the id
     * @return the new, empty output page
     */
    private SVGSVG createSVGOut(int i) {
        ensureSVGOutList();
        SVGSVG pageOut = new SVGSVG();
        pageOut.setWidth(600.0d);
        pageOut.setHeight(800.0d);
        String pageId = "p." + i;
        pageOut.setId(pageId);
        return pageOut;
    }

    /**
     * Creates the list of output pages on first use. The list is now created
     * with its element type instead of as a raw {@code ArrayList}.
     */
    private void ensureSVGOutList() {
        if (this.svgOutList == null) {
            this.svgOutList = new ArrayList<SVGSVG>();
        }
    }

    /**
     * Replaces the content of every element that has an {@code svg:title}
     * child with the single character described by that title.
     *
     * <p>The title text is read as semicolon-separated {@code key:value}
     * fields (see {@link #extractCharacterCode(String)}). When a character
     * code is found, all children of the title's parent, the title included,
     * are removed and the character is appended as text.
     *
     * <p>Titles that yield no usable code are now logged and left untouched;
     * previously they caused a {@code NullPointerException} (no matching
     * field), an {@code ArrayIndexOutOfBoundsException} (field without a
     * colon) or a {@code NumberFormatException} (non-numeric code).
     *
     * @param svgsvg the page to modify in place
     */
    private void processNonUnicodeCharactersInTitles(SVGSVG svgsvg) {
        for (SVGElement titleElement : SVGUtil.getQuerySVGElements(svgsvg, ".//svg:title")) {
            SVGTitle title = (SVGTitle) titleElement;
            Integer code = extractCharacterCode(title.getValue());
            if (code == null) {
                LOG.warn("no usable character code in title: " + title.getValue());
                continue;
            }
            SVGElement parent = (SVGElement) title.getParent();
            if (parent == null) {
                // Already detached, nothing to rewrite.
                continue;
            }
            // Remove every existing child; always index 0 because the list shrinks.
            int childCount = parent.getChildCount();
            for (int i = 0; i < childCount; i++) {
                parent.getChild(0).detach();
            }
            // Codes above 0xFFFF are truncated by this cast, as in the original.
            char character = (char) code.intValue();
            LOG.trace("> " + character);
            try {
                parent.appendChild("" + character);
            } catch (Exception e) {
                // Best effort: some characters are rejected as XML text.
                LOG.trace("skipped problem character: " + ((int) character));
            }
        }
    }

    /**
     * Extracts the character code from a title value. Fields are examined in
     * order; the first field that has a value other than the null marker and
     * whose key is the char key or {@code name} decides the result.
     *
     * @param titleValue semicolon-separated {@code key:value} fields
     * @return the numeric value of the char field, 127 for a name field
     *         (NOTE(review): presumably a placeholder code - confirm), or
     *         {@code null} if no field qualifies or the code is not a number
     */
    private static Integer extractCharacterCode(String titleValue) {
        for (String field : titleValue.split(EuclidConstants.S_SEMICOLON)) {
            String[] keyValue = field.split(EuclidConstants.S_COLON);
            if (keyValue.length < 2 || keyValue[1].equals(Configurator.NULL)) {
                continue;
            }
            if (keyValue[0].equals(PageEditorX.CHAR)) {
                try {
                    return Integer.valueOf(keyValue[1].trim());
                } catch (NumberFormatException e) {
                    LOG.warn("character code is not a number: " + keyValue[1]);
                    return null;
                }
            }
            if (keyValue[0].equals("name")) {
                return Integer.valueOf(127);
            }
        }
        return null;
    }

    /**
     * Removes every newline character from all text nodes returned by the
     * {@code //text()} query on the given page.
     *
     * @param svgsvg the page whose text nodes are modified in place
     */
    private void stripNewlines(SVGSVG svgsvg) {
        Nodes textNodes = svgsvg.query("//text()");
        int nodeCount = textNodes.size();
        for (int index = 0; index < nodeCount; index++) {
            Text textNode = (Text) textNodes.get(index);
            String content = textNode.getValue();
            if (content.indexOf('\n') < 0) {
                continue;
            }
            textNode.setValue(content.replace("\n", ""));
        }
    }

    /** Creates the index bound to this analyzer on first use. */
    private void ensurePDFIndex() {
        if (this.pdfIndex != null) {
            return;
        }
        this.pdfIndex = new PDFIndex(this);
    }

    /**
     * Writes one output page to {@code <outputDocumentDir>/page<N>.svg},
     * creating the directory if necessary.
     *
     * <p>The output stream is now closed after writing, whether or not the
     * write succeeds; previously it was left open.
     *
     * @param i      zero-based page index, used for the file name
     * @param svgsvg the page to serialize
     * @throws RuntimeException if the page cannot be written
     */
    private void writeSVGPage(int i, SVGSVG svgsvg) {
        String pageRoot = createPageRoot(i);
        try {
            this.outputDocumentDir.mkdirs();
            String id = svgsvg.getId();
            LOG.trace("ID " + id);
            // NOTE(review): annotatePage queries the same set with ChunkId objects,
            // here it is queried with a String - confirm which element type the set holds.
            if (this.pdfIndex.getUsedIdSet().contains(id)) {
                LOG.trace("ANNOTATED: " + id);
            }
            FileOutputStream out = new FileOutputStream(new File(this.outputDocumentDir, pageRoot + ".svg"));
            try {
                CMLUtil.debug(svgsvg, out, 1);
            } finally {
                out.close();
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Analyzes one chunk under the id built from the 1-based page number and
     * the chunk index, then copies the attributes of the source chunk onto the
     * analyzed result.
     *
     * @param i    zero-based page index
     * @param svgg the source chunk
     * @param i2   index of the chunk within the page
     * @return the analyzed, labelled chunk
     */
    private SVGG copyChunkAnalyzeMakeId(int i, SVGG svgg, int i2) {
        ChunkId chunkId = new ChunkId(i + 1, i2);
        SVGG analyzedChunk = analyzeChunkInSVGPage(svgg, chunkId);
        CMLUtil.copyAttributes(svgg, analyzedChunk);
        return analyzedChunk;
    }

    /**
     * Finds the keys of a multimap that map to more than one value.
     *
     * <p>Raw collection types have been replaced by typed ones, and only
     * groups that are actually reported are sorted.
     *
     * @param str      label used in the trace message only
     * @param multimap the multimap to inspect; not modified
     * @return one sorted list of values per key that has at least two values,
     *         in the iteration order of the multimap's keys; empty if none
     */
    public static List<List<String>> findDuplicates(String str, Multimap<? extends Object, String> multimap) {
        List<List<String>> duplicates = new ArrayList<List<String>>();
        for (Map.Entry<? extends Object, Collection<String>> entry : multimap.asMap().entrySet()) {
            Collection<String> values = entry.getValue();
            if (values.size() <= 1) {
                continue;
            }
            // Copy before sorting so the multimap's own collection is left untouched.
            List<String> sortedValues = new ArrayList<String>(values);
            Collections.sort(sortedValues);
            LOG.trace("DUPLICATES: " + str + " >" + entry.getKey() + "< " + sortedValues);
            duplicates.add(sortedValues);
        }
        return duplicates;
    }

    /**
     * Analyzes one chunk: obtains the analyzer for the element, labels the
     * chunk with the given id and, if the analyzer yields HTML, registers that
     * HTML with the editor under the same id.
     *
     * <p>The editor is obtained through {@link #ensureHtmlEditor()} so that
     * this public method also works when it is called before
     * {@link #analyzePDF()} has created an editor; previously that case failed
     * with a {@code NullPointerException}.
     *
     * @param sVGElement the chunk element to analyze
     * @param chunkId    the id to give the labelled chunk and its HTML
     * @return the labelled chunk produced by the analyzer
     */
    public SVGG analyzeChunkInSVGPage(SVGElement sVGElement, ChunkId chunkId) {
        AbstractPageAnalyzerX analyzer = AbstractPageAnalyzerX.getAnalyzer(sVGElement);
        SVGG labelChunk = analyzer.labelChunk();
        labelChunk.setId(chunkId.toString());
        HtmlEditor editor = ensureHtmlEditor();
        HtmlAnalyzer htmlAnalyzer = new HtmlAnalyzer(editor, analyzer);
        HtmlElement createHtml = htmlAnalyzer.createHtml();
        if (createHtml != null) {
            editor.addHtmlElement(createHtml, chunkId);
            editor.indexHtmlBySvgId(htmlAnalyzer, chunkId);
        } else {
            LOG.warn("no html from: " + analyzer);
            // Text analyzers can dump their state to help diagnose the missing HTML.
            if (analyzer instanceof TextAnalyzerX) {
                ((TextAnalyzerX) analyzer).debug();
            }
        }
        return labelChunk;
    }

    /**
     * Builds the HTML menu for a document's output directory: every entry of
     * the directory, sorted by name, is added as a link before the menu,
     * bottom and index frames are written.
     *
     * @param file the document's output directory
     * @throws RuntimeException if the directory cannot be listed or the menu
     *         cannot be written
     */
    private void createHtmlMenuSystem(File file) {
        HtmlMenuSystem htmlMenuSystem = new HtmlMenuSystem();
        htmlMenuSystem.setOutdir(file.toString());
        File[] listFiles = file.listFiles();
        if (listFiles == null) {
            // listFiles() returns null for a missing directory or an I/O error;
            // report that clearly instead of failing inside Arrays.sort.
            throw new RuntimeException("Cannot list output directory: " + file);
        }
        Arrays.sort(listFiles, new NameComparator());
        for (File file2 : listFiles) {
            htmlMenuSystem.addHRef(file2.toString());
        }
        try {
            htmlMenuSystem.outputMenuAndBottomAndIndexFrame();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the index of this analyzer, creating it on first use.
     *
     * @return the index; never null
     */
    public PDFIndex getIndex() {
        if (this.pdfIndex == null) {
            this.pdfIndex = new PDFIndex(this);
        }
        return this.pdfIndex;
    }

    /**
     * Command-line entry point. With at least one argument, the first argument
     * is analyzed; without arguments, the usage text is printed and the JVM
     * exits with status 0.
     *
     * @param strArr command-line arguments; only the first one is used
     */
    public static void main(String[] strArr) {
        if (strArr.length == 0) {
            String[] usageLines = {
                "PDFAnalyzer <inputFile(s)>",
                "mvn exec:java -Dexec.mainClass=\"org.xmlcml.svg2xml.analyzer.PDFAnalyzer\"  -Dexec.args=\"src/test/resources/pdfs/bmc/1471-2180-11-174.pdf\"",
                "OR java org.xmlcml.svg2xml.analyzer.PDFAnalyzer src/test/resources/pdfs/bmc/1471-2180-11-174.pdf",
                "",
                "input can be:",
                "    (a) single PDF file as above (must end with \".pdf\")",
                "    (b) directory containing one or more *.pdf",
                "    (c) list of *.pdf files (relative to '.' or absolute)",
                "    (d) URL (must start with http:// or https://) - NYI",
            };
            for (String usageLine : usageLines) {
                System.out.println(usageLine);
            }
            System.exit(0);
        } else {
            new PDFAnalyzer().analyzePDFs(strArr[0]);
        }
    }
}
