package org.xmlcml.svg2xml.pdf;

import com.google.common.collect.Multimap;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.xmlcml.euclid.EuclidConstants;
import org.xmlcml.graphics.svg.SVGSVG;
import org.xmlcml.html.HtmlDiv;
import org.xmlcml.html.HtmlElement;
import org.xmlcml.pdf2svg.PDF2SVGConverter;
import org.xmlcml.svg2xml.collection.DocumentListAnalyzer;
import org.xmlcml.svg2xml.page.PageAnalyzer;
import org.xmlcml.svg2xml.page.PageIO;

/* loaded from: input_file:org/xmlcml/svg2xml/pdf/PDFAnalyzer.class */
/**
 * Drives the analysis of one PDF document: the PDF is handed to a
 * {@link PDF2SVGConverter} which writes into the raw SVG directory, the raw
 * SVG pages are then analyzed by one {@link PageAnalyzer} per page, the
 * results are indexed in a {@link PDFIndex}, and output is written through
 * {@link PDFAnalyzerIO}.
 *
 * <p>Input may be a single {@code *.pdf} file, a directory of {@code *.pdf}
 * files, a listing file naming {@code *.pdf} files, or a URL starting with
 * {@code http} and ending with {@code .pdf} (see {@link #analyzePDFs(String)}).
 */
public class PDFAnalyzer {
    private static final Logger LOG = Logger.getLogger(PDFAnalyzer.class);
    /** Stream used for progress and usage output. */
    static final PrintStream SYSOUT = System.out;
    /** Public prefix constant; it is not referenced inside this class. */
    public static final String Z_CHUNK = "z_";

    /** Suffix that identifies a PDF input name. */
    private static final String PDF_SUFFIX = ".pdf";
    /** Prefix that identifies a URL input name (matches both http and https). */
    private static final String HTTP_PREFIX = "http";
    /** Value reported by {@link #getDecimalPlaces()}. */
    private static final int DECIMAL_PLACES = 3;

    private PDFAnalyzerIO pdfIo;
    /** Stored by {@link #PDFAnalyzer(DocumentListAnalyzer)}; not read inside this class. */
    private DocumentListAnalyzer documentListAnalyzer;
    PDFIndex pdfIndex;
    /** Lazily created; stays {@code null} until pages have been analyzed. */
    private List<PageAnalyzer> pageAnalyzerList;
    private PDFAnalyzerOptions pdfOptions;
    /** Result of the most recent (force)createRunningHtml call, or {@code null}. */
    private HtmlElement runningTextElement;

    /**
     * Creates an analyzer with a fresh {@link PDFAnalyzerIO} and default
     * {@link PDFAnalyzerOptions}.
     */
    public PDFAnalyzer() {
        this.pdfIo = new PDFAnalyzerIO(this);
        setPdfOptions(new PDFAnalyzerOptions(this));
    }

    /**
     * Creates an analyzer as for {@link #PDFAnalyzer()} and remembers the
     * given document list analyzer.
     *
     * @param documentListAnalyzer the analyzer to remember; may be {@code null}
     */
    public PDFAnalyzer(DocumentListAnalyzer documentListAnalyzer) {
        this();
        this.documentListAnalyzer = documentListAnalyzer;
    }

    /** Delegates to {@link PDFAnalyzerIO#setSvgTopDir(File)}. */
    public void setSVGTopDir(File file) {
        this.pdfIo.setSvgTopDir(file);
    }

    /** Delegates to {@link PDFAnalyzerIO#setOutputTopDir(File)}. */
    public void setOutputTopDir(File file) {
        this.pdfIo.setOutputTopDir(file);
    }

    /** @return the output top directory held by the {@link PDFAnalyzerIO} */
    public File getOutputTopDir() {
        return this.pdfIo.getOutputTopDir();
    }

    /** Delegates to {@link PDFAnalyzerIO#setFileRoot(String)}. */
    public void setFileRoot(String str) {
        this.pdfIo.setFileRoot(str);
    }

    /**
     * Analyzes the PDF(s) denoted by {@code str}.
     *
     * <ul>
     * <li>a name not ending in {@code .pdf} is treated as a directory of PDFs
     *     or as a listing file of PDF names;</li>
     * <li>a name ending in {@code .pdf} and starting with {@code http} is
     *     treated as a URL;</li>
     * <li>any other name ending in {@code .pdf} is treated as a single file.</li>
     * </ul>
     *
     * @param str file name, directory name, listing file name or URL
     * @throws RuntimeException if {@code str} is {@code null}
     */
    public void analyzePDFs(String str) {
        if (str == null) {
            throw new RuntimeException("File/s must not be null");
        }
        if (!str.endsWith(PDF_SUFFIX)) {
            readFilenamesAndAnalyzePDFs(new File(str));
        } else if (str.startsWith(HTTP_PREFIX)) {
            analyzePDFURL(str);
        } else {
            analyzePDFFile(new File(str));
        }
    }

    /**
     * Analyzes every PDF in a directory, or every PDF named in a listing file.
     * A nonexistent input is logged and otherwise ignored.
     *
     * @param file directory or listing file
     * @throws RuntimeException if the listing file cannot be read
     */
    private void readFilenamesAndAnalyzePDFs(File file) {
        if (!file.exists()) {
            // Previously a silent no-op; keep the no-op but tell the user why nothing happened.
            LOG.warn("Input does not exist, nothing to analyze: " + file);
            return;
        }
        if (file.isDirectory()) {
            analyzePDFsInDirectory(file);
        } else {
            analyzePDFsInListing(file);
        }
    }

    /** Analyzes each file in {@code directory} whose name ends with ".pdf". */
    private void analyzePDFsInDirectory(File directory) {
        File[] pdfFiles = directory.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.endsWith(PDF_SUFFIX);
            }
        });
        // listFiles returns null on I/O error; treat that like an empty directory.
        if (pdfFiles == null) {
            return;
        }
        for (File pdfFile : pdfFiles) {
            createAnalyzerAndAnalyzePDF(pdfFile);
        }
    }

    /**
     * Reads {@code listing} line by line and analyzes each named PDF. Lines
     * starting with "#" and lines not ending with ".pdf" are skipped. Names
     * are resolved against the parent directory of the listing file.
     */
    private void analyzePDFsInListing(File listing) {
        File parentDir = listing.getParentFile();
        try {
            BufferedReader reader = new BufferedReader(new FileReader(listing));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (!line.startsWith("#") && line.endsWith(PDF_SUFFIX)) {
                        readAndAnalyzeFile(parentDir, line);
                    }
                }
            } finally {
                // The reader was never closed before, leaking a file handle per listing.
                reader.close();
            }
        } catch (Exception e) {
            throw new RuntimeException("Cannot read listing file: " + listing, e);
        }
    }

    /**
     * Analyzes the PDF {@code str} resolved against {@code file}; logs an
     * error when that file does not exist.
     */
    private void readAndAnalyzeFile(File file, String str) {
        File pdfFile = new File(file, str);
        if (pdfFile.exists()) {
            createAnalyzerAndAnalyzePDF(pdfFile);
        } else {
            LOG.error("PDF file does not exist: " + pdfFile);
        }
    }

    /**
     * Analyzes {@code file} with a brand-new {@link PDFAnalyzer}. Any
     * exception is logged rather than rethrown so that the remaining files of
     * a batch are still processed.
     */
    private void createAnalyzerAndAnalyzePDF(File file) {
        try {
            new PDFAnalyzer().analyzePDFFile(file);
        } catch (Exception e) {
            // Pass the throwable as well so the stack trace is not lost.
            LOG.error("Cannot read file: " + file + " (" + e + ")", e);
        }
    }

    /** Registers {@code str} as the PDF URL and runs the analysis. */
    private void analyzePDFURL(String str) {
        this.pdfIo.setPDFURL(str);
        analyzePDF();
    }

    /**
     * Registers {@code file} as the input PDF and runs the analysis.
     *
     * @param file the PDF file to analyze
     */
    public void analyzePDFFile(File file) {
        this.pdfIo.setUpPDF(file);
        analyzePDF();
    }

    /**
     * Creates the raw SVG files and, unless the IO layer reports that output
     * is to be skipped, analyzes the pages.
     */
    private void analyzePDF() {
        ensurePDFIndex();
        createSVGFilesfromPDF();
        if (this.pdfIo.skipOutput(this.pdfOptions)) {
            LOG.debug("Skipped Output: " + this.pdfIo.outputDocumentDir);
        } else {
            analyzeRawSVGPagesWithPageAnalyzers();
        }
    }

    /**
     * Analyzes the raw SVG pages, writes the output files, builds the indexes
     * and finally copies the {@code *.png} files from the raw SVG page
     * directory into the output document directory.
     *
     * @throws RuntimeException if the PNG files cannot be copied
     */
    public void analyzeRawSVGPagesWithPageAnalyzers() {
        this.pageAnalyzerList = createAndFillPageAnalyzers();
        this.pdfIo.outputFiles(getPdfOptions());
        createIndexesAndRemoveDuplicates();
        try {
            FileUtils.copyDirectory(
                    this.pdfIo.getRawSVGPageDirectory(),
                    this.pdfIo.getExistingOutputDocumentDir(),
                    new FileFilter() {
                        @Override
                        public boolean accept(File file) {
                            return "png".equals(FilenameUtils.getExtension(file.getName()));
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException("Cannot copy PNG files to output document directory", e);
        }
    }

    /** Logs every page analyzer at debug level; does nothing before analysis. */
    private void debugContainers() {
        if (this.pageAnalyzerList == null) {
            return;
        }
        for (PageAnalyzer pageAnalyzer : this.pageAnalyzerList) {
            LOG.debug("\n============== " + pageAnalyzer.toString());
        }
    }

    /**
     * Adds every page analyzer to the {@link PDFIndex} and then runs the
     * index's container analysis, index creation and duplicate analysis.
     */
    private void createIndexesAndRemoveDuplicates() {
        ensurePDFIndex();
        this.pdfIndex.ensureElementMultimaps();
        for (PageAnalyzer pageAnalyzer : this.pageAnalyzerList) {
            this.pdfIndex.addToindexes(pageAnalyzer);
        }
        this.pdfIndex.analyzeContainers();
        this.pdfIndex.createIndexes();
        this.pdfIndex.AnalyzeDuplicates();
        LOG.trace("IDS: " + this.pdfIndex.getUsedIdSet());
    }

    /**
     * Creates one {@link PageAnalyzer} per raw SVG file reported by the IO
     * layer and appends it to the page analyzer list. The page index is
     * echoed to {@link #SYSOUT} as a progress indicator.
     *
     * @return the (shared, mutable) page analyzer list
     */
    private List<PageAnalyzer> createAndFillPageAnalyzers() {
        File rawSVGPageDirectory = this.pdfIo.getRawSVGPageDirectory();
        List<File> rawSVGFiles = this.pdfIo.collectRawSVGFiles();
        ensurePageAnalyzerList();
        LOG.debug(rawSVGPageDirectory + " files: " + rawSVGFiles.size());
        for (int i = 0; i < rawSVGFiles.size(); i++) {
            SYSOUT.print(i + EuclidConstants.S_TILDE);
            this.pageAnalyzerList.add(
                    PageAnalyzer.createAndAnalyze(rawSVGFiles.get(i), rawSVGPageDirectory, Integer.valueOf(i)));
        }
        return this.pageAnalyzerList;
    }

    /**
     * Creates one {@link PageAnalyzer} per supplied SVG page and appends it to
     * the page analyzer list. The page index is echoed to {@link #SYSOUT} as a
     * progress indicator.
     *
     * @param list the SVG pages, in page order
     * @return the (shared, mutable) page analyzer list
     */
    public List<PageAnalyzer> createAndFillPageAnalyzers(List<SVGSVG> list) {
        ensurePageAnalyzerList();
        File rawSVGDirectory = this.pdfIo.getRawSVGDirectory();
        LOG.debug("raw svg " + rawSVGDirectory);
        for (int i = 0; i < list.size(); i++) {
            SYSOUT.print(i + EuclidConstants.S_TILDE);
            this.pageAnalyzerList.add(
                    PageAnalyzer.createAndAnalyze(list.get(i), Integer.valueOf(i), rawSVGDirectory));
        }
        return this.pageAnalyzerList;
    }

    /**
     * Builds a new div containing the children of every page's existing
     * running HTML element ({@link PageAnalyzer#getRunningHtmlElement()}).
     *
     * @return the new div; it has no copied children when no pages have been
     *         analyzed yet
     */
    public HtmlElement createRunningHtml() {
        this.runningTextElement = new HtmlDiv();
        if (this.pageAnalyzerList != null) {
            for (PageAnalyzer pageAnalyzer : this.pageAnalyzerList) {
                PageIO.copyChildElementsFromTo(pageAnalyzer.getRunningHtmlElement(), this.runningTextElement);
            }
        }
        return this.runningTextElement;
    }

    /**
     * Builds a new div containing the children of every page's running HTML,
     * asking each page to create it ({@link PageAnalyzer#createRunningHtml()}).
     *
     * @return the new div; it has no copied children when no pages have been
     *         analyzed yet
     */
    public HtmlElement forceCreateRunningHtml() {
        this.runningTextElement = new HtmlDiv();
        if (this.pageAnalyzerList != null) {
            for (PageAnalyzer pageAnalyzer : this.pageAnalyzerList) {
                PageIO.copyChildElementsFromTo(pageAnalyzer.createRunningHtml(), this.runningTextElement);
            }
        }
        return this.runningTextElement;
    }

    /** Creates the page analyzer list if it does not exist yet. */
    private void ensurePageAnalyzerList() {
        if (this.pageAnalyzerList == null) {
            this.pageAnalyzerList = new ArrayList<PageAnalyzer>();
        }
    }

    /**
     * Converts the configured input to SVG. An existing input file takes
     * precedence; otherwise the input name must start with {@code http}.
     *
     * @throws RuntimeException if there is neither an existing input file nor
     *         an {@code http} input name
     */
    public void createSVGFilesfromPDF() {
        LOG.trace("createSVG");
        PDF2SVGConverter converter = new PDF2SVGConverter();
        File inFile = this.pdfIo.getInFile();
        String inputName = this.pdfIo.getInputName();
        if (inFile != null && inFile.exists()) {
            createSVGFilesfromPDF(converter, inFile.toString());
            return;
        }
        if (inputName == null || !inputName.startsWith(HTTP_PREFIX)) {
            throw new RuntimeException("no input file: " + inFile);
        }
        this.pdfIo.createHttpInputName(inputName);
        createSVGFilesfromPDF(converter, inputName);
    }

    /**
     * Runs {@code pDF2SVGConverter} on {@code str}, writing into the raw SVG
     * directory. The conversion is skipped when that directory already
     * contains at least one file.
     *
     * @param pDF2SVGConverter the converter to run
     * @param str the input file name or URL passed to the converter
     * @throws IllegalStateException if no raw SVG directory is configured
     */
    public void createSVGFilesfromPDF(PDF2SVGConverter pDF2SVGConverter, String str) {
        File rawSVGDirectory = this.pdfIo.getRawSVGDirectory();
        if (rawSVGDirectory == null) {
            // The original null-checked this value once and then dereferenced it anyway.
            throw new IllegalStateException("no raw SVG directory configured; cannot convert: " + str);
        }
        File[] existingFiles = rawSVGDirectory.listFiles();
        if (rawSVGDirectory.exists() && existingFiles != null && existingFiles.length != 0) {
            LOG.debug("Skipping SVG because files in (" + rawSVGDirectory + ") already exist: " + existingFiles.length);
            return;
        }
        // mkdirs() returns false both on failure and when the directory already exists.
        if (!rawSVGDirectory.mkdirs() && !rawSVGDirectory.isDirectory()) {
            LOG.warn("Could not create raw SVG directory: " + rawSVGDirectory);
        }
        LOG.debug("running " + str + " to " + rawSVGDirectory.toString());
        pDF2SVGConverter.run(PDF2SVGConverter.OUTDIR, rawSVGDirectory.toString(), str);
    }

    /** Creates the {@link PDFIndex} if it does not exist yet. */
    private void ensurePDFIndex() {
        if (this.pdfIndex == null) {
            this.pdfIndex = new PDFIndex(this);
        }
    }

    /**
     * Collects the value groups of {@code multimap} that hold more than one
     * value. Each returned group is a sorted, fixed-size list.
     *
     * @param str label used only in the trace log message
     * @param multimap the multimap to inspect
     * @return one sorted list per key with more than one value; empty when
     *         there are none
     */
    public static List<List<String>> findDuplicates(String str, Multimap<? extends Object, String> multimap) {
        List<List<String>> duplicateList = new ArrayList<List<String>>();
        for (Map.Entry<? extends Object, Collection<String>> entry : multimap.asMap().entrySet()) {
            Collection<String> values = entry.getValue();
            if (values.size() > 1) {
                // Sort a copy so the multimap's own collection is left untouched.
                List<String> sortedValues = Arrays.asList(values.toArray(new String[0]));
                Collections.sort(sortedValues);
                LOG.trace("DUPLICATES: " + str + " >" + entry.getKey() + "< " + sortedValues);
                duplicateList.add(sortedValues);
            }
        }
        return duplicateList;
    }

    /** @return the index, created on first use */
    public PDFIndex getIndex() {
        ensurePDFIndex();
        return this.pdfIndex;
    }

    /**
     * Command-line entry point. With at least one argument the first argument
     * is analyzed; with none, usage is printed and the JVM exits with status 0.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        if (strArr.length != 0) {
            new PDFAnalyzer().analyzePDFs(strArr[0]);
            return;
        }
        SYSOUT.println("PDFAnalyzer <inputFile(s)>");
        // The class lives in org.xmlcml.svg2xml.pdf; the old text named a nonexistent package.
        SYSOUT.println("mvn exec:java -Dexec.mainClass=\"org.xmlcml.svg2xml.pdf.PDFAnalyzer\"  -Dexec.args=\"src/test/resources/pdfs/bmc/1471-2180-11-174.pdf\"");
        SYSOUT.println("OR java org.xmlcml.svg2xml.pdf.PDFAnalyzer src/test/resources/pdfs/bmc/1471-2180-11-174.pdf");
        SYSOUT.println("");
        SYSOUT.println("input can be:");
        SYSOUT.println("    (a) single PDF file as above (must end with \".pdf\")");
        SYSOUT.println("    (b) directory containing one or more *.pdf");
        SYSOUT.println("    (c) list of *.pdf files (relative to '.' or absolute)");
        SYSOUT.println("    (d) URL (must start with http:// or https://) - NYI");
        System.exit(0);
    }

    /** @return the fixed number of decimal places, 3 */
    public int getDecimalPlaces() {
        return DECIMAL_PLACES;
    }

    /** @return the IO helper owned by this analyzer */
    public PDFAnalyzerIO getPDFIO() {
        return this.pdfIo;
    }

    /** Delegates to {@link PDFAnalyzerIO#setRawSvgDirectory(File)}. */
    public void setRawSvgDirectory(File file) {
        this.pdfIo.setRawSvgDirectory(file);
    }

    /** @return the page analyzer list, or {@code null} if none has been created yet */
    public List<PageAnalyzer> getPageAnalyzerList() {
        return this.pageAnalyzerList;
    }

    /**
     * @return the element built by the last call to {@link #createRunningHtml()}
     *         or {@link #forceCreateRunningHtml()}, or {@code null} if neither
     *         has been called
     */
    public HtmlElement getRunningTextHtml() {
        return this.runningTextElement;
    }

    /** @return the current options */
    public PDFAnalyzerOptions getPdfOptions() {
        return this.pdfOptions;
    }

    /** Replaces the current options. */
    public void setPdfOptions(PDFAnalyzerOptions pDFAnalyzerOptions) {
        this.pdfOptions = pDFAnalyzerOptions;
    }

    /** @return the {@code outputHtmlChunks} option */
    public boolean getOutputHtmlChunks() {
        return this.pdfOptions.outputHtmlChunks;
    }

    /** @return the {@code outputRawFigureHtml} option */
    public boolean getOutputFigures() {
        return this.pdfOptions.outputRawFigureHtml;
    }

    /** @return the {@code outputFooters} option */
    public boolean getOutputFooters() {
        return this.pdfOptions.outputFooters;
    }

    /** @return the {@code outputHeaders} option */
    public boolean getOutputHeaders() {
        return this.pdfOptions.outputHeaders;
    }

    /** @return the {@code outputRawTableHtml} option */
    public boolean getOutputTables() {
        return this.pdfOptions.outputRawTableHtml;
    }

    /** Sets the {@code skipOutput} option. */
    public void setSkipOutput(boolean z) {
        this.pdfOptions.skipOutput = z;
    }

    static {
        // NOTE(review): this forces DEBUG for this logger on class load, overriding
        // whatever level the log4j configuration chose - confirm this is intended.
        LOG.setLevel(Level.DEBUG);
    }
}
