package org.xmlcml.xhtml2stm.visitor.regex;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import nu.xom.Element;
import org.apache.log4j.Logger;
import org.xmlcml.xhtml2stm.result.ResultsElement;
import org.xmlcml.xhtml2stm.visitable.html.HtmlContainer;
import org.xmlcml.xhtml2stm.visitable.html.HtmlVisitable;
import org.xmlcml.xhtml2stm.visitable.image.ImageVisitable;
import org.xmlcml.xhtml2stm.visitable.pdf.PDFVisitable;
import org.xmlcml.xhtml2stm.visitable.svg.SVGVisitable;
import org.xmlcml.xhtml2stm.visitable.table.TableVisitable;
import org.xmlcml.xhtml2stm.visitable.xml.XMLContainer;
import org.xmlcml.xhtml2stm.visitable.xml.XMLVisitable;
import org.xmlcml.xhtml2stm.visitor.AbstractVisitor;
import org.xmlcml.xhtml2stm.visitor.ArgProcessor;

/* loaded from: input_file:org/xmlcml/xhtml2stm/visitor/regex/RegexVisitor.class */
/**
 * Visitor that runs the compound regexes held in a {@link RegexContainer}
 * over the XOM elements of HTML and XML visitables and accumulates per-component
 * hit counts in a running total map.
 *
 * <p>Regex definition files are supplied on the command line with {@code -g} or
 * {@code --regex}; see {@link #processArg(String, ListIterator)}.
 * Image, PDF, SVG and table visitables are delegated to {@code notYetImplemented}.
 */
public class RegexVisitor extends AbstractVisitor {
    static final Logger LOG = Logger.getLogger(RegexVisitor.class);

    // NOTE(review): the two constants below are not referenced anywhere in this class.
    private static final File REGEX_DIRECTORY_BASE = new File("src/main/resources/org/xmlcml/xhtml2stm/visitor/regex");
    private static final String REGEX_SUFFIX = ".xml";

    /** Short form of the regex-file command line flag. */
    private static final String G = "-g";
    /** Long form of the regex-file command line flag. */
    private static final String REGEX = "--regex";

    /** Accumulated hit count per regex component; created lazily by {@link #ensureTotalCountMap()}. */
    private Map<RegexComponent, Integer> totalCountMap;
    /** Holder of the compound regexes read from the regex files. */
    private RegexContainer regexContainer;
    /** File names most recently supplied through the regex command line flag. */
    private List<String> regexFiles;

    /** Creates a visitor with an empty {@link RegexContainer}. */
    public RegexVisitor() {
        ensureAndFillRegexContainer();
        LOG.trace("created... RegexContainer");
    }

    /** Creates the regex container if it does not exist yet (nothing is read into it here). */
    private void ensureAndFillRegexContainer() {
        if (this.regexContainer == null) {
            this.regexContainer = new RegexContainer();
        }
    }

    /**
     * Searches the HTML element of every container with the current compound regexes.
     *
     * <p>Containers are skipped (apart from the trace line) while the compound regex
     * list is null. Unlike {@link #visit(XMLVisitable)} this overload neither calls
     * {@link #ensureRegexList()} nor populates {@code resultsElement}.
     *
     * @param htmlVisitable source of the HTML containers to search
     */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(HtmlVisitable htmlVisitable) {
        for (HtmlContainer htmlContainer : htmlVisitable.getHtmlContainerList()) {
            if (this.regexContainer.getCompoundRegexList() != null) {
                searchXomElement(htmlContainer.getHtmlElement());
                debugCountMap();
            }
            LOG.trace("==============" + htmlContainer.getLocation() + "==================");
        }
    }

    /** Not implemented for images; delegates to {@code notYetImplemented}. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(ImageVisitable imageVisitable) {
        notYetImplemented(imageVisitable);
    }

    /**
     * Searches the element of every XML container with the current compound regexes
     * and, when at least one hit has been accumulated, stores a fresh
     * {@link ResultsElement} containing the XML form of every counted component.
     *
     * @param xMLVisitable source of the XML containers to search
     */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(XMLVisitable xMLVisitable) {
        ensureRegexList();
        LOG.trace("visiting XML of " + xMLVisitable.getXMLContainerList().size() + " visitables");
        for (XMLContainer xMLContainer : xMLVisitable.getXMLContainerList()) {
            List<CompoundRegex> compoundRegexes = this.regexContainer.getCompoundRegexList();
            String regexCount = compoundRegexes == null ? "null/zero" : String.valueOf(compoundRegexes.size());
            LOG.debug("visiting container with  " + regexCount + " compound regexes");
            if (compoundRegexes != null) {
                searchXomElement(xMLContainer.getElement());
                debugCountMap();
            }
        }
        // The map is only created while searching; without this call an empty container
        // list (or a null regex list) left it null and the check below threw an NPE.
        ensureTotalCountMap();
        if (!this.totalCountMap.isEmpty()) {
            this.resultsElement = new ResultsElement();
            addCountMapTo(this.resultsElement, this.totalCountMap);
        }
    }

    /**
     * Appends the XML form of every key of {@code map} to {@code resultsElement}.
     * The counts themselves are not written by this method.
     */
    private void addCountMapTo(ResultsElement resultsElement, Map<RegexComponent, Integer> map) {
        for (RegexComponent regexComponent : map.keySet()) {
            resultsElement.appendChild(regexComponent.toXML());
        }
    }

    /** Not implemented for PDF; delegates to {@code notYetImplemented}. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(PDFVisitable pDFVisitable) {
        notYetImplemented(pDFVisitable);
    }

    /** Not implemented for SVG; delegates to {@code notYetImplemented}. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(SVGVisitable sVGVisitable) {
        notYetImplemented(sVGVisitable);
    }

    /** Not implemented for tables; delegates to {@code notYetImplemented}. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(TableVisitable tableVisitable) {
        notYetImplemented(tableVisitable);
    }

    /**
     * @return the compound regex list of the underlying container; callers in this
     *         class treat the value as possibly null
     */
    public List<CompoundRegex> getCompoundRegexList() {
        return this.regexContainer.getCompoundRegexList();
    }

    /**
     * Looks up a compound regex by title in the underlying container.
     *
     * @param str title to look up
     * @return whatever {@code RegexContainer.getCompoundRegexByTitle} returns for that title
     */
    public CompoundRegex getCompoundRegex(String str) {
        return this.regexContainer.getCompoundRegexByTitle(str);
    }

    /** Logs the compound regex list and, for each entry, its title and number of regex values. */
    public void debug() {
        List<CompoundRegex> compoundRegexes = this.regexContainer.getCompoundRegexList();
        LOG.debug("regex list " + compoundRegexes);
        if (compoundRegexes == null) {
            // Nothing loaded yet; iterating would throw a NullPointerException.
            return;
        }
        for (CompoundRegex compoundRegex : compoundRegexes) {
            LOG.debug(compoundRegex.getTitle() + "/" + compoundRegex.getRegexValues().size());
        }
    }

    /**
     * Command line entry point: creates a visitor and hands it the arguments.
     *
     * @param strArr command line arguments
     * @throws Exception propagated from argument processing
     */
    public static void main(String[] strArr) throws Exception {
        LOG.debug("running RegexVisitor " + strArr.length);
        new RegexVisitor().processArgs(strArr);
    }

    /** Prints usage by delegating to the superclass. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void usage() {
        super.usage();
    }

    /** Logs the pattern and accumulated count of every component seen so far. */
    private void debugCountMap() {
        ensureTotalCountMap();
        for (Map.Entry<RegexComponent, Integer> entry : this.totalCountMap.entrySet()) {
            LOG.debug(entry.getKey().getPattern() + ": " + entry.getValue());
        }
    }

    /**
     * Runs every compound regex against {@code element} and folds the resulting
     * per-component counts into the running totals.
     *
     * <p>Callers must make sure the compound regex list is non-null.
     */
    private void searchXomElement(Element element) {
        List<CompoundRegex> compoundRegexes = this.regexContainer.getCompoundRegexList();
        LOG.debug("search XomElement with " + compoundRegexes.size() + " compoundRegexes");
        ensureTotalCountMap();
        for (CompoundRegex compoundRegex : compoundRegexes) {
            Map<RegexComponent, Integer> countMap = compoundRegex.searchWithRegexComponents(element).getCountMap();
            LOG.debug("Hits: " + countMap.size());
            for (RegexComponent regexComponent : countMap.keySet()) {
                recordResults(countMap, regexComponent);
            }
        }
    }

    /**
     * Adds the count stored in {@code map} for {@code regexComponent} to the running total.
     * Missing or zero counts are ignored, so such components never enter the total map.
     */
    private void recordResults(Map<RegexComponent, Integer> map, RegexComponent regexComponent) {
        Integer hits = map.get(regexComponent);
        if (hits == null || hits.intValue() == 0) {
            return;
        }
        Integer previous = this.totalCountMap.get(regexComponent);
        int runningTotal = previous == null ? 0 : previous.intValue();
        this.totalCountMap.put(regexComponent, Integer.valueOf(runningTotal + hits.intValue()));
    }

    /** Lazily creates the total count map. */
    private void ensureTotalCountMap() {
        if (this.totalCountMap == null) {
            this.totalCountMap = new HashMap<RegexComponent, Integer>();
        }
    }

    /** Prints the regex-specific command line options to standard error. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    protected void additionalUsage() {
        System.err.println();
        System.err.println("    -g   --regex     <regexDictionary> [<regexDictionary> ...] ");
        System.err.println("                   where dictionary 'foo' is found in src/main/resources/org/xmlcml/xhtml2stm/visitor/regex/foo.xml (NYI)");
        System.err.println("                   so --regex 'foo bar' uses both foo.xml and bar.xml in that order");
        System.err.println("    ");
    }

    /** @return a one-line description of this visitor */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public String getDescription() {
        return "Regex: Applies regular expressions to HTML or XML Visitables to extract information.";
    }

    /**
     * Handles the {@code -g} / {@code --regex} flag: consumes the following non-flag
     * arguments as regex file names and reads them into the container.
     *
     * @param str          the flag currently being processed
     * @param listIterator iterator positioned just after {@code str}
     * @return {@code true} if the flag was recognised and consumed, {@code false} otherwise
     * @throws RuntimeException if a named regex file does not exist or is a directory
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public boolean processArg(String str, ListIterator<String> listIterator) {
        if (!G.equals(str) && !REGEX.equals(str)) {
            return false;
        }
        this.regexFiles = extractArgs(listIterator);
        addRegexFiles(this.regexFiles);
        return true;
    }

    /**
     * Validates every file name first and only then reads the files, so a single bad
     * name aborts before anything is loaded.
     *
     * @param list regex file names
     * @return the regex container after the files have been read
     * @throws RuntimeException if a file does not exist or is a directory
     */
    private RegexContainer addRegexFiles(List<String> list) {
        List<File> files = new ArrayList<File>();
        for (String filename : list) {
            File file = new File(filename);
            if (!file.exists() || file.isDirectory()) {
                throw new RuntimeException("Cannot find regexFile: " + file);
            }
            files.add(file);
        }
        for (File file : files) {
            this.regexContainer.readCompoundRegexFile(file);
        }
        LOG.debug("regex container " + this.regexContainer.getCompoundRegexList());
        return this.regexContainer;
    }

    /**
     * Collects arguments up to, but not including, the next one that starts with
     * {@code ArgProcessor.MINUS}. That argument is pushed back so the caller sees it next.
     *
     * @param listIterator iterator over the remaining command line arguments
     * @return the collected arguments, possibly empty
     */
    protected List<String> extractArgs(ListIterator<String> listIterator) {
        List<String> values = new ArrayList<String>();
        while (listIterator.hasNext()) {
            String next = listIterator.next();
            if (next.startsWith(ArgProcessor.MINUS)) {
                // Step back so the next flag is not swallowed.
                listIterator.previous();
                break;
            }
            values.add(next);
        }
        return values;
    }

    /** Asks the container to make sure its compound regex list exists. */
    private void ensureRegexList() {
        this.regexContainer.ensureCompoundRegexList();
    }

    /**
     * Currently a no-op: the argument is ignored.
     * NOTE(review): looks like an unfinished hook for adding a single regex file — confirm intent.
     *
     * @param str ignored
     */
    public void addRegexFile(String str) {
    }
}
