package org.xmlcml.ami2.plugins;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nu.xom.Attribute;
import nu.xom.Element;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.xmlcml.ami2.dictionary.DefaultAMIDictionary;
import org.xmlcml.cmine.files.AbstractSearcher;
import org.xmlcml.cmine.files.ResultElement;
import org.xmlcml.cmine.files.ResultsElement;
import org.xmlcml.cmine.lookup.AbstractLookup;
import org.xmlcml.cmine.lookup.DefaultStringDictionary;
import org.xmlcml.xml.XPathGenerator;

/* loaded from: input_file:org/xmlcml/ami2/plugins/AMISearcher.class */
/**
 * Searches text (or the text value of XOM elements) for matches, using either a
 * regular-expression {@link NamedPattern} or a {@link DefaultStringDictionary}.
 *
 * <p>Pattern matches are reported with a character-based context window taken from
 * {@link #contextCounts}; dictionary matches are reported with a word-based context
 * window bounded by {@link #maxPreWordCount} and {@link #maxPostWordCount}.
 *
 * <p>Instances hold mutable per-search state ({@link #stringList} and the last exact
 * match) and make no attempt at synchronization.
 */
public class AMISearcher extends AbstractSearcher {
    /** Sentinel cached for terms that a lookup could not resolve. */
    private static final String NOT_FOUND = "NOT_FOUND";
    public static final Logger LOG = Logger.getLogger(AMISearcher.class);
    /** Matches any {@code <...>} tag; compiled once rather than on every call. */
    private static final Pattern INLINE_TAG_PATTERN = Pattern.compile("<[^>]*>");
    /** Raw text of the most recent pattern match; used as the key for lookups. */
    private String exactMatch;
    private AbstractLookup lookup;
    private NamedPattern namedPattern;
    private AMIArgProcessor amiArgProcessor;
    public static int DEFAULT_POST_WORD_COUNT;
    public static int DEFAULT_PRE_WORD_COUNT;
    public static final String EXACT = "exact";
    public static final String POST = "post";
    public static final String PRE = "pre";
    /** Character counts of context: index 0 is used before the match, index 1 after it. */
    protected Integer[] contextCounts;
    protected DefaultStringDictionary dictionary;
    public int maxPostWordCount;
    public int maxPreWordCount;
    protected String name;
    public Pattern pattern;
    /** Word list of the most recent dictionary search; set by {@link #createResultElement(List, int, int)}. */
    public List<String> stringList;

    /**
     * Creates a searcher bound to an argument processor.
     *
     * @param aMIArgProcessor source of context counts, lookups and dictionaries; must not be null
     * @throws RuntimeException if {@code aMIArgProcessor} is null
     */
    public AMISearcher(AMIArgProcessor aMIArgProcessor) {
        // Validate before first use; previously the processor was dereferenced first,
        // so the intended error message could never be produced.
        if (aMIArgProcessor == null) {
            throw new RuntimeException("null argProcessor");
        }
        this.maxPostWordCount = DEFAULT_POST_WORD_COUNT;
        this.maxPreWordCount = DEFAULT_PRE_WORD_COUNT;
        this.amiArgProcessor = aMIArgProcessor;
        this.contextCounts = aMIArgProcessor.getContextCount();
    }

    /**
     * Creates a pattern-based searcher.
     *
     * @param aMIArgProcessor argument processor; must not be null
     * @param namedPattern pattern (and name) to search with; may be null
     */
    public AMISearcher(AMIArgProcessor aMIArgProcessor, NamedPattern namedPattern) {
        this(aMIArgProcessor);
        setNamedPattern(namedPattern);
    }

    /**
     * Creates a dictionary-based searcher named after the dictionary's title.
     *
     * @param aMIArgProcessor argument processor; must not be null
     * @param defaultStringDictionary dictionary to search with; may be null, in which
     *        case the searcher has no dictionary and no name
     */
    public AMISearcher(AMIArgProcessor aMIArgProcessor, DefaultStringDictionary defaultStringDictionary) {
        this(aMIArgProcessor);
        setDictionary(defaultStringDictionary);
        this.name = defaultStringDictionary == null ? null : defaultStringDictionary.getTitle();
    }

    /**
     * Fills the pre/exact/post fields of a result from the matcher's current match and
     * then attaches any lookup references for the matched text.
     *
     * @param str the text the matcher was run against
     * @param matcher a matcher positioned on a successful match
     * @param resultElement the result to populate
     */
    protected void matchAndAddPrePost(String str, Matcher matcher, ResultElement resultElement) {
        String matched = matcher.group(0);
        int matchStart = matcher.start();
        int matchEnd = matcher.end();
        // Clamp the character context window to the bounds of the text.
        int preStart = Math.max(0, matchStart - this.contextCounts[0].intValue());
        int postEnd = Math.min(str.length(), matchEnd + this.contextCounts[1].intValue());
        resultElement.setPre(flattenHtmlInlineTags(str.substring(preStart, matchStart)));
        resultElement.setExact(flattenHtmlInlineTags(matched));
        resultElement.setPost(flattenHtmlInlineTags(str.substring(matchEnd, postEnd)));
        // Record the match so the lookup step has a real key; the field was previously
        // never assigned, so every lookup was performed with null.
        this.exactMatch = matched;
        lookupMatchAndAddLookupRefs(resultElement);
    }

    /**
     * For every configured lookup, resolves the current exact match and, when a
     * reference is found, adds it to the result as an attribute named after the lookup.
     * Results (including misses) are cached in the lookup's own match-to-reference map.
     */
    private void lookupMatchAndAddLookupRefs(ResultElement resultElement) {
        Map<String, AbstractLookup> lookupByName = this.amiArgProcessor.getOrCreateLookupInstanceByName();
        for (Map.Entry<String, AbstractLookup> entry : lookupByName.entrySet()) {
            String lookupName = entry.getKey();
            AbstractLookup abstractLookup = entry.getValue();
            if (abstractLookup == null) {
                continue;
            }
            Map<String, String> refByMatch = abstractLookup.getOrCreateLookupRefByMatch();
            String ref = refByMatch.get(this.exactMatch);
            if (ref == null) {
                try {
                    ref = abstractLookup.lookup(this.exactMatch);
                } catch (IOException e) {
                    // Best effort: a failed lookup is recorded as NOT_FOUND below.
                    LOG.debug("Cannot find match: " + this.exactMatch + " in " + lookupName, e);
                }
                ref = ref == null ? NOT_FOUND : ref;
                refByMatch.put(this.exactMatch, ref);
            }
            if (!NOT_FOUND.equals(ref)) {
                resultElement.addAttribute(new Attribute(lookupName, ref));
            }
        }
    }

    /**
     * Removes every {@code <...>} tag from the string, leaving the text between tags.
     *
     * @param str text possibly containing inline markup; must not be null
     * @return the text with all tags removed
     */
    protected String flattenHtmlInlineTags(String str) {
        return INLINE_TAG_PATTERN.matcher(str).replaceAll("");
    }

    /**
     * Creates a result for the matcher's current match within {@code str}.
     *
     * @param str the text the matcher was run against
     * @param matcher a matcher positioned on a successful match
     * @return the populated result
     */
    public ResultElement createResultElement(String str, Matcher matcher) {
        ResultElement resultElement = createResultElement();
        matchAndAddPrePost(str, matcher, resultElement);
        return resultElement;
    }

    /**
     * Creates an empty result element; subclasses may override to supply their own type.
     *
     * @return a new {@link AMIResultElement}
     */
    public ResultElement createResultElement() {
        return new AMIResultElement();
    }

    /**
     * Moves every result from {@code resultsElement2} into {@code resultsElement},
     * setting the XPath of {@code element} on {@code resultsElement} for each result moved.
     *
     * @param element the element that was searched
     * @param resultsElement the aggregate results to append to
     * @param resultsElement2 the results for {@code element}; if null a warning is logged
     *        and nothing is added
     */
    public void addXpathAndAddtoResultsElement(Element element, ResultsElement resultsElement, ResultsElement resultsElement2) {
        if (resultsElement2 == null) {
            LOG.warn("null resultsElement");
            return;
        }
        Iterator<ResultElement> it = resultsElement2.iterator();
        while (it.hasNext()) {
            ResultElement next = it.next();
            // A XOM node must be detached from its old parent before it can be re-parented.
            next.detach();
            resultsElement.setXPath(new XPathGenerator(element).getXPath());
            resultsElement.appendChild(next);
        }
    }

    /** @return the argument processor this searcher was created with */
    public AMIArgProcessor getArgProcessor() {
        return this.amiArgProcessor;
    }

    /** @return the dictionary's title, or null when no dictionary is set */
    public String getTitle() {
        if (this.dictionary == null) {
            return null;
        }
        return this.dictionary.getTitle();
    }

    /**
     * Sets the named pattern and derives {@link #pattern} and {@link #name} from it.
     *
     * @param namedPattern the pattern to use; null clears the pattern and the name
     */
    public void setNamedPattern(NamedPattern namedPattern) {
        this.namedPattern = namedPattern;
        this.pattern = namedPattern == null ? null : namedPattern.getPattern();
        this.name = namedPattern == null ? null : namedPattern.getName();
    }

    /**
     * Tests whether any of the candidate trailing-word sequences matches the words
     * that follow position {@code i} in {@code list2}.
     *
     * @param list candidate sequences of trailing words, tried in order
     * @param list2 the words being searched
     * @param i index in {@code list2} of the leading word that has already matched
     * @return the length of the first candidate sequence that matches (0 for an empty
     *         sequence), or -1 if none matches
     */
    public int canFitTrailing(List<List<String>> list, List<String> list2, int i) {
        for (List<String> trailing : list) {
            LOG.trace("match: " + list2.get(i));
            boolean matched = true;
            int offset = 0;
            for (; offset < trailing.size(); offset++) {
                int wordIndex = i + 1 + offset;
                if (wordIndex >= list2.size()) {
                    // Candidate runs past the end of the text. Do not index list2 here:
                    // doing so (even only to build a trace message) would throw.
                    LOG.trace(">> (past end of word list)");
                    matched = false;
                    break;
                }
                if (!matchIncludingTrailingPunctuation(list2.get(wordIndex), trailing.get(offset))) {
                    LOG.trace(">> " + list2.get(wordIndex));
                    matched = false;
                    break;
                }
            }
            if (matched) {
                return offset;
            }
        }
        return -1;
    }

    /**
     * Collects the words of an exact match from {@link #stringList}.
     *
     * @param i index of the first word of the match
     * @param i2 number of trailing words after the first
     * @return the words at indices {@code i} to {@code i + i2} inclusive
     */
    public List<String> createExactStringList(int i, int i2) {
        List<String> exactWords = new ArrayList<String>();
        for (int index = i; index <= i + i2; index++) {
            exactWords.add(this.stringList.get(index));
        }
        return exactWords;
    }

    /**
     * Collects up to {@link #getMaxPostWordCount()} words following position {@code i}
     * in {@link #stringList}.
     *
     * @param i index of the last word of the match
     * @return the following words, fewer if the list ends first
     */
    public List<String> createPostStringList(int i) {
        List<String> postWords = new ArrayList<String>();
        // Exclusive upper bound is i + 1 + count so that exactly `count` words are
        // returned, mirroring createPreStringList (the previous bound dropped one word).
        int end = Math.min(this.stringList.size(), i + 1 + getMaxPostWordCount());
        for (int index = i + 1; index < end; index++) {
            postWords.add(this.stringList.get(index));
        }
        return postWords;
    }

    /**
     * Collects up to {@link #getMaxPreWordCount()} words preceding position {@code i}
     * in {@link #stringList}.
     *
     * @param i index of the first word of the match
     * @return the preceding words, fewer if the list starts first
     */
    public List<String> createPreStringList(int i) {
        List<String> preWords = new ArrayList<String>();
        for (int index = Math.max(0, i - getMaxPreWordCount()); index < i; index++) {
            preWords.add(this.stringList.get(index));
        }
        return preWords;
    }

    /**
     * Creates a result for a dictionary match, with space-joined pre, exact and post
     * word context stored as attributes. Also stores {@code list} in {@link #stringList}.
     *
     * @param list the words being searched
     * @param i index of the first matched word
     * @param i2 number of trailing words matched after the first
     * @return a new result carrying the {@code pre}, {@code exact} and {@code post} attributes
     */
    public ResultElement createResultElement(List<String> list, int i, int i2) {
        this.stringList = list;
        ResultElement resultElement = new ResultElement();
        resultElement.addAttribute(new Attribute(PRE, StringUtils.join(createPreStringList(i).iterator(), " ")));
        resultElement.addAttribute(new Attribute(EXACT, StringUtils.join(createExactStringList(i, i2).iterator(), " ")));
        resultElement.addAttribute(new Attribute(POST, StringUtils.join(createPostStringList(i + i2).iterator(), " ")));
        return resultElement;
    }

    /**
     * Not yet implemented.
     *
     * @throws RuntimeException always
     */
    protected ResultElement createResultElement(String str, DefaultStringDictionary defaultStringDictionary) {
        throw new RuntimeException("createResultElement(dictionary) NYI");
    }

    /** @return the dictionary used for searching, or null if none is set */
    public DefaultStringDictionary getDictionary() {
        return this.dictionary;
    }

    /** @return the maximum number of words of context reported after a dictionary match */
    public int getMaxPostWordCount() {
        return this.maxPostWordCount;
    }

    /** @return the maximum number of words of context reported before a dictionary match */
    public int getMaxPreWordCount() {
        return this.maxPreWordCount;
    }

    /** @return the searcher's name, taken from the named pattern or the dictionary title */
    @Override // org.xmlcml.cmine.files.AbstractSearcher
    public String getName() {
        return this.name;
    }

    /** @return the compiled pattern, or null if none is set */
    protected Pattern getPattern() {
        return this.pattern;
    }

    /**
     * Extracts the text to be searched from an element.
     *
     * @param element the element to read
     * @return the element's XOM string value
     */
    public String getValue(Element element) {
        return element.getValue();
    }

    /**
     * Compares a word from the text against a dictionary word, tolerating exactly one
     * trailing punctuation character ({@code ; , . ! ?}) on the text word.
     *
     * @param str the word from the text
     * @param str2 the dictionary word
     * @return true if the words are equal, or if {@code str} is {@code str2} followed by
     *         one of the accepted punctuation characters
     */
    public boolean matchIncludingTrailingPunctuation(String str, String str2) {
        int extra = str.length() - str2.length();
        if (extra < 0 || extra > 1) {
            return false;
        }
        if (str.equals(str2)) {
            return true;
        }
        if (extra != 1 || !str.startsWith(str2)) {
            return false;
        }
        char last = str.charAt(str.length() - 1);
        return last == ';' || last == ',' || last == '.' || last == '!' || last == '?';
    }

    /**
     * Searches each element, accumulates the results into {@code resultsElement}, then
     * post-processes them and marks those not found in the current dictionary.
     *
     * @param list the elements to search
     * @param resultsElement the aggregate to append results to
     * @return {@code resultsElement}
     */
    public ResultsElement search(List<? extends Element> list, ResultsElement resultsElement) {
        for (Element element : list) {
            addXpathAndAddtoResultsElement(element, resultsElement, searchXomElement(element));
        }
        postProcessResultsElement(resultsElement);
        markFalsePositives(resultsElement, getOrCreateCurrentDictionary());
        return resultsElement;
    }

    private DefaultAMIDictionary getOrCreateCurrentDictionary() {
        return getArgProcessor().getOrCreateCurrentDictionary();
    }

    /**
     * Searches the text value of a single element.
     *
     * @param element the element to search
     * @return the results, or null if neither a dictionary nor a pattern is set
     */
    public ResultsElement searchXomElement(Element element) {
        return search(getValue(element));
    }

    /**
     * Searches text with the dictionary if one is set, otherwise with the pattern.
     *
     * @param str the text to search
     * @return the results, or null if neither a dictionary nor a pattern is set
     */
    public ResultsElement search(String str) {
        ResultsElement resultsElement = null;
        if (getDictionary() != null) {
            resultsElement = searchWithDictionary(str);
        } else if (getPattern() != null) {
            resultsElement = searchWithPattern(str);
        }
        return resultsElement;
    }

    /**
     * Scans a word list for dictionary terms. A term matches at position {@code i} when
     * the dictionary has trailing-word sequences for the word at {@code i} and one of
     * them fits the following words.
     *
     * @param list the words to search
     * @return one result per matching start position (possibly none)
     */
    public ResultsElement searchWithDictionary(List<String> list) {
        ResultsElement resultsElement = new ResultsElement();
        for (int i = 0; i < list.size(); i++) {
            List<List<String>> trailingWords = this.dictionary.getTrailingWords(list.get(i));
            if (trailingWords == null) {
                continue;
            }
            int trailingCount = canFitTrailing(trailingWords, list, i);
            if (trailingCount != -1) {
                resultsElement.appendChild(createResultElement(list, i, trailingCount));
            }
        }
        return resultsElement;
    }

    /**
     * Runs a dictionary search over the word list supplied by the argument processor's
     * word collection factory.
     *
     * <p>NOTE(review): {@code str} is not used here; the words come from the factory
     * instead. Presumably the factory has already been given the same text - confirm
     * against the caller.
     */
    private ResultsElement searchWithDictionary(String str) {
        return searchWithDictionary(this.amiArgProcessor.ensureWordCollectionFactory().createWordList());
    }

    /**
     * Finds every non-overlapping match of the pattern in {@code str}.
     */
    private ResultsElement searchWithPattern(String str) {
        ResultsElement resultsElement = new ResultsElement();
        Matcher matcher = getPattern().matcher(str);
        // Plain find() resumes after the previous match and steps past zero-length
        // matches; find(end) would retry the same position forever on an empty match.
        while (matcher.find()) {
            resultsElement.appendChild(createResultElement(str, matcher));
        }
        return resultsElement;
    }

    /** @param defaultStringDictionary the dictionary to search with; may be null */
    public void setDictionary(DefaultStringDictionary defaultStringDictionary) {
        this.dictionary = defaultStringDictionary;
    }

    /**
     * Chooses the term to check against the dictionary for a result.
     *
     * @param resultElement the result to inspect
     * @return the result's match value, or its exact value when the match is null
     */
    protected String getDictionaryTerm(ResultElement resultElement) {
        String term = resultElement.getMatch();
        if (term == null) {
            term = resultElement.getExact();
        }
        return term;
    }

    /**
     * Flags every result whose term is absent from the dictionary by recording a failed
     * dictionary check on it. Does nothing if either argument is null.
     *
     * @param resultsElement the results to check
     * @param defaultAMIDictionary the dictionary to check against
     */
    protected void markFalsePositives(ResultsElement resultsElement, DefaultAMIDictionary defaultAMIDictionary) {
        if (defaultAMIDictionary == null || resultsElement == null) {
            return;
        }
        for (int index = resultsElement.size() - 1; index >= 0; index--) {
            ResultElement resultElement = resultsElement.get(index);
            if (resultElement != null && !defaultAMIDictionary.contains(getDictionaryTerm(resultElement))) {
                LOG.trace("marking potential false positive: " + resultElement.toXML());
                resultsElement.get(index).setDictionaryCheck(defaultAMIDictionary, false);
            }
        }
    }

    /**
     * Hook invoked after all elements have been searched; the default does nothing.
     *
     * @param resultsElement the accumulated results
     */
    protected void postProcessResultsElement(ResultsElement resultsElement) {
    }

    static {
        LOG.setLevel(Level.DEBUG);
        DEFAULT_POST_WORD_COUNT = 10;
        DEFAULT_PRE_WORD_COUNT = 10;
    }
}
