package uk.ac.cam.ch.wwmm.oscarrecogniser.finder;

import dk.brics.automaton.Automaton;
import dk.brics.automaton.RegExp;
import dk.brics.automaton.RunAutomaton;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.xmlcml.euclid.EuclidConstants;
import uk.ac.cam.ch.wwmm.oscar.document.NamedEntity;
import uk.ac.cam.ch.wwmm.oscar.document.Token;
import uk.ac.cam.ch.wwmm.oscar.document.TokenSequence;
import uk.ac.cam.ch.wwmm.oscar.ont.OntologyTerms;
import uk.ac.cam.ch.wwmm.oscar.tools.StringTools;
import uk.ac.cam.ch.wwmm.oscar.types.NamedEntityType;
import uk.ac.cam.ch.wwmm.oscarrecogniser.tokenanalysis.PrefixFinder;
import uk.ac.cam.ch.wwmm.oscartokeniser.Tokeniser;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscarrecogniser/finder/DFAFinder.class */
public abstract class DFAFinder implements Serializable {
    private static final long serialVersionUID = 6130629462990087075L;
    /** Ontology lookup consulted by isOntologyTerm, addNamedEntity and handleNamedEntity; never assigned in this class. */
    protected OntologyTerms ontologyTerms;
    /** Matches a whole token of the form "${" ... "}"; such tokens are treated as embedded regular expressions. */
    private static final Pattern matchSubRe = Pattern.compile("\\$\\{.*\\}");
    /** Matches strings made only of digits and spaces; addNamedEntity routes such expansions to the suffix trees. */
    private static final Pattern digitOrSpace = Pattern.compile("[0-9 ]+");
    /** Per entity type, the automata collected by addNamedEntity; consumed and emptied by finishInit. */
    private final Map<NamedEntityType, List<Automaton>> autLists = new HashMap();
    /** Per entity type, the suffix tree holding digit/space-only expansions; consumed and emptied by finishInit. */
    private final Map<NamedEntityType, SuffixTree> simpleAuts = new HashMap();
    /** Compiled automata per entity type, filled by finishInit and stepped through by findItems. */
    private final Map<NamedEntityType, RunAutomaton> runAuts = new HashMap();
    /** Token string to the representation assigned by generateTokenRepresentation. */
    private final Map<String, String> tokenToRep = new HashMap();
    /** "$"-prefixed tokens whose representation is the token without its leading "$" (see initLiterals). */
    private final Set<String> literals = new HashSet();
    /** Running counter used to number newly seen tokens. */
    private final AtomicInteger tokenId = new AtomicInteger();
    /** Ontology id string to its index in {@link #ontIds}. */
    private final Map<String, Integer> ontIdToIntId = new HashMap();
    /** Ontology id strings in order of first use; the list index is the number written after the 'X' marker. */
    private final List<String> ontIds = new ArrayList();
    /** Per ONTOLOGY/CUSTOM entity type: accepting state number to the ids read off behind the 'X' marker. */
    private final Map<NamedEntityType, Map<Integer, Set<String>>> runAutToStateToOntIds = new HashMap();
    /** Sub-regex token (the full "${...}" text) to the compiled pattern of the text between the braces. */
    private final Map<String, Pattern> subRes = new HashMap();

    /**
     * Loads the terms this finder should recognise. Invoked by init() after the literal tokens
     * have been registered and before the collected automata are compiled.
     * NOTE(review): implementations presumably register their terms through addNamedEntity;
     * confirm against the subclasses.
     */
    protected abstract void loadTerms();

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Prepares the finder for use in three steps: registers the literal tokens, lets the
     * subclass load its terms, then compiles the collected automata.
     */
    public void init() {
        this.initLiterals();
        this.loadTerms();
        this.finishInit();
    }

    /**
     * Registers the "$"-prefixed tokens that generateTokenRepresentation passes through
     * with only the leading "$" removed.
     */
    private void initLiterals() {
        String[] escapedOperators = {"$(", "$)", "$+", "$*", "$|", "$?", "$^"};
        for (String escapedOperator : escapedOperators) {
            this.literals.add(escapedOperator);
        }
    }

    /**
     * @return the ontology terms object held by this finder (may be {@code null} if it was never set)
     */
    public OntologyTerms getOntologyTerms() {
        return ontologyTerms;
    }

    /**
     * Returns the representation of a token, assigning a new one on first sight.
     * Literal tokens map to themselves minus the leading "$". Every other token gets the next
     * counter value followed by {@code EuclidConstants.S_SPACE}, and that mapping is cached.
     * A new token of the form "${...}" additionally has the text between the braces compiled
     * and stored in {@code subRes}.
     *
     * @param str the token text
     * @return the representation to embed in the term's regular expression
     */
    private String generateTokenRepresentation(String str) {
        if (this.literals.contains(str)) {
            return str.substring(1);
        }
        String known = this.tokenToRep.get(str);
        if (known != null) {
            return known;
        }
        String fresh = this.tokenId.incrementAndGet() + EuclidConstants.S_SPACE;
        this.tokenToRep.put(str, fresh);
        if (isSubRe(str)) {
            // Strip the leading "${" and the trailing "}" to get the embedded regex.
            String embeddedRegex = str.substring(2, str.length() - 1);
            this.subRes.put(str, Pattern.compile(embeddedRegex));
        }
        return fresh;
    }

    /**
     * @param str the token text
     * @return {@code true} if the whole token matches {@code matchSubRe}, i.e. has the form "${...}"
     */
    private boolean isSubRe(String str) {
        boolean wholeTokenMatches = matchSubRe.matcher(str).matches();
        return wholeTokenMatches;
    }

    /**
     * @param str the token text
     * @return {@code true} if the token is one of the registered literal tokens
     */
    private boolean isLiteral(String str) {
        boolean registered = this.literals.contains(str);
        return registered;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Collects every registered sub-regex token whose compiled pattern matches the whole
     * of the given string.
     *
     * @param str the text to test against each sub-regex
     * @return a new set of the matching "${...}" tokens; empty if none match
     */
    public Set<String> getSubReRepsForToken(String str) {
        Set<String> matchingSubRes = new HashSet<String>();
        for (Map.Entry<String, Pattern> entry : this.subRes.entrySet()) {
            if (entry.getValue().matcher(str).matches()) {
                matchingSubRes.add(entry.getKey());
            }
        }
        return matchingSubRes;
    }

    /**
     * Looks up the representation previously assigned to a token without creating a new one.
     *
     * @param str the token text
     * @return the cached representation, or {@code null} if the token has none
     */
    protected String getCachedTokenRepresentation(String str) {
        return this.tokenToRep.get(str);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Registers a term so that it can later be recognised by findItems.
     * The term is tokenised, each token is replaced by its representation, and the resulting
     * string is expanded with {@code StringTools.expandRegex}. Expansions made only of digits
     * and spaces go into the type's suffix tree; if any expansion is not of that form, the
     * unexpanded regular expression is compiled into an automaton for the type.
     * For ontology and custom terms the numeric ids are attached behind an 'X' marker.
     *
     * @param str the term text
     * @param namedEntityType the entity type the term belongs to
     * @param z when {@code false}, single-token terms that contain no "$" are ignored
     */
    public void addNamedEntity(String str, NamedEntityType namedEntityType, boolean z) {
        List<String> tokenStringList = Tokeniser.getDefaultInstance().tokenise(str).getTokenStringList();
        if (!z && tokenStringList.size() == 1 && !str.contains("$")) {
            return;
        }
        StringBuilder representation = new StringBuilder();
        for (String tokenString : tokenStringList) {
            representation.append(generateTokenRepresentation(tokenString));
        }
        String baseRegex = representation.toString();
        TermMaps termMaps = TermMaps.getInstance();
        boolean regexAutomatonAdded = false;
        for (String expansion : StringTools.expandRegex(baseRegex)) {
            if (digitOrSpace.matcher(expansion).matches()) {
                SuffixTree tree = this.simpleAuts.get(namedEntityType);
                if (tree == null) {
                    tree = new SuffixTree(expansion);
                    this.simpleAuts.put(namedEntityType, tree);
                } else {
                    tree.addContents(expansion);
                }
                List<String> ids = lookUpTagIds(str, namedEntityType, termMaps);
                if (ids != null) {
                    for (String id : ids) {
                        tree.addContents(expansion + "X" + getNumberForOntologyId(id));
                    }
                }
            } else if (!regexAutomatonAdded) {
                // The automaton is built from the unexpanded regex, so one copy covers every
                // non-simple expansion. The id suffix is appended to a fresh string: appending
                // to a shared buffer inside this loop would stack one extra "(X(..))?" per pass.
                List<String> ids = lookUpTagIds(str, namedEntityType, termMaps);
                String regex = ids == null ? baseRegex : baseRegex + buildTagSuffix(ids);
                getAutomatonList(namedEntityType).add(new RegExp(regex).toAutomaton());
                regexAutomatonAdded = true;
            }
        }
    }

    /**
     * Returns the ids to attach to a term: the ontology ids for an ontology term, the
     * whitespace-separated custom entries for a custom term, or {@code null} for neither.
     */
    private List<String> lookUpTagIds(String str, NamedEntityType namedEntityType, TermMaps termMaps) {
        if (isOntologyTerm(str, namedEntityType)) {
            return this.ontologyTerms.getIdsForTerm(str);
        }
        if (isCustomTerm(str, namedEntityType)) {
            return Arrays.asList(StringTools.splitOnWhitespace(termMaps.getCustEnt().get(str)));
        }
        return null;
    }

    /** Builds the optional regex suffix "(X(n1|n2|...))?" listing the numbers assigned to the given ids. */
    private String buildTagSuffix(List<String> ids) {
        StringBuilder suffix = new StringBuilder("(X(");
        Iterator<String> idIterator = ids.iterator();
        while (idIterator.hasNext()) {
            suffix.append(Integer.toString(getNumberForOntologyId(idIterator.next())));
            if (idIterator.hasNext()) {
                suffix.append('|');
            }
        }
        return suffix.append("))?").toString();
    }

    /**
     * @param str the term text
     * @param namedEntityType the type the term is being registered under
     * @return {@code true} if the type is an instance of CUSTOM and the custom-entity map has an entry for the term
     */
    private boolean isCustomTerm(String str, NamedEntityType namedEntityType) {
        if (!NamedEntityType.CUSTOM.isInstance(namedEntityType)) {
            return false;
        }
        return TermMaps.getInstance().getCustEnt().containsKey(str);
    }

    /**
     * @param str the term text
     * @param namedEntityType the type the term is being registered under
     * @return {@code true} if the type is an instance of ONTOLOGY and the ontology contains the term
     */
    private boolean isOntologyTerm(String str, NamedEntityType namedEntityType) {
        if (!NamedEntityType.ONTOLOGY.isInstance(namedEntityType)) {
            return false;
        }
        return this.ontologyTerms.containsTerm(str);
    }

    /**
     * Returns the list of automata collected for an entity type, creating and registering
     * an empty list the first time the type is seen.
     *
     * @param namedEntityType the entity type
     * @return the mutable list stored in {@code autLists} for that type
     */
    private List<Automaton> getAutomatonList(NamedEntityType namedEntityType) {
        List<Automaton> automata = this.autLists.get(namedEntityType);
        if (automata == null) {
            automata = new ArrayList<Automaton>();
            this.autLists.put(namedEntityType, automata);
        }
        return automata;
    }

    /**
     * Maps an id string to a small integer, assigning the next free index on first use.
     * The index is the position of the id in {@code ontIds}.
     *
     * @param str the id string
     * @return the number associated with the id
     */
    private int getNumberForOntologyId(String str) {
        Integer known = this.ontIdToIntId.get(str);
        if (known != null) {
            return known.intValue();
        }
        int assigned = this.ontIds.size();
        this.ontIds.add(str);
        this.ontIdToIntId.put(str, Integer.valueOf(assigned));
        return assigned;
    }

    /**
     * Compiles everything collected by addNamedEntity into run automata.
     * Each type's automaton list is unioned, determinized and stored under the type itself.
     * Each type's suffix tree is converted and stored under the type name with "-b" appended.
     * Both source maps are emptied as they are processed. Finally the state-to-ids table is
     * rebuilt for every ONTOLOGY or CUSTOM automaton.
     */
    private void finishInit() {
        // Iterate over snapshots of the key sets because the maps are emptied inside the loops.
        for (NamedEntityType type : new HashSet<NamedEntityType>(this.autLists.keySet())) {
            Automaton combined = Automaton.union(this.autLists.get(type));
            combined.determinize();
            this.runAuts.put(type, new RunAutomaton(combined, false));
            this.autLists.remove(type);
        }
        for (NamedEntityType type : new HashSet<NamedEntityType>(this.simpleAuts.keySet())) {
            NamedEntityType simpleType = NamedEntityType.valueOf(type.getName() + "-b");
            RunAutomaton simpleRunAutomaton = new RunAutomaton(this.simpleAuts.get(type).toAutomaton(), false);
            this.runAuts.put(simpleType, simpleRunAutomaton);
            this.simpleAuts.remove(type);
        }
        this.runAutToStateToOntIds.clear();
        for (Map.Entry<NamedEntityType, RunAutomaton> entry : this.runAuts.entrySet()) {
            NamedEntityType type = entry.getKey();
            boolean carriesIds = NamedEntityType.ONTOLOGY.isInstance(type) || NamedEntityType.CUSTOM.isInstance(type);
            if (carriesIds) {
                this.runAutToStateToOntIds.put(type, analyseAutomaton(entry.getValue(), 'X'));
            }
        }
    }

    /**
     * Collects the ids reachable from a state by following digit transitions only.
     *
     * @param runAutomaton the automaton to walk
     * @param i the state to start from
     * @return a new set with the ids found
     */
    private Set<String> readOffTags(RunAutomaton runAutomaton, int i) {
        Set<String> collectedIds = new HashSet<String>();
        readOffTags(runAutomaton, i, "", collectedIds);
        return collectedIds;
    }

    /**
     * Recursive worker: walks every digit transition from the given state and, at each accepting
     * state, resolves the digits consumed so far to an id via {@code ontIds}.
     *
     * @param runAutomaton the automaton to walk
     * @param i the current state
     * @param str the digits consumed on the way to this state
     * @param set receives the ids that were found
     */
    private void readOffTags(RunAutomaton runAutomaton, int i, String str, Set<String> set) {
        // An accepting state reached before any digit carries no id number. This happens when
        // a term was registered with an empty id list; parsing "" would throw
        // NumberFormatException, so such a state is skipped.
        if (runAutomaton.isAccept(i) && str.length() > 0) {
            set.add(this.ontIds.get(Integer.parseInt(str)));
        }
        for (int digit = 0; digit < 10; digit++) {
            int next = runAutomaton.step(i, (char) ('0' + digit));
            if (next != -1) {
                readOffTags(runAutomaton, next, str + digit, set);
            }
        }
    }

    /**
     * Builds the table from accepting states to the ids encoded behind a marker character.
     * Only accepting states that have a transition on the marker are included.
     *
     * @param runAutomaton the automaton to inspect
     * @param c the marker character that introduces the id digits
     * @return a new map from state number to the ids read off after the marker
     */
    private Map<Integer, Set<String>> analyseAutomaton(RunAutomaton runAutomaton, char c) {
        Map<Integer, Set<String>> idsByState = new HashMap<Integer, Set<String>>();
        for (int state = 0; state < runAutomaton.getSize(); state++) {
            if (!runAutomaton.isAccept(state)) {
                continue;
            }
            int afterMarker = runAutomaton.step(state, c);
            if (afterMarker == -1) {
                continue;
            }
            idsByState.put(Integer.valueOf(state), readOffTags(runAutomaton, afterMarker));
        }
        return idsByState;
    }

    /**
     * Creates a named entity for an automaton that has reached an accepting state and hands it
     * to the collector. The entity spans the tokens from the automaton's start token to
     * {@code i} and is typed with the parent of the automaton's type when there is one.
     * ONTOLOGY entities receive the ids recorded for the state plus any ids the ontology
     * holds for the normalised surface text. CUSTOM entities receive the ids recorded for the state.
     *
     * @param automatonState the accepting automaton state
     * @param i index of the last token of the match
     * @param tokenSequence the token sequence being searched
     * @param nECollector receives the new entity
     */
    protected void handleNamedEntity(AutomatonState automatonState, int i, TokenSequence tokenSequence, NECollector nECollector) {
        String substring = tokenSequence.getSubstring(automatonState.getStartToken(), i);
        NamedEntityType type = automatonState.getType();
        if (type.getParent() != null) {
            type = type.getParent();
        }
        NamedEntity namedEntity = new NamedEntity(tokenSequence.getTokens(automatonState.getStartToken(), i), substring, type);
        nECollector.collect(namedEntity);
        if (NamedEntityType.ONTOLOGY.isInstance(automatonState.getType())) {
            Set<String> idsForState = this.runAutToStateToOntIds.get(automatonState.getType()).get(Integer.valueOf(automatonState.getState()));
            List<String> idsForTerm = this.ontologyTerms.getIdsForTerm(StringTools.normaliseName(substring));
            Set<String> entityIds = idsForState;
            if (idsForTerm != null) {
                // Merge into a copy. idsForState is the instance cached for this automaton
                // state, so adding to it directly would leak this surface form's ids into
                // every later entity that reaches the same state.
                entityIds = idsForState == null ? new HashSet<String>() : new HashSet<String>(idsForState);
                entityIds.addAll(idsForTerm);
            }
            namedEntity.addOntIds(entityIds);
        }
        if (NamedEntityType.CUSTOM.isInstance(automatonState.getType())) {
            namedEntity.addCustTypes(this.runAutToStateToOntIds.get(automatonState.getType()).get(Integer.valueOf(automatonState.getState())));
        }
    }

    /**
     * Reports a prefix entity for the token when {@code PrefixFinder} recognises one.
     * The token's own surface is tried first. If that yields nothing and the token is a lone
     * "-", the text from the start of the token returned by {@code getNAfter(-1)} to the end
     * of this token is tried instead.
     * NOTE(review): getNAfter(-1) is presumably the preceding token; confirm in Token.
     *
     * @param token the token to examine
     * @param nECollector receives the prefix entity, if any
     */
    protected void handleTokenForPrefix(Token token, NECollector nECollector) {
        String ownPrefix = PrefixFinder.getPrefix(token.getSurface());
        if (ownPrefix != null) {
            nECollector.collect(NamedEntity.forPrefix(token, ownPrefix));
            return;
        }
        if (!"-".equals(token.getSurface())) {
            return;
        }
        Token neighbour = token.getNAfter(-1);
        if (neighbour == null) {
            return;
        }
        String joined = token.getTokenSequence().getSurface().substring(neighbour.getStart(), token.getEnd());
        String joinedPrefix = PrefixFinder.getPrefix(joined);
        if (joinedPrefix != null) {
            nECollector.collect(NamedEntity.forPrefix(token, joinedPrefix));
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Searches the whole token sequence, from its first token to its last.
     *
     * @param tokenSequence the token sequence to search
     * @param list the representation lists, indexed by token index
     * @param nECollector receives the entities that are found
     */
    public void findItems(TokenSequence tokenSequence, List<RepresentationList> list, NECollector nECollector) {
        int lastTokenIndex = tokenSequence.getTokens().size() - 1;
        findItems(tokenSequence, list, 0, lastTokenIndex, nECollector);
    }

    /**
     * Runs all automata over the tokens {@code i} to {@code i2} (inclusive) and reports matches.
     * For every token a prefix check is made first. A fresh automaton state per entity type is
     * then started at the token, and every live state is advanced with each cached representation
     * of the token. States that survive are carried to the next token, and states that accept
     * are passed to handleNamedEntity. A token without representations drops all live states.
     *
     * @param tokenSequence the token sequence to search
     * @param list the representation lists, indexed by token index
     * @param i index of the first token to process
     * @param i2 index of the last token to process
     * @param nECollector receives the entities that are found
     */
    protected void findItems(TokenSequence tokenSequence, List<RepresentationList> list, int i, int i2, NECollector nECollector) {
        List<AutomatonState> liveStates = initAutomatonStates();
        List<AutomatonState> advancedStates = new ArrayList<AutomatonState>();
        for (int tokenIndex = i; tokenIndex <= i2; tokenIndex++) {
            Token token = tokenSequence.getToken(tokenIndex);
            handleTokenForPrefix(token, nECollector);
            RepresentationList representations = list.get(token.getIndex());
            if (representations.isEmpty()) {
                liveStates.clear();
                continue;
            }
            for (Map.Entry<NamedEntityType, RunAutomaton> entry : this.runAuts.entrySet()) {
                liveStates.add(new AutomatonState(entry.getValue(), entry.getKey(), tokenIndex));
            }
            for (Iterator<String> reps = representations.iterator(); reps.hasNext(); ) {
                String rep = reps.next();
                String repCode = getCachedTokenRepresentation(rep);
                if (repCode == null) {
                    continue;
                }
                for (AutomatonState liveState : liveStates) {
                    // Advance a copy so the same live state can be tried with the other representations.
                    AutomatonState candidate = liveState.m1369clone();
                    if (!stepIntoAutomaton(repCode, candidate)) {
                        continue;
                    }
                    candidate.addRep(rep);
                    if (candidate.isAccept()) {
                        handleNamedEntity(candidate, tokenIndex, tokenSequence, nECollector);
                    }
                    advancedStates.add(candidate);
                }
            }
            // Swap the two lists: the advanced states become live, and the emptied old list is reused.
            List<AutomatonState> recycled = liveStates;
            liveStates = advancedStates;
            recycled.clear();
            advancedStates = recycled;
        }
    }

    /**
     * Creates the initial live states: one per automaton that can consume the representation
     * of the "$^" token, each starting at token 0 and already stepped past that representation.
     *
     * @return a new mutable list of the automaton states that accepted the "$^" step
     */
    private List<AutomatonState> initAutomatonStates() {
        List<AutomatonState> startStates = new ArrayList<AutomatonState>();
        for (Map.Entry<NamedEntityType, RunAutomaton> entry : this.runAuts.entrySet()) {
            AutomatonState state = new AutomatonState(entry.getValue(), entry.getKey(), 0);
            if (!stepIntoAutomaton(generateTokenRepresentation("$^"), state)) {
                continue;
            }
            state.addRep("$^");
            startStates.add(state);
        }
        return startStates;
    }

    /**
     * Feeds a representation to an automaton state one character at a time.
     *
     * @param str the characters to consume
     * @param automatonState the state to advance; it is modified in place
     * @return {@code false} as soon as the state becomes -1 (no transition), {@code true} if every
     *         character was consumed
     */
    private boolean stepIntoAutomaton(String str, AutomatonState automatonState) {
        for (char symbol : str.toCharArray()) {
            automatonState.step(symbol);
            if (automatonState.getState() == -1) {
                return false;
            }
        }
        return true;
    }
}
