package uk.ac.cam.ch.wwmm.oscarpattern;

import com.google.common.collect.HashMultimap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections.set.UnmodifiableSet;
import org.xmlcml.euclid.EuclidConstants;
import uk.ac.cam.ch.wwmm.oscar.chemnamedict.core.ChemNameDictRegistry;
import uk.ac.cam.ch.wwmm.oscar.document.IProcessingDocument;
import uk.ac.cam.ch.wwmm.oscar.document.NamedEntity;
import uk.ac.cam.ch.wwmm.oscar.document.Token;
import uk.ac.cam.ch.wwmm.oscar.document.TokenSequence;
import uk.ac.cam.ch.wwmm.oscar.ont.OntologyTerms;
import uk.ac.cam.ch.wwmm.oscar.tools.StringTools;
import uk.ac.cam.ch.wwmm.oscar.types.NamedEntityType;
import uk.ac.cam.ch.wwmm.oscarrecogniser.extractedtrainingdata.ExtractedTrainingData;
import uk.ac.cam.ch.wwmm.oscarrecogniser.finder.DFANEFinder;
import uk.ac.cam.ch.wwmm.oscarrecogniser.finder.TermMaps;
import uk.ac.cam.ch.wwmm.oscarrecogniser.interfaces.ChemicalEntityRecogniser;
import uk.ac.cam.ch.wwmm.oscarrecogniser.saf.StandoffResolver;
import uk.ac.cam.ch.wwmm.oscarrecogniser.tokenanalysis.NGram;
import uk.ac.cam.ch.wwmm.oscarrecogniser.tokenanalysis.NGramBuilder;
import uk.ac.cam.ch.wwmm.oscartokeniser.TokenClassifier;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscarpattern/PatternRecogniser.class */
/**
 * {@link ChemicalEntityRecogniser} that delegates entity discovery to a
 * {@link DFANEFinder} and then post-processes the result: ontology ids and
 * custom types of co-located entities are merged, overlapping standoffs are
 * resolved or marked, potential acronyms are confirmed or discarded, and
 * stop-word entities are dropped.
 */
public class PatternRecogniser implements ChemicalEntityRecogniser {
    private final NGram nGram;
    private final DFANEFinder finder;
    private final UnmodifiableSet registryNames;
    private double ontPseudoConfidence = 0.2d;
    private double custPseudoConfidence = 0.2d;
    private double cprPseudoConfidence = 0.2d;
    private double ngramThreshold = -2.0d;
    private boolean deprioritiseOnts = false;

    /**
     * Creates a recogniser backed by the default training data, term maps,
     * token classifier, ontology terms and chemical name registry.
     */
    public PatternRecogniser() {
        this(ExtractedTrainingData.getDefaultInstance(), TermMaps.getInstance().getNeTerms(), TokenClassifier.getDefaultInstance(), OntologyTerms.getDefaultInstance(), ChemNameDictRegistry.getDefaultInstance());
    }

    /**
     * Creates a recogniser from explicitly supplied resources.
     *
     * @param extractedTrainingData training data handed to {@link NGramBuilder}
     * @param map named entity terms handed to the {@link DFANEFinder}
     * @param tokenClassifier token classifier handed to the {@link DFANEFinder}
     * @param ontologyTerms ontology terms handed to the {@link DFANEFinder}
     * @param chemNameDictRegistry registry whose names are shared by the n-gram model and the finder
     */
    public PatternRecogniser(ExtractedTrainingData extractedTrainingData, Map<String, NamedEntityType> map, TokenClassifier tokenClassifier, OntologyTerms ontologyTerms, ChemNameDictRegistry chemNameDictRegistry) {
        // NOTE(review): decorate() is declared to return Set; the cast presumably fails if
        // getAllNames() ever returns some other already-unmodifiable set - confirm against
        // the commons-collections version in use.
        this.registryNames = (UnmodifiableSet) UnmodifiableSet.decorate(chemNameDictRegistry.getAllNames());
        this.nGram = NGramBuilder.buildOrDeserialiseModel(extractedTrainingData, this.registryNames);
        this.finder = new DFANEFinder(map, tokenClassifier, ontologyTerms, this.registryNames);
    }

    /**
     * Finds named entities in every token sequence of the given document,
     * removing blocked entities.
     *
     * @param iProcessingDocument document whose token sequences are searched
     * @return the entities that remain after post-processing
     */
    public List<NamedEntity> findNamedEntities(IProcessingDocument iProcessingDocument) {
        return findNamedEntities(iProcessingDocument.getTokenSequences());
    }

    /**
     * Finds named entities in the given token sequences using
     * {@link StandoffResolver.ResolutionMode#REMOVE_BLOCKED}.
     *
     * @param list token sequences to search
     * @return the entities that remain after post-processing
     */
    @Override // uk.ac.cam.ch.wwmm.oscarrecogniser.interfaces.ChemicalEntityRecogniser
    public List<NamedEntity> findNamedEntities(List<TokenSequence> list) {
        return findNamedEntities(list, StandoffResolver.ResolutionMode.REMOVE_BLOCKED);
    }

    /**
     * Finds named entities in the given token sequences and post-processes
     * them according to the requested resolution mode.
     *
     * @param list token sequences to search
     * @param resolutionMode {@code REMOVE_BLOCKED} or {@code MARK_BLOCKED}
     * @return the entities that remain after merging, standoff resolution,
     *         acronym handling and stop-word removal
     * @throws RuntimeException if {@code resolutionMode} is neither of the two supported modes
     */
    @Override // uk.ac.cam.ch.wwmm.oscarrecogniser.interfaces.ChemicalEntityRecogniser
    public List<NamedEntity> findNamedEntities(List<TokenSequence> list, StandoffResolver.ResolutionMode resolutionMode) {
        List<NamedEntity> entities = new ArrayList<NamedEntity>();
        for (TokenSequence tokenSequence : list) {
            entities.addAll(this.finder.findNamedEntities(tokenSequence, this.nGram, this.ngramThreshold));
        }

        mergeOntIdsAndCustTypes(entities);

        if (resolutionMode == StandoffResolver.ResolutionMode.REMOVE_BLOCKED) {
            StandoffResolver.resolveStandoffs(entities);
        } else if (resolutionMode == StandoffResolver.ResolutionMode.MARK_BLOCKED) {
            StandoffResolver.markBlockedStandoffs(entities);
        } else {
            throw new RuntimeException(resolutionMode + " not yet implemented");
        }

        handlePotentialAcronyms(list, entities);
        removeStopwords(entities);
        return entities;
    }

    /**
     * Removes, in place, every entity whose type is {@link NamedEntityType#STOP}.
     *
     * @param list entities to filter; modified by this call
     */
    static void removeStopwords(List<NamedEntity> list) {
        Iterator<NamedEntity> entities = list.iterator();
        while (entities.hasNext()) {
            if (NamedEntityType.STOP.equals(entities.next().getType())) {
                entities.remove();
            }
        }
    }

    /**
     * Resolves entities typed {@link NamedEntityType#POTENTIALACRONYM}: those
     * whose surface was identified as an acronym take the type recorded for
     * that surface, all others are removed from the list.
     *
     * @param list token sequences the entities were found in
     * @param list2 entities to process; modified by this call
     */
    static void handlePotentialAcronyms(List<TokenSequence> list, List<NamedEntity> list2) {
        // Index entities by end offset; a later entity with the same end replaces an earlier one.
        Map<Integer, NamedEntity> entitiesByEnd = new HashMap<Integer, NamedEntity>();
        for (NamedEntity entity : list2) {
            entitiesByEnd.put(Integer.valueOf(entity.getEnd()), entity);
        }

        // Index tokens by start offset so an entity can be mapped back to its first token.
        Map<Integer, Token> tokensByStart = new HashMap<Integer, Token>();
        for (TokenSequence tokenSequence : list) {
            for (Token token : tokenSequence.getTokens()) {
                tokensByStart.put(Integer.valueOf(token.getStart()), token);
            }
        }

        Map<String, NamedEntityType> acronymTypes = identifyAcronyms(list2, entitiesByEnd, tokensByStart);

        Iterator<NamedEntity> entities = list2.iterator();
        while (entities.hasNext()) {
            NamedEntity entity = entities.next();
            if (!NamedEntityType.POTENTIALACRONYM.equals(entity.getType())) {
                continue;
            }
            String surface = entity.getSurface();
            if (acronymTypes.containsKey(surface)) {
                entity.setType(acronymTypes.get(surface));
            } else {
                entities.remove();
            }
        }
    }

    /**
     * Determines which potential acronyms are defined in the text and which
     * entity type each one stands for.
     *
     * <p>A potential acronym qualifies when the token before it is {@code "("},
     * the token after it ends with {@code ")"}, an entity ends where the token
     * two positions before it ends, {@link StringTools#testForAcronym} accepts
     * the pair, and that entity is neither {@code ASE} nor {@code ASES}.
     *
     * <p>When the same acronym surface is defined more than once, a
     * {@code POLYMER} definition replaces any earlier one, and a
     * {@code COMPOUND} definition replaces any earlier one except
     * {@code POLYMER}; otherwise the first definition is kept.
     *
     * @param list all entities, including the potential acronyms
     * @param map entities keyed by end offset
     * @param map2 tokens keyed by start offset
     * @return acronym surface mapped to the type of its definition
     */
    static Map<String, NamedEntityType> identifyAcronyms(List<NamedEntity> list, Map<Integer, NamedEntity> map, Map<Integer, Token> map2) {
        Map<String, NamedEntityType> acronymTypes = new HashMap<String, NamedEntityType>();
        for (NamedEntity candidate : list) {
            if (!NamedEntityType.POTENTIALACRONYM.equals(candidate.getType())) {
                continue;
            }
            Token acronymToken = map2.get(Integer.valueOf(candidate.getStart()));
            if (acronymToken == null) {
                continue;
            }
            Token definitionEnd = acronymToken.getNAfter(-2);
            Token opening = acronymToken.getNAfter(-1);
            Token following = acronymToken.getNAfter(1);
            if (definitionEnd == null || opening == null || following == null) {
                continue;
            }
            if (!opening.getSurface().equals("(") || !following.getSurface().endsWith(")")) {
                continue;
            }
            NamedEntity definition = map.get(Integer.valueOf(definitionEnd.getEnd()));
            if (definition == null) {
                continue;
            }
            if (!StringTools.testForAcronym(candidate.getSurface(), definition.getSurface())) {
                continue;
            }
            NamedEntityType definitionType = definition.getType();
            if (NamedEntityType.ASE.equals(definitionType) || NamedEntityType.ASES.equals(definitionType)) {
                continue;
            }

            String surface = candidate.getSurface();
            // The precedence check must look at the type of the definition just found;
            // the candidate's own type is always POTENTIALACRONYM at this point, so
            // comparing that would make the POLYMER/COMPOUND rules unreachable.
            boolean firstDefinition = !acronymTypes.containsKey(surface);
            boolean polymerWins = NamedEntityType.POLYMER.equals(definitionType);
            boolean compoundWins = NamedEntityType.COMPOUND.equals(definitionType)
                    && !NamedEntityType.POLYMER.equals(acronymTypes.get(surface));
            if (firstDefinition || polymerWins || compoundWins) {
                acronymTypes.put(surface, definitionType);
            }
        }
        return acronymTypes;
    }

    /**
     * Gives every entity the union of the ontology ids and the union of the
     * custom types of all entities that share its start and end offsets.
     * An entity's ids or types are only replaced when the union is non-empty.
     *
     * @param list entities to update in place
     */
    static void mergeOntIdsAndCustTypes(List<NamedEntity> list) {
        HashMultimap<String, String> ontIdsByPosition = HashMultimap.create();
        HashMultimap<String, String> custTypesByPosition = HashMultimap.create();
        for (NamedEntity entity : list) {
            String position = positionKey(entity);
            ontIdsByPosition.putAll(position, entity.getOntIds());
            custTypesByPosition.putAll(position, entity.getCustTypes());
        }
        for (NamedEntity entity : list) {
            String position = positionKey(entity);
            // The sets returned by get() are handed over as-is, so entities at the
            // same position end up referencing the same set instance.
            Set<String> mergedOntIds = ontIdsByPosition.get(position);
            if (!mergedOntIds.isEmpty()) {
                entity.setOntIds(mergedOntIds);
            }
            Set<String> mergedCustTypes = custTypesByPosition.get(position);
            if (!mergedCustTypes.isEmpty()) {
                entity.setCustTypes(mergedCustTypes);
            }
        }
    }

    /** Builds the start/end key used to group entities that cover the same span. */
    private static String positionKey(NamedEntity entity) {
        return entity.getStart() + EuclidConstants.S_COLON + entity.getEnd();
    }

    /**
     * Assigns a pseudo-confidence and the ontology deprioritisation flag to
     * each entity. {@code ONTOLOGY}, {@code LOCANTPREFIX} and {@code CUSTOM}
     * entities receive the corresponding configured value; any other type
     * (including a missing type) receives {@code NaN}.
     *
     * <p>NOTE(review): nothing in this class calls this method, so the
     * pseudo-confidence setters have no effect on
     * {@code findNamedEntities} - confirm whether that is intended.
     *
     * @param list entities to update in place
     */
    void setPseudoConfidences(List<NamedEntity> list) {
        for (NamedEntity entity : list) {
            NamedEntityType type = entity.getType();
            double pseudoConfidence;
            if (NamedEntityType.CUSTOM.equals(type)) {
                pseudoConfidence = this.custPseudoConfidence;
            } else if (NamedEntityType.LOCANTPREFIX.equals(type)) {
                pseudoConfidence = this.cprPseudoConfidence;
            } else if (NamedEntityType.ONTOLOGY.equals(type)) {
                pseudoConfidence = this.ontPseudoConfidence;
            } else {
                pseudoConfidence = Double.NaN;
            }
            entity.setPseudoConfidence(pseudoConfidence);
            entity.setDeprioritiseOnt(this.deprioritiseOnts);
        }
    }

    /** @return the pseudo-confidence used for {@code ONTOLOGY} entities */
    public double getOntPseudoConfidence() {
        return this.ontPseudoConfidence;
    }

    /** @param d the pseudo-confidence to use for {@code ONTOLOGY} entities */
    public void setOntPseudoConfidence(double d) {
        this.ontPseudoConfidence = d;
    }

    /** @return the pseudo-confidence used for {@code CUSTOM} entities */
    public double getCustPseudoConfidence() {
        return this.custPseudoConfidence;
    }

    /** @param d the pseudo-confidence to use for {@code CUSTOM} entities */
    public void setCustPseudoConfidence(double d) {
        this.custPseudoConfidence = d;
    }

    /** @return the pseudo-confidence used for {@code LOCANTPREFIX} entities */
    public double getCprPseudoConfidence() {
        return this.cprPseudoConfidence;
    }

    /** @param d the pseudo-confidence to use for {@code LOCANTPREFIX} entities */
    public void setCprPseudoConfidence(double d) {
        this.cprPseudoConfidence = d;
    }

    /** @param d the n-gram threshold passed to the finder on each search */
    public void setNgramThreshold(double d) {
        this.ngramThreshold = d;
    }

    /** @return the n-gram threshold passed to the finder on each search */
    public double getNgramThreshold() {
        return this.ngramThreshold;
    }

    /** @param z the deprioritisation flag applied by {@code setPseudoConfidences} */
    public void setDeprioritiseOnts(boolean z) {
        this.deprioritiseOnts = z;
    }

    /** @return the unmodifiable set of registry names captured at construction time */
    public UnmodifiableSet getRegistryNames() {
        return this.registryNames;
    }
}
