package uk.ac.cam.ch.wwmm.oscarMEMM;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.xmlcml.euclid.EuclidConstants;
import uk.ac.cam.ch.wwmm.oscar.chemnamedict.core.ChemNameDictRegistry;
import uk.ac.cam.ch.wwmm.oscar.document.IProcessingDocument;
import uk.ac.cam.ch.wwmm.oscar.document.NamedEntity;
import uk.ac.cam.ch.wwmm.oscar.document.TokenSequence;
import uk.ac.cam.ch.wwmm.oscar.ont.OntologyTerms;
import uk.ac.cam.ch.wwmm.oscar.types.NamedEntityType;
import uk.ac.cam.ch.wwmm.oscarMEMM.memm.MEMM;
import uk.ac.cam.ch.wwmm.oscarMEMM.memm.data.MEMMModel;
import uk.ac.cam.ch.wwmm.oscarMEMM.models.ChemPapersModel;
import uk.ac.cam.ch.wwmm.oscarrecogniser.finder.DFAONTCPRFinder;
import uk.ac.cam.ch.wwmm.oscarrecogniser.finder.DFASupplementaryTermFinder;
import uk.ac.cam.ch.wwmm.oscarrecogniser.interfaces.ChemicalEntityRecogniser;
import uk.ac.cam.ch.wwmm.oscarrecogniser.saf.StandoffResolver;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscarMEMM/MEMMRecogniser.class */
/**
 * {@link ChemicalEntityRecogniser} that combines three sources of named
 * entities: a {@link MEMM} driven by a {@link MEMMModel}, a
 * {@link DFAONTCPRFinder} built from a set of ontology terms and, when the
 * supplied dictionary registry contains any names, a
 * {@link DFASupplementaryTermFinder}.
 *
 * <p>The entities from all sources are merged, given pseudo-confidences,
 * sorted and finally passed to {@link StandoffResolver}.
 */
public class MEMMRecogniser implements ChemicalEntityRecogniser {
    private final MEMMModel model;
    private double memmThreshold = 0.2d;
    private final DFAONTCPRFinder ontologyAndPrefixTermFinder;
    private double ontPseudoConfidence = 0.2d;
    private double custPseudoConfidence = 0.2d;
    private double cprPseudoConfidence = 0.2d;
    private boolean deprioritiseOnts = false;
    /** {@code null} when the dictionary registry passed to the constructor held no names. */
    private final DFASupplementaryTermFinder supplementaryTermFinder;

    /**
     * Creates a recogniser using a new {@link ChemPapersModel}, the default
     * {@link OntologyTerms} instance and a new English-locale
     * {@link ChemNameDictRegistry}.
     */
    public MEMMRecogniser() {
        this(new ChemPapersModel(), OntologyTerms.getDefaultInstance(), new ChemNameDictRegistry(Locale.ENGLISH));
    }

    /**
     * Creates a recogniser from explicit components. All thresholds and
     * pseudo-confidences start at 0.2 and ontology deprioritisation is off.
     *
     * @param mEMMModel the model handed to each {@link MEMM} created by this recogniser
     * @param ontologyTerms the terms used to build the ontology/prefix term finder
     * @param chemNameDictRegistry the registry used to build the supplementary
     *        term finder; no supplementary finder is created when it reports
     *        no names
     */
    public MEMMRecogniser(MEMMModel mEMMModel, OntologyTerms ontologyTerms, ChemNameDictRegistry chemNameDictRegistry) {
        this.model = mEMMModel;
        this.ontologyAndPrefixTermFinder = new DFAONTCPRFinder(ontologyTerms);
        if (chemNameDictRegistry.getAllNames().size() > 0) {
            this.supplementaryTermFinder = new DFASupplementaryTermFinder(chemNameDictRegistry);
        } else {
            this.supplementaryTermFinder = null;
        }
    }

    /** @return the model supplied at construction time */
    public MEMMModel getModel() {
        return this.model;
    }

    /** @return the confidence threshold applied to entities produced by the MEMM */
    public double getMemmThreshold() {
        return this.memmThreshold;
    }

    /**
     * Sets the MEMM confidence threshold. MEMM entities are kept only when
     * their confidence is strictly greater than this value.
     *
     * @param d the new threshold
     */
    public void setMemmThreshold(double d) {
        this.memmThreshold = d;
    }

    /** @return the ontology/prefix term finder built at construction time */
    public DFAONTCPRFinder getOntologyAndPrefixTermFinder() {
        return this.ontologyAndPrefixTermFinder;
    }

    /**
     * Finds named entities in all token sequences of the given document,
     * using {@link StandoffResolver.ResolutionMode#REMOVE_BLOCKED}.
     *
     * @param iProcessingDocument the document whose token sequences are searched
     * @return the recognised named entities
     */
    public List<NamedEntity> findNamedEntities(IProcessingDocument iProcessingDocument) {
        return findNamedEntities(iProcessingDocument.getTokenSequences());
    }

    /**
     * Finds named entities in the given token sequences, using
     * {@link StandoffResolver.ResolutionMode#REMOVE_BLOCKED}.
     *
     * @param list the token sequences to search
     * @return the recognised named entities
     */
    @Override
    public List<NamedEntity> findNamedEntities(List<TokenSequence> list) {
        return findNamedEntities(list, StandoffResolver.ResolutionMode.REMOVE_BLOCKED);
    }

    /**
     * Finds named entities in the given token sequences and resolves
     * overlaps according to {@code resolutionMode}.
     *
     * @param list the token sequences to search
     * @param resolutionMode either {@code REMOVE_BLOCKED} or {@code MARK_BLOCKED}
     * @return the recognised named entities, sorted by their natural ordering
     *         before standoff resolution is applied
     * @throws RuntimeException if {@code resolutionMode} is neither
     *         {@code REMOVE_BLOCKED} nor {@code MARK_BLOCKED} (including
     *         {@code null})
     */
    @Override
    public List<NamedEntity> findNamedEntities(List<TokenSequence> list, StandoffResolver.ResolutionMode resolutionMode) {
        // Fail fast: reject an unsupported mode before doing any recognition work.
        if (resolutionMode != StandoffResolver.ResolutionMode.REMOVE_BLOCKED
                && resolutionMode != StandoffResolver.ResolutionMode.MARK_BLOCKED) {
            throw new RuntimeException(resolutionMode + " not yet implemented");
        }

        List<NamedEntity> entities = generateNamedEntities(list);
        if (this.supplementaryTermFinder != null) {
            entities.addAll(generateSupplementaryNameTerms(list));
        }
        entities.addAll(generateOntologyAndPrefixTerms(list));
        mergeNamedEntities(entities);
        setPseudoConfidences(entities);
        Collections.sort(entities);

        if (resolutionMode == StandoffResolver.ResolutionMode.REMOVE_BLOCKED) {
            StandoffResolver.resolveStandoffs(entities);
        } else {
            StandoffResolver.markBlockedStandoffs(entities);
        }
        return entities;
    }

    /**
     * Pools ontology ids and custom types across entities that have the same
     * start and end offsets, then assigns the pooled sets back to each of
     * those entities. Entities at the same span end up referencing the same
     * {@link Set} instances.
     *
     * @param list the entities to merge in place
     */
    private void mergeNamedEntities(List<NamedEntity> list) {
        Map<String, Set<String>> ontIdsBySpan = new HashMap<String, Set<String>>();
        Map<String, Set<String>> custTypesBySpan = new HashMap<String, Set<String>>();

        for (NamedEntity entity : list) {
            String key = spanKey(entity);
            accumulate(ontIdsBySpan, key, entity.getOntIds());
            accumulate(custTypesBySpan, key, entity.getCustTypes());
        }

        for (NamedEntity entity : list) {
            String key = spanKey(entity);
            Set<String> pooledOntIds = ontIdsBySpan.get(key);
            if (pooledOntIds != null) {
                entity.setOntIds(pooledOntIds);
            }
            Set<String> pooledCustTypes = custTypesBySpan.get(key);
            if (pooledCustTypes != null) {
                entity.setCustTypes(pooledCustTypes);
            }
        }
    }

    /** Builds the map key identifying an entity's span from its start and end offsets. */
    private static String spanKey(NamedEntity entity) {
        return entity.getStart() + EuclidConstants.S_COLON + entity.getEnd();
    }

    /**
     * Adds {@code values} to the set stored under {@code key}, creating that
     * set (as a copy of {@code values}) on first use. Does nothing when
     * {@code values} is {@code null}.
     */
    private static void accumulate(Map<String, Set<String>> bySpan, String key, Set<String> values) {
        if (values == null) {
            return;
        }
        Set<String> pooled = bySpan.get(key);
        if (pooled == null) {
            bySpan.put(key, new HashSet<String>(values));
        } else {
            pooled.addAll(values);
        }
    }

    /**
     * Runs a fresh {@link MEMM} over every token sequence, keeps the entities
     * whose confidence is strictly greater than the MEMM threshold and then
     * asks the MEMM to rescore the kept entities.
     *
     * @param list the token sequences to search
     * @return a new mutable list of the kept entities
     */
    private List<NamedEntity> generateNamedEntities(List<TokenSequence> list) {
        List<NamedEntity> kept = new ArrayList<NamedEntity>();
        // The MEMM is built with one fifth of the public threshold; the full
        // threshold is applied below. NOTE(review): the reason for the /5
        // factor is not visible here - confirm against MEMM.
        MEMM memm = new MEMM(this.model, this.memmThreshold / 5.0d);
        for (TokenSequence tokenSequence : list) {
            for (NamedEntity candidate : memm.findNEs(tokenSequence)) {
                if (candidate.getConfidence() > this.memmThreshold) {
                    kept.add(candidate);
                }
            }
        }
        memm.rescore(kept);
        return kept;
    }

    /**
     * Collects the entities reported by the ontology/prefix term finder for
     * every token sequence.
     *
     * @param list the token sequences to search
     * @return a new list of the entities found
     */
    private List<NamedEntity> generateOntologyAndPrefixTerms(List<TokenSequence> list) {
        List<NamedEntity> found = new ArrayList<NamedEntity>();
        for (TokenSequence tokenSequence : list) {
            found.addAll(this.ontologyAndPrefixTermFinder.findNamedEntities(tokenSequence));
        }
        return found;
    }

    /**
     * Collects the entities reported by the supplementary term finder for
     * every token sequence. Must only be called when the supplementary
     * finder is present.
     *
     * @param list the token sequences to search
     * @return a new list of the entities found
     */
    private List<NamedEntity> generateSupplementaryNameTerms(List<TokenSequence> list) {
        List<NamedEntity> found = new ArrayList<NamedEntity>();
        for (TokenSequence tokenSequence : list) {
            found.addAll(this.supplementaryTermFinder.findNamedEntities(tokenSequence));
        }
        return found;
    }

    /**
     * Assigns a pseudo-confidence and the ontology deprioritisation flag to
     * every entity. The pseudo-confidence is chosen by entity type
     * (ONTOLOGY, LOCANTPREFIX or CUSTOM); entities of any other type receive
     * {@link Double#NaN}.
     *
     * @param list the entities to update in place
     */
    void setPseudoConfidences(List<NamedEntity> list) {
        for (NamedEntity entity : list) {
            NamedEntityType type = entity.getType();
            double pseudoConfidence;
            if (NamedEntityType.ONTOLOGY.isInstance(type)) {
                pseudoConfidence = this.ontPseudoConfidence;
            } else if (NamedEntityType.LOCANTPREFIX.isInstance(type)) {
                pseudoConfidence = this.cprPseudoConfidence;
            } else if (NamedEntityType.CUSTOM.isInstance(type)) {
                pseudoConfidence = this.custPseudoConfidence;
            } else {
                pseudoConfidence = Double.NaN;
            }
            entity.setPseudoConfidence(pseudoConfidence);
            entity.setDeprioritiseOnt(this.deprioritiseOnts);
        }
    }

    /** @return the pseudo-confidence assigned to ONTOLOGY entities */
    public double getOntPseudoConfidence() {
        return this.ontPseudoConfidence;
    }

    /** @param d the pseudo-confidence to assign to ONTOLOGY entities */
    public void setOntPseudoConfidence(double d) {
        this.ontPseudoConfidence = d;
    }

    /** @return the pseudo-confidence assigned to CUSTOM entities */
    public double getCustPseudoConfidence() {
        return this.custPseudoConfidence;
    }

    /** @param d the pseudo-confidence to assign to CUSTOM entities */
    public void setCustPseudoConfidence(double d) {
        this.custPseudoConfidence = d;
    }

    /** @return the pseudo-confidence assigned to LOCANTPREFIX entities */
    public double getCprPseudoConfidence() {
        return this.cprPseudoConfidence;
    }

    /** @param d the pseudo-confidence to assign to LOCANTPREFIX entities */
    public void setCprPseudoConfidence(double d) {
        this.cprPseudoConfidence = d;
    }

    /** @param z the value passed to {@code setDeprioritiseOnt} on every entity found */
    public void setDeprioritiseOnts(boolean z) {
        this.deprioritiseOnts = z;
    }
}
