package uk.ac.cam.ch.wwmm.oscarrecogniser.finder;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import uk.ac.cam.ch.wwmm.oscar.document.NamedEntity;
import uk.ac.cam.ch.wwmm.oscar.document.Token;
import uk.ac.cam.ch.wwmm.oscar.document.TokenSequence;
import uk.ac.cam.ch.wwmm.oscar.ont.OntologyTerms;
import uk.ac.cam.ch.wwmm.oscar.tools.StringTools;
import uk.ac.cam.ch.wwmm.oscar.types.NamedEntityType;
import uk.ac.cam.ch.wwmm.oscartokeniser.Tokeniser;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscarrecogniser/finder/DFAONTCPRFinder.class */
/**
 * A {@link DFAFinder} whose terms are taken from an {@link OntologyTerms}
 * instance and from the custom entities exposed by {@link TermMaps}.
 *
 * <p>A single shared instance is cached in a static field and created lazily
 * by {@link #getDefaultInstance()}. No synchronisation is performed on that
 * field, so callers that share the default instance between threads must
 * coordinate access themselves.
 */
public class DFAONTCPRFinder extends DFAFinder {
    private static final long serialVersionUID = -1417523538712568934L;

    /** Lazily created shared instance; {@code null} until first requested or after being destroyed. */
    private static DFAONTCPRFinder myInstance;

    /** Name of the gzipped, Java-serialised form of this finder. */
    private static final String SERIALIZED_DFAFINDER = "dfa_ontcpr.dat.gz";

    /** Token representation added for tokens whose normalised name is an ontology term. */
    private static final String REP_ONTWORD = "$ONTWORD";

    /** Token representation added for single-character hyphen tokens. */
    private static final String REP_HYPH = "$HYPH";

    /** Token representation added for single-character mid-ellipsis tokens. */
    private static final String REP_DOTS = "$DOTS";

    /**
     * Restores a finder from the serialised classpath resource
     * {@value #SERIALIZED_DFAFINDER}, resolved relative to this class.
     *
     * @return the deserialised finder
     * @throws FileNotFoundException if the resource is not on the classpath
     * @throws IOException if the resource cannot be read or decompressed
     * @throws RuntimeException if a class needed for deserialisation is missing
     */
    private static DFAONTCPRFinder readFromWorkspace() throws IOException {
        InputStream resourceAsStream = DFAONTCPRFinder.class.getResourceAsStream(SERIALIZED_DFAFINDER);
        if (resourceAsStream == null) {
            throw new FileNotFoundException("File not found: " + SERIALIZED_DFAFINDER);
        }
        try {
            ObjectInputStream objectInputStream = new ObjectInputStream(
                    new BufferedInputStream(new GZIPInputStream(resourceAsStream)));
            try {
                return (DFAONTCPRFinder) objectInputStream.readObject();
            } finally {
                // Close the whole wrapper chain even when readObject() fails.
                objectInputStream.close();
            }
        } catch (ClassNotFoundException e) {
            // Keep the original exception as the cause instead of printing and discarding it.
            throw new RuntimeException(
                    "Unable to restore DFAONTCPRFinder from " + SERIALIZED_DFAFINDER + "!", e);
        } finally {
            // Covers the case where building the wrapper chain itself failed;
            // closing an already-closed stream is harmless.
            resourceAsStream.close();
        }
    }

    /**
     * Serialises the given finder, gzipped, to the file
     * {@value #SERIALIZED_DFAFINDER} in the current working directory.
     *
     * @param dFAONTCPRFinder the finder to serialise
     * @throws FileNotFoundException if the output file cannot be opened
     * @throws IOException if writing fails
     */
    private static void writeToWorkspace(DFAONTCPRFinder dFAONTCPRFinder) throws FileNotFoundException, IOException {
        FileOutputStream fileOutputStream = new FileOutputStream(SERIALIZED_DFAFINDER);
        try {
            ObjectOutputStream objectOutputStream = new ObjectOutputStream(
                    new BufferedOutputStream(new GZIPOutputStream(fileOutputStream)));
            try {
                objectOutputStream.writeObject(dFAONTCPRFinder);
            } finally {
                // Flushes and closes the wrapper chain even when writeObject() fails.
                objectOutputStream.close();
            }
        } finally {
            // Releases the file handle if the wrapper chain could not be built.
            fileOutputStream.close();
        }
    }

    /**
     * Builds a new finder from the given ontology terms and writes its
     * serialised form to {@value #SERIALIZED_DFAFINDER}.
     *
     * @param ontologyTerms the ontology terms to build the finder from
     * @throws FileNotFoundException if the output file cannot be opened
     * @throws IOException if writing fails
     */
    public static void buildAndSerializeDFAONTCPRFinder(OntologyTerms ontologyTerms) throws FileNotFoundException, IOException {
        writeToWorkspace(new DFAONTCPRFinder(ontologyTerms));
    }

    /**
     * Returns the shared finder, building it from
     * {@link OntologyTerms#getDefaultInstance()} on first use.
     *
     * @return the shared finder instance
     */
    public static DFAONTCPRFinder getDefaultInstance() {
        if (myInstance == null) {
            myInstance = new DFAONTCPRFinder(OntologyTerms.getDefaultInstance());
        }
        return myInstance;
    }

    /** Discards the shared finder and immediately rebuilds it. */
    public static void reinitialise() {
        myInstance = null;
        getDefaultInstance();
    }

    /** Discards the shared finder; the next {@link #getDefaultInstance()} call rebuilds it. */
    public static void destroyInstance() {
        myInstance = null;
    }

    /**
     * Discards the shared finder if one exists and the default tokeniser
     * splits the given string into more than one token.
     *
     * @param str the string to tokenise
     */
    public static void destroyInstanceIfWordTokenises(String str) {
        if (myInstance != null && Tokeniser.getDefaultInstance().tokenise(str).getTokens().size() > 1) {
            myInstance = null;
        }
    }

    /**
     * Creates a finder over the given ontology terms and initialises it.
     *
     * @param ontologyTerms the ontology terms this finder recognises
     */
    public DFAONTCPRFinder(OntologyTerms ontologyTerms) {
        // The terms must be assigned before init(), because loadTerms() reads them.
        this.ontologyTerms = ontologyTerms;
        super.init();
    }

    /**
     * Registers every ontology term as an {@code ONTOLOGY} entity, every
     * custom-entity key from {@link TermMaps} as a {@code CUSTOM} entity, and
     * finally the {@code $ONTWORD} placeholder as an {@code ONTOLOGY} entity.
     */
    @Override // uk.ac.cam.ch.wwmm.oscarrecogniser.finder.DFAFinder
    protected void loadTerms() {
        for (String term : this.ontologyTerms.getAllTerms()) {
            addNamedEntity(term, NamedEntityType.ONTOLOGY, false);
        }
        for (String customTerm : TermMaps.getInstance().getCustEnt().keySet()) {
            addNamedEntity(customTerm, NamedEntityType.CUSTOM, true);
        }
        addNamedEntity(REP_ONTWORD, NamedEntityType.ONTOLOGY, false);
    }

    /**
     * Finds the named entities in a token sequence.
     *
     * @param tokenSequence the tokens to search
     * @return the named entities gathered by the collector
     */
    public List<NamedEntity> findNamedEntities(TokenSequence tokenSequence) {
        NECollector nECollector = new NECollector();
        findItems(tokenSequence, generateTokenRepresentations(tokenSequence), nECollector);
        return nECollector.getNes();
    }

    /**
     * Builds the representation list for every token of the sequence, in
     * token order.
     *
     * @param tokenSequence the tokens to represent
     * @return one representation list per token
     */
    private List<RepresentationList> generateTokenRepresentations(TokenSequence tokenSequence) {
        List<RepresentationList> representations = new ArrayList<RepresentationList>();
        for (Token token : tokenSequence.getTokens()) {
            representations.add(generateTokenRepresentations(token));
        }
        return representations;
    }

    /**
     * Builds the representations under which a single token may be matched:
     * its surface form, its normalised name when that differs, the
     * {@code $ONTWORD} marker when the normalised name is an ontology term,
     * {@code $HYPH} or {@code $DOTS} for single-character hyphen or
     * mid-ellipsis tokens, and the sub-regex representations of the surface.
     *
     * @param token the token to represent
     * @return the token's representations
     */
    protected RepresentationList generateTokenRepresentations(Token token) {
        RepresentationList representationList = new RepresentationList();
        String surface = token.getSurface();
        representationList.addRepresentation(surface);
        String normaliseName = StringTools.normaliseName(surface);
        if (!normaliseName.equals(surface)) {
            representationList.addRepresentation(normaliseName);
        }
        if (this.ontologyTerms.containsTerm(normaliseName)) {
            representationList.addRepresentation(REP_ONTWORD);
        }
        if (surface.length() == 1) {
            if (StringTools.isHyphen(surface)) {
                representationList.addRepresentation(REP_HYPH);
            } else if (StringTools.isMidElipsis(surface)) {
                representationList.addRepresentation(REP_DOTS);
            }
        }
        representationList.addRepresentations(getSubReRepsForToken(surface));
        return representationList;
    }

    /**
     * Builds a finder from the default ontology terms and writes its
     * serialised form to the current working directory.
     *
     * @param strArr ignored
     * @throws Exception if building or writing the finder fails
     */
    public static void main(String[] strArr) throws Exception {
        buildAndSerializeDFAONTCPRFinder(OntologyTerms.getDefaultInstance());
    }
}
