package edu.northwestern.at.morphadorner.gate;

import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.DefaultSentenceSplitter;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.Formatters;
import gate.AnnotationSet;
import gate.DocumentContent;
import gate.Resource;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.GateRuntimeException;
import gate.util.SimpleFeatureMapImpl;
import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:edu/northwestern/at/morphadorner/gate/TokenizerGateWrapper.class */
/**
 * GATE processing resource that splits a document's text into sentences and
 * word tokens using MorphAdorner's {@link DefaultSentenceSplitter} and
 * {@link DefaultWordTokenizer}, then records one annotation per token and one
 * per non-empty sentence in the output annotation set.
 */
public class TokenizerGateWrapper extends MorphAdornerGateWrapperBase {
    /** Sentence splitter used by {@link #execute()}; created in {@link #init()}. */
    protected SentenceSplitter sentenceSplitter;

    /** Word tokenizer used by {@link #execute()}; created in {@link #init()}. */
    protected WordTokenizer tokenizer;

    /**
     * Plain holder for one token: its document-relative start and end offsets
     * and the token text.
     */
    class TokenAnnotation {
        /** Offset of the token's first character, relative to the document start. */
        long start;
        /** Offset just past the token's last character, relative to the document start. */
        long end;
        /** Token text as produced by the sentence splitter / tokenizer. */
        String string;

        TokenAnnotation() {
        }
    }

    /**
     * Creates the default word tokenizer and sentence splitter and hands the
     * inherited part-of-speech guesser to the splitter.
     *
     * @return whatever the superclass {@code init()} returns
     * @throws ResourceInstantiationException if initialisation fails
     */
    @Override // edu.northwestern.at.morphadorner.gate.MorphAdornerGateWrapperBase
    public Resource init() throws ResourceInstantiationException {
        // NOTE(review): commonInit() is defined in the base class; presumably it
        // sets up this.guesser before it is passed to the splitter below -- confirm.
        commonInit();
        this.tokenizer = new DefaultWordTokenizer();
        this.sentenceSplitter = new DefaultSentenceSplitter();
        this.sentenceSplitter.setPartOfSpeechGuesser(this.guesser);
        return super.init();
    }

    /**
     * Tokenizes the current document.
     *
     * <p>For every sentence found in the document text, adds one annotation of
     * type {@code baseTokenAnnotationType} per token (features {@code "string"}
     * = token text and {@code "category"} = empty string) and, when the sentence
     * span is non-empty, one annotation of type
     * {@code baseSentenceAnnotationType}. Annotations go to the set named by
     * {@code outputASName}, or to the default set when that name is
     * {@code null} or empty (an empty name is normalised to {@code null}).
     *
     * <p>Progress is reported as the percentage of sentences processed so far.
     *
     * @throws ExecutionException wrapping any exception raised while
     *         processing, including the one raised when no document is set
     */
    @Override // edu.northwestern.at.morphadorner.gate.MorphAdornerGateWrapperBase
    public void execute() throws ExecutionException {
        try {
            if (this.document == null) {
                throw new GateRuntimeException("No document to process!");
            }
            DocumentContent content = this.document.getContent();
            String text = content.toString();

            fireStatusChanged("Tokenizing " + this.document.getName());
            fireProgressChanged(0);

            List<List<String>> sentences = this.sentenceSplitter.extractSentences(text, this.tokenizer);
            // The loop below reads sentenceOffsets[i + 1], so the array is expected
            // to hold one more entry than there are sentences.
            // NOTE(review): confirm against SentenceSplitter.findSentenceOffsets.
            int[] sentenceOffsets = this.sentenceSplitter.findSentenceOffsets(text, sentences);
            fireStatusChanged("Extracted " + Formatters.formatIntegerWithCommas(sentences.size()) + " sentences");
            fireProgressChanged(0);

            // The input annotation set is deliberately not fetched: this resource
            // reads only the document text, and the set was never used here.
            if (this.outputASName != null && this.outputASName.length() == 0) {
                this.outputASName = null;
            }
            AnnotationSet outputAS = this.outputASName == null
                    ? this.document.getAnnotations()
                    : this.document.getAnnotations(this.outputASName);

            int sentenceCount = sentences.size();
            for (int i = 0; i < sentenceCount; i++) {
                long sentenceStart = sentenceOffsets[i];
                long sentenceEnd = sentenceOffsets[i + 1];
                String sentenceText =
                        content.getContent(Long.valueOf(sentenceStart), Long.valueOf(sentenceEnd)).toString();
                List<String> words = sentences.get(i);
                int[] wordOffsets = this.tokenizer.findWordOffsets(sentenceText, words);

                // Resolve every token span first, so a failure while computing
                // offsets leaves no partial token set for this sentence.
                List<TokenAnnotation> tokens = new ArrayList<TokenAnnotation>(words.size());
                for (int w = 0; w < words.size(); w++) {
                    String word = words.get(w);
                    // Word offsets are relative to the sentence text; shift them
                    // by the sentence start to get document offsets.
                    long tokenStart = sentenceStart + wordOffsets[w];
                    TokenAnnotation token = new TokenAnnotation();
                    token.start = tokenStart;
                    token.end = tokenStart + word.length();
                    token.string = word;
                    tokens.add(token);
                }

                for (TokenAnnotation token : tokens) {
                    SimpleFeatureMapImpl features = new SimpleFeatureMapImpl();
                    features.put("string", token.string);
                    features.put("category", "");
                    outputAS.add(Long.valueOf(token.start), Long.valueOf(token.end),
                            this.baseTokenAnnotationType, features);
                }

                // Zero-length sentence spans get no sentence annotation.
                if (sentenceEnd > sentenceStart) {
                    outputAS.add(Long.valueOf(sentenceStart), Long.valueOf(sentenceEnd),
                            this.baseSentenceAnnotationType, new SimpleFeatureMapImpl());
                }

                fireStatusChanged("Added sentence " + i);
                // Percentage of sentences done; long arithmetic avoids int overflow
                // on very large documents.
                fireProgressChanged((int) ((i + 1L) * 100L / sentenceCount));
            }
        } catch (Exception e) {
            throw new ExecutionException(e);
        }
    }
}
