package edu.northwestern.at.morphadorner.examples;

import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.DefaultSuffixLexicon;
import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.DefaultWordLexicon;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.guesser.DefaultPartOfSpeechGuesser;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.DefaultSentenceSplitter;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.utils.FileUtils;
import java.io.BufferedOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.List;

/* loaded from: input_file:edu/northwestern/at/morphadorner/examples/SentenceAndTokenOffsets.class */
/**
 * Example program that splits a text file into sentences and word tokens and
 * prints the character offsets of each sentence and each token.
 *
 * <p>Sentence offsets are relative to the whitespace-normalized file text;
 * token offsets are relative to the start of the sentence they belong to.
 */
public class SentenceAndTokenOffsets {
    /**
     * Command-line entry point.
     *
     * <p>Passes the first argument to {@link #displayOffsets(String)} as the
     * input file name. When no argument is given, a usage line is written to
     * standard error instead. Any exception raised along the way is caught
     * here and its stack trace printed.
     *
     * @param strArr command-line arguments; only the first one is used
     */
    public static void main(String[] strArr) {
        try {
            if (strArr.length <= 0) {
                System.err.println("Usage: SentenceAndTokenOffsets inputFileName");
                return;
            }
            displayOffsets(strArr[0]);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a text file, splits it into sentences and tokens, and prints the
     * offsets of each to standard output encoded as UTF-8.
     *
     * <p>Each sentence is printed as {@code index [start,end]: text}, followed
     * by one indented line per token in the same format. The printed end
     * offsets are inclusive.
     *
     * @param str name of the input file, read as UTF-8
     * @throws Exception if reading the file or any processing step fails
     */
    public static void displayOffsets(String str) throws Exception {
        OutputStream bufferedStdout = new BufferedOutputStream(System.out);
        // Autoflush is enabled, so every println below is pushed through the buffer.
        PrintStream out = new PrintStream(bufferedStdout, true, "utf-8");

        // Replace every whitespace character (newlines, tabs, ...) with a single
        // blank, one for one, so the text length and character positions are kept.
        String rawText = FileUtils.readTextFile(str, "utf-8");
        String text = rawText.replaceAll("\\s", " ");

        // The sentence splitter is given a part-of-speech guesser that is backed
        // by the default word and suffix lexicons.
        DefaultSentenceSplitter splitter = new DefaultSentenceSplitter();
        DefaultPartOfSpeechGuesser guesser = new DefaultPartOfSpeechGuesser();
        guesser.setWordLexicon(new DefaultWordLexicon());
        guesser.setSuffixLexicon(new DefaultSuffixLexicon());
        splitter.setPartOfSpeechGuesser(guesser);

        DefaultWordTokenizer tokenizer = new DefaultWordTokenizer();

        List<List<String>> sentences = splitter.extractSentences(text, tokenizer);
        int[] sentenceOffsets = splitter.findSentenceOffsets(text, sentences);

        for (int sentenceIndex = 0; sentenceIndex < sentences.size(); sentenceIndex++) {
            // Entry k is used as the start of sentence k and entry k + 1 as its
            // exclusive end.
            int sentenceStart = sentenceOffsets[sentenceIndex];
            int sentenceEnd = sentenceOffsets[sentenceIndex + 1];
            String sentenceText = text.substring(sentenceStart, sentenceEnd);

            int lastSentenceChar = sentenceEnd - 1;
            out.println(sentenceIndex + " [" + sentenceStart + "," + lastSentenceChar + "]: " + sentenceText);

            printTokenOffsets(out, sentenceText, sentences.get(sentenceIndex), tokenizer);
        }
    }

    /**
     * Prints one indented line per token giving its index, its inclusive
     * offset range within the sentence, and the matching slice of the
     * sentence text.
     */
    private static void printTokenOffsets(
            PrintStream out,
            String sentenceText,
            List<String> tokens,
            DefaultWordTokenizer tokenizer) {
        int[] tokenOffsets = tokenizer.findWordOffsets(sentenceText, tokens);
        for (int tokenIndex = 0; tokenIndex < tokens.size(); tokenIndex++) {
            int tokenStart = tokenOffsets[tokenIndex];
            // Exclusive end: the start offset plus the length of the token string.
            int tokenEnd = tokenStart + tokens.get(tokenIndex).length();
            int lastTokenChar = tokenEnd - 1;
            String tokenText = sentenceText.substring(tokenStart, tokenEnd);
            out.println("          " + tokenIndex + " [" + tokenStart + "," + lastTokenChar + "]: " + tokenText);
        }
    }
}
