package edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter;

import edu.northwestern.at.morphadorner.corpuslinguistics.abbreviations.Abbreviations;
import edu.northwestern.at.morphadorner.corpuslinguistics.namerecognizer.Names;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.guesser.PartOfSpeechGuesser;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.IsCloseable;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MutableInteger;
import edu.northwestern.at.utils.logger.DummyLogger;
import edu.northwestern.at.utils.logger.Logger;
import edu.northwestern.at.utils.logger.UsesLogger;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

/* loaded from: input_file:edu/northwestern/at/morphadorner/corpuslinguistics/sentencesplitter/AbstractSentenceSplitter.class */
/**
 * Base class for sentence splitters.
 *
 * <p>A {@link SentenceSplitterIterator} proposes candidate sentences from raw
 * text. Each candidate is tokenized into words, split again at periods and
 * end-of-section markers, and then checked by {@link #fixUpSentence} to decide
 * whether it really starts a new sentence or should be appended to the
 * previous one.</p>
 *
 * <p>The part of speech based heuristics ({@link #isVerb}, {@link #isNoun},
 * {@link #isPronoun}, and partly {@link #isProperNoun}) only contribute when a
 * {@link PartOfSpeechGuesser} has been supplied.</p>
 */
public abstract class AbstractSentenceSplitter extends IsCloseableObject implements SentenceSplitter, IsCloseable, UsesLogger {
    /** Right single quotation mark (U+2019, decimal 8217). */
    private static final char RIGHT_SINGLE_QUOTE = '\u2019';

    /**
     * Private-use character (U+E502, decimal 58626) temporarily substituted
     * for a leading right single quote in {@link #addSentenceBad}.
     */
    private static final char QUOTE_PLACEHOLDER = '\uE502';

    /** Part of speech guesser used by the word-class heuristics. May be null. */
    protected PartOfSpeechGuesser partOfSpeechGuesser;

    /** Iterator which proposes candidate sentences from the input text. */
    protected SentenceSplitterIterator sentenceSplitterIterator;

    /** Single-character tokens which may not begin a sentence. */
    protected static final String disallowedSentenceStarters = ",%.";

    /** Tokenizer used by {@link #extractSentences(String)}. */
    protected WordTokenizer wordTokenizer = new DefaultWordTokenizer();

    /** Name and place lookup used by {@link #isProperNoun}. */
    protected Names names = new Names();

    /** Abbreviation lookup used when deciding whether a period ends a sentence. */
    protected Abbreviations abbreviations = new Abbreviations();

    /** Logger. Defaults to a {@link DummyLogger}. */
    protected Logger logger = new DummyLogger();

    /**
     * Sets the part of speech guesser.
     *
     * @param partOfSpeechGuesser the guesser to use, or null to disable the
     *                            part of speech based heuristics
     */
    @Override // edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter
    public void setPartOfSpeechGuesser(PartOfSpeechGuesser partOfSpeechGuesser) {
        this.partOfSpeechGuesser = partOfSpeechGuesser;
    }

    /**
     * Sets the abbreviations lookup.
     *
     * @param abbreviations the abbreviations to use
     */
    @Override // edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter
    public void setAbbreviations(Abbreviations abbreviations) {
        this.abbreviations = abbreviations;
    }

    /**
     * Sets the iterator which proposes candidate sentences.
     *
     * @param sentenceSplitterIterator the iterator to use
     */
    @Override // edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter
    public void setSentenceSplitterIterator(SentenceSplitterIterator sentenceSplitterIterator) {
        this.sentenceSplitterIterator = sentenceSplitterIterator;
    }

    /**
     * Decides whether a candidate sentence really starts a new sentence,
     * moving leading tokens onto the previous sentence where they belong
     * there.
     *
     * <p>Both lists may be modified: leading long dashes, commas, numbers and
     * a possessive "s" can be moved from {@code list} to {@code list2}, and a
     * candidate consisting only of closing punctuation is moved entirely.</p>
     *
     * @param list  the words of the candidate sentence; may be modified
     * @param list2 the words of the previous sentence, or null when there is
     *              none; may be modified
     * @return true when what remains of {@code list} should be treated as a
     *         new sentence, false when it should be appended to
     *         {@code list2}
     */
    protected boolean fixUpSentence(List<String> list, List<String> list2) {
        // With nothing to examine, or no previous sentence to merge into,
        // the candidate stands as it is.
        if (list.isEmpty() || list2 == null || list2.isEmpty()) {
            return true;
        }
        boolean startsNewSentence = true;
        boolean done = false;
        while (!done) {
            // Only branches which move a token to the previous sentence
            // request another pass.
            done = true;
            if (isClosingPunctuationOnly(list)) {
                list2.addAll(list);
                list.clear();
                return startsNewSentence;
            }
            String firstWord = list.get(0);
            String lastWord = list2.get(list2.size() - 1);
            if (lastWord.equals(")") || lastWord.equals("]") || lastWord.equals("}")
                    || lastWord.equals(CharUtils.LONG_DASH_STRING)) {
                startsNewSentence = CharUtils.isCapitalLetter(firstWord.charAt(0));
            } else if (firstWord.equals(CharUtils.LONG_DASH_STRING)) {
                // A leading dash belongs to the previous sentence unless that
                // sentence already ended with terminal punctuation.
                if (!endsWithTerminalPunctuation(lastWord)) {
                    list2.add(CharUtils.LONG_DASH_STRING);
                    list.remove(0);
                    done = list.isEmpty();
                }
            } else if (firstWord.equals(",")) {
                list2.add(",");
                list.remove(0);
                done = list.isEmpty();
            } else if (CharUtils.isNumber(firstWord) && CharUtils.isNumber(lastWord)) {
                list2.add(firstWord);
                list.remove(0);
                done = list.isEmpty();
            } else if (!firstWord.equals("s")) {
                if (Abbreviations.isInitial(lastWord)) {
                    startsNewSentence =
                        CharUtils.isCapitalLetter(firstWord.charAt(0)) && !isNoun(firstWord);
                } else if (this.abbreviations.isAbbreviation(lastWord)) {
                    startsNewSentence = startsSentenceAfterAbbreviation(list, list2, lastWord);
                } else if (firstWord.length() == 1) {
                    if (disallowedSentenceStarters.indexOf(firstWord.charAt(0)) >= 0) {
                        startsNewSentence = false;
                    }
                } else if (Character.isLowerCase(firstWord.charAt(0))) {
                    startsNewSentence = false;
                }
            } else if (CharUtils.endsWithSingleQuote(lastWord)) {
                // The candidate starts with a bare "s" and the previous
                // sentence ends with a single quote: rejoin the possessive.
                String joined = lastWord + firstWord;
                list.remove(0);
                // Guard against a previous sentence holding only the quote
                // token; there is then no preceding word to attach to.
                if (CharUtils.isSingleQuoteS(joined) && list2.size() >= 2) {
                    String wordBeforeQuote = list2.get(list2.size() - 2);
                    if (wordBeforeQuote.endsWith(".") || !CharUtils.isPunctuation(wordBeforeQuote)) {
                        joined = wordBeforeQuote + joined;
                        list2.remove(list2.size() - 1);
                    }
                }
                list2.remove(list2.size() - 1);
                list2.add(joined);
                startsNewSentence = false;
            }
        }
        return startsNewSentence;
    }

    /** Returns true when the word ends with ".", "!" or "?". */
    private static boolean endsWithTerminalPunctuation(String word) {
        return word.endsWith(".") || word.endsWith("!") || word.endsWith("?");
    }

    /**
     * Decides whether a candidate starts a new sentence when the previous
     * sentence ends with an abbreviation.
     */
    private boolean startsSentenceAfterAbbreviation(List<String> list, List<String> list2, String lastWord) {
        if (!this.abbreviations.isEOSAbbreviation(lastWord)) {
            return false;
        }
        if (!verbSeen(list2)) {
            return false;
        }
        String firstWord = list.get(0);
        if (!CharUtils.isCapitalLetter(firstWord.charAt(0))) {
            // NOTE(review): a non-capitalized first word is accepted as a
            // sentence start here; kept as found -- confirm this is intended.
            return true;
        }
        if (isProperNoun(firstWord) || isPronoun(firstWord)) {
            // A name or pronoun could continue the previous sentence; only
            // break when the candidate contains a verb of its own that is
            // not part of a "to <verb>" construction.
            String previousLower = "";
            for (int i = 1; i < list.size(); i++) {
                String word = list.get(i);
                if (isVerb(word) && !previousLower.equals("to")) {
                    return true;
                }
                previousLower = word.toLowerCase();
            }
            return false;
        }
        return true;
    }

    /**
     * Checks whether a word list consists solely of closing punctuation,
     * that is ".", ")", "]" or "}".
     *
     * @param list the words to check; may be null
     * @return true when the list is non-empty and every entry is closing
     *         punctuation
     */
    public boolean isClosingPunctuationOnly(List<String> list) {
        if (list == null || list.isEmpty()) {
            return false;
        }
        for (String token : list) {
            boolean closing =
                token.equals(".") || token.equals(")") || token.equals("]") || token.equals("}");
            if (!closing) {
                return false;
            }
        }
        return true;
    }

    /**
     * Splits text into sentences, each a list of word tokens.
     *
     * @param str           the text to split
     * @param wordTokenizer the tokenizer used to split candidate sentences
     *                      into words; when null the tokenizer held by this
     *                      instance is used
     * @return the sentences in text order
     * @throws IllegalStateException when no sentence splitter iterator has
     *                               been set
     */
    @Override // edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter
    public List<List<String>> extractSentences(String str, WordTokenizer wordTokenizer) {
        if (this.sentenceSplitterIterator == null) {
            throw new IllegalStateException("No sentence splitter iterator has been set");
        }
        WordTokenizer tokenizer = (wordTokenizer != null) ? wordTokenizer : this.wordTokenizer;
        List<List<String>> sentences = ListFactory.createNewList();
        this.sentenceSplitterIterator.setText(str);
        // The most recently emitted sentence; fragments get appended to it.
        List<String> previousSentence = null;
        while (this.sentenceSplitterIterator.hasNext()) {
            String candidateText = this.sentenceSplitterIterator.next();
            // A lone "." proposed as the next candidate belongs to this one.
            String upcoming = this.sentenceSplitterIterator.peek();
            if (upcoming != null && upcoming.equals(".")) {
                candidateText = candidateText + upcoming;
                this.sentenceSplitterIterator.next();
            }
            List<String> words = tokenizer.extractWords(candidateText);
            if (words.isEmpty()) {
                continue;
            }
            for (List<String> candidate : splitSentenceWordList(words)) {
                boolean startsNewSentence = fixUpSentence(candidate, previousSentence);
                // fixUpSentence may have moved every word to the previous sentence.
                if (candidate.isEmpty()) {
                    continue;
                }
                boolean standsAlone = startsNewSentence && !quoteOnlySentence(candidate);
                if (standsAlone || previousSentence == null) {
                    addSentence(candidate, sentences);
                    previousSentence = candidate;
                } else {
                    previousSentence.addAll(candidate);
                }
            }
        }
        return sentences;
    }

    /**
     * Checks whether a sentence consists solely of closing quotes and
     * end-of-text-section markers.
     *
     * <p>Every token must qualify. Previously the result for the first token
     * was overwritten by the second, so a two-token list whose second token
     * was a closing quote was reported as quote-only whatever came first.</p>
     *
     * @param list the words of the sentence
     * @return true when the list is non-empty and contains only closing
     *         quotes or end-of-text-section markers
     */
    public boolean quoteOnlySentence(List<String> list) {
        if (list.isEmpty()) {
            return false;
        }
        for (String token : list) {
            boolean quoteOrSectionEnd =
                token.equals(CharUtils.CHAR_END_OF_TEXT_SECTION_STRING)
                    || (token.length() == 1 && CharUtils.isClosingQuote(token.charAt(0)));
            if (!quoteOrSectionEnd) {
                return false;
            }
        }
        return true;
    }

    /**
     * Splits text into sentences using this instance's word tokenizer.
     *
     * @param str the text to split
     * @return the sentences in text order
     */
    @Override // edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter
    public List<List<String>> extractSentences(String str) {
        return extractSentences(str, this.wordTokenizer);
    }

    /**
     * Finds the starting character offset of each sentence in the text.
     *
     * <p>Offsets are found by consuming, for each sentence, as many
     * non-whitespace characters of the text as the sentence's tokens contain
     * characters. The scan stops at the end of the text, so tokens that do
     * not match the text exactly cannot cause an out-of-range access.</p>
     *
     * @param str  the text the sentences were extracted from
     * @param list the extracted sentences
     * @return an array with one more entry than there are sentences: entry
     *         {@code i} is the offset at which the scan for sentence
     *         {@code i} begins, and the final entry is the text length
     */
    @Override // edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter
    public int[] findSentenceOffsets(String str, List<List<String>> list) {
        int sentenceCount = list.size();
        int textLength = str.length();
        int[] offsets = new int[sentenceCount + 1];
        int position = 0;
        for (int s = 0; s < sentenceCount; s++) {
            offsets[s] = position;
            int charsToConsume = 0;
            for (String token : list.get(s)) {
                charsToConsume += token.length();
            }
            while (charsToConsume > 0 && position < textLength) {
                if (!CharUtils.isWhitespace(str.charAt(position))) {
                    charsToConsume--;
                }
                position++;
            }
        }
        offsets[sentenceCount] = textLength;
        return offsets;
    }

    /**
     * Adds a sentence to a sentence list, re-splitting it with an ICU4J break
     * iterator when any word longer than one character starts with a right
     * single quote.
     *
     * <p>Leading right single quotes are swapped for a private-use
     * placeholder character while the sentence is re-split, then
     * restored.</p>
     *
     * @param list  the words of the sentence to add
     * @param list2 the sentence list to add to
     */
    protected void addSentenceBad(List<String> list, List<List<String>> list2) {
        boolean hasLeadingQuoteWord = false;
        for (String word : list) {
            if (word.length() > 1 && word.charAt(0) == RIGHT_SINGLE_QUOTE) {
                hasLeadingQuoteWord = true;
                break;
            }
        }
        if (!hasLeadingQuoteWord) {
            list2.add(list);
            return;
        }
        StringBuilder text = new StringBuilder();
        for (int i = 0; i < list.size(); i++) {
            String word = list.get(i);
            if (word.length() > 1 && word.charAt(0) == RIGHT_SINGLE_QUOTE) {
                word = QUOTE_PLACEHOLDER + word.substring(1);
            }
            if (i > 0) {
                text.append(" ");
            }
            text.append(word);
        }
        ICU4JBreakIteratorSentenceSplitterIterator resplitter =
            new ICU4JBreakIteratorSentenceSplitterIterator(text.toString());
        while (resplitter.hasNext()) {
            StringTokenizer tokens = new StringTokenizer(resplitter.next());
            List<String> sentence = ListFactory.createNewList();
            while (tokens.hasMoreTokens()) {
                String token = tokens.nextToken();
                if (token.length() > 1 && token.charAt(0) == QUOTE_PLACEHOLDER) {
                    token = RIGHT_SINGLE_QUOTE + token.substring(1);
                }
                sentence.add(token);
            }
            list2.add(sentence);
        }
    }

    /**
     * Adds a sentence to a sentence list; null and empty sentences are
     * ignored.
     *
     * @param list  the words of the sentence to add
     * @param list2 the sentence list to add to
     */
    protected void addSentence(List<String> list, List<List<String>> list2) {
        if (list != null && !list.isEmpty()) {
            list2.add(list);
        }
    }

    /**
     * Checks whether any guessed part of speech for a word is a verb tag.
     *
     * @param str the word to check
     * @return true when a guessed tag is a verb tag; false when no part of
     *         speech guesser is set
     */
    protected boolean isVerb(String str) {
        if (this.partOfSpeechGuesser == null) {
            return false;
        }
        Map<String, MutableInteger> guesses = this.partOfSpeechGuesser.guessPartsOfSpeech(str);
        PartOfSpeechTags tags = this.partOfSpeechGuesser.getWordLexicon().getPartOfSpeechTags();
        for (String tag : guesses.keySet()) {
            if (tags.isVerbTag(tag)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether a word is a proper noun.
     *
     * <p>A word qualifies when the names lookup reports it as a name or
     * place, or, if a part of speech guesser is set, when a guessed tag is a
     * proper noun tag, or is a noun tag and the word is capitalized.</p>
     *
     * @param str the word to check
     * @return true when the word is considered a proper noun
     */
    protected boolean isProperNoun(String str) {
        if (this.names.isNameOrPlace(str)) {
            return true;
        }
        if (this.partOfSpeechGuesser == null) {
            return false;
        }
        boolean capitalized = CharUtils.isCapitalLetter(str.charAt(0));
        Map<String, MutableInteger> guesses = this.partOfSpeechGuesser.guessPartsOfSpeech(str);
        PartOfSpeechTags tags = this.partOfSpeechGuesser.getWordLexicon().getPartOfSpeechTags();
        for (String tag : guesses.keySet()) {
            if (tags.isProperNounTag(tag) || (tags.isNounTag(tag) && capitalized)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether any guessed part of speech for a word is a pronoun tag.
     *
     * @param str the word to check
     * @return true when a guessed tag is a pronoun tag; false when no part
     *         of speech guesser is set
     */
    protected boolean isPronoun(String str) {
        if (this.partOfSpeechGuesser == null) {
            return false;
        }
        Map<String, MutableInteger> guesses = this.partOfSpeechGuesser.guessPartsOfSpeech(str);
        PartOfSpeechTags tags = this.partOfSpeechGuesser.getWordLexicon().getPartOfSpeechTags();
        for (String tag : guesses.keySet()) {
            if (tags.isPronounTag(tag)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether any guessed part of speech for a word is a noun tag.
     *
     * @param str the word to check
     * @return true when a guessed tag is a noun tag; false when no part of
     *         speech guesser is set
     */
    protected boolean isNoun(String str) {
        if (this.partOfSpeechGuesser == null) {
            return false;
        }
        Map<String, MutableInteger> guesses = this.partOfSpeechGuesser.guessPartsOfSpeech(str);
        PartOfSpeechTags tags = this.partOfSpeechGuesser.getWordLexicon().getPartOfSpeechTags();
        for (String tag : guesses.keySet()) {
            if (tags.isNounTag(tag)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits a word list into sub-lists, ending a sub-list after each "."
     * token and each end-of-text-section marker. The terminating token is
     * kept at the end of its sub-list.
     *
     * @param list the words to split
     * @return the sub-lists in order; trailing words with no terminator form
     *         a final sub-list
     */
    protected List<List<String>> splitSentenceWordList(List<String> list) {
        List<List<String>> pieces = ListFactory.createNewList();
        List<String> current = ListFactory.createNewList();
        for (String word : list) {
            current.add(word);
            if (word.equals(CharUtils.CHAR_END_OF_TEXT_SECTION_STRING) || word.equals(".")) {
                pieces.add(current);
                current = ListFactory.createNewList();
            }
        }
        if (!current.isEmpty()) {
            pieces.add(current);
        }
        return pieces;
    }

    /**
     * Checks whether a sentence contains a verb anywhere except in its last
     * position.
     *
     * @param list the words of the sentence; may be null
     * @return true when some word other than the last is a verb according
     *         to {@link #isVerb}
     */
    protected boolean verbSeen(List<String> list) {
        if (list == null) {
            return false;
        }
        // The last word is deliberately skipped, as in the original loop bound.
        for (int i = 0; i < list.size() - 1; i++) {
            if (isVerb(list.get(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Gets the logger.
     *
     * @return the logger in use
     */
    @Override // edu.northwestern.at.utils.logger.UsesLogger
    public Logger getLogger() {
        return this.logger;
    }

    /**
     * Sets the logger.
     *
     * @param logger the logger to use
     */
    @Override // edu.northwestern.at.utils.logger.UsesLogger
    public void setLogger(Logger logger) {
        this.logger = logger;
    }
}
