package uk.ac.cam.ch.wwmm.opsin;

import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/opsin/Tokeniser.class */
/**
 * Breaks a name into {@link ParseWord}s by repeatedly asking a set of parse rules for the
 * longest interpretable portion of the still-unparsed text and recording the resulting words
 * on the {@link TokenizationResult}'s parse.
 *
 * <p>Two directions are supported: {@link #tokenize(String, boolean)} consumes the name from
 * its start using the rules supplied at construction, while
 * {@link #tokenizeRightToLeft(ReverseParseRules, String, boolean)} consumes it from its end
 * using caller-supplied reverse rules.
 */
class Tokeniser {
    /**
     * Matches a working name that consists solely of bracketed and/or bare "NNci" style tokens
     * (one or two digits followed by "ci", any case). Used with {@code matches()}, so the whole
     * remaining text must fit the pattern.
     */
    private static final Pattern MATCH_CAS_COLLECTIVE_INDEX = Pattern.compile("([\\[\\(\\{]([1-9][0-9]?[cC][iI][, ]?)+[\\]\\)\\}])+|[1-9][0-9]?[cC][iI]", Pattern.CASE_INSENSITIVE);

    /** Matches a leading "compd. with ", "compound with " or "and " phrase (any case). */
    private static final Pattern MATCH_COMPOUND_WITH_PHRASE = Pattern.compile("(compd\\. with|compound with|and) ", Pattern.CASE_INSENSITIVE);

    /** Rules used for left-to-right tokenisation and for {@link #reverseSpaceRemoval}. */
    private final ParseRules parseRules;

    /**
     * Creates a tokeniser backed by the given parse rules.
     *
     * @param parseRules rules used by {@link #tokenize(String, boolean)}
     */
    public Tokeniser(ParseRules parseRules) {
        this.parseRules = parseRules;
    }

    /**
     * @return the parse rules supplied at construction
     */
    public ParseRules getParseRules() {
        return this.parseRules;
    }

    /**
     * Tokenises a name from left to right.
     *
     * <p>Each iteration parses the unparsed remainder. If the interpretable prefix forms a word
     * (see {@link #isWordParsable}) it is recorded; otherwise {@link #fixWord} is given a chance
     * to repair the remainder. When no repair is possible the error fields captured at the
     * first failure since the last successfully parsed word are copied onto the result and the
     * loop stops.
     *
     * @param str the name to tokenise
     * @param z   when {@code true}, the name is first passed through
     *            {@code WordTools.removeWhiteSpaceIfBracketsAreUnbalanced} and whitespace
     *            removal is permitted as a repair strategy
     * @return the tokenisation result; callers should check whether it was successful
     * @throws ParsingException propagated from the underlying parsing helpers
     */
    public TokenizationResult tokenize(String str, boolean z) throws ParsingException {
        TokenizationResult result = z ? new TokenizationResult(WordTools.removeWhiteSpaceIfBracketsAreUnbalanced(str)) : new TokenizationResult(str);
        // Snapshot of the error state at the first failure; reset whenever a word parses.
        TokenizationResult firstFailure = null;
        while (!result.isSuccessfullyTokenized()) {
            ParseRulesResults parses = this.parseRules.getParses(result.getUnparsedName());
            List<ParseTokens> parseTokens = parses.getParseTokensList();
            result.setWorkingName(parses.getUninterpretableName());
            // The interpretable part is whatever precedes the uninterpretable suffix.
            String unparsedName = result.getUnparsedName();
            String parsedName = unparsedName.substring(0, unparsedName.length() - result.getWorkingName().length());
            if (isWordParsable(parseTokens, result)) {
                parseWord(result, parseTokens, parsedName, false);
                firstFailure = null;
                continue;
            }
            if (firstFailure == null) {
                firstFailure = new TokenizationResult(str);
                firstFailure.setErrorFields(result.getUnparsedName(), result.getWorkingName(), parses.getUnparseableName());
            }
            if (!fixWord(result, parsedName, z)) {
                // Report the earliest failure rather than the state after attempted repairs.
                result.setErrorFields(firstFailure.getUnparsedName(), firstFailure.getUninterpretableName(), firstFailure.getUnparsableName());
                break;
            }
        }
        return result;
    }

    /**
     * Tokenises a name from right to left using the supplied reverse rules.
     *
     * <p>Mirrors {@link #tokenize(String, boolean)}: the interpretable part is the suffix of the
     * unparsed name and the uninterpretable part is its prefix. Words are accumulated in
     * right-to-left order and the word list is reversed before returning, whether or not
     * tokenisation succeeded.
     *
     * @param reverseParseRules rules used to parse from the end of the name
     * @param str               the name to tokenise
     * @param z                 when {@code true}, removal of a space is permitted as a repair
     *                          strategy
     * @return the tokenisation result; callers should check whether it was successful
     * @throws ParsingException propagated from the underlying parsing helpers
     */
    public TokenizationResult tokenizeRightToLeft(ReverseParseRules reverseParseRules, String str, boolean z) throws ParsingException {
        TokenizationResult result = new TokenizationResult(str);
        // Snapshot of the error state at the first failure; reset whenever a word parses.
        TokenizationResult firstFailure = null;
        while (!result.isSuccessfullyTokenized()) {
            ParseRulesResults parses = reverseParseRules.getParses(result.getUnparsedName());
            List<ParseTokens> parseTokens = parses.getParseTokensList();
            result.setWorkingName(parses.getUninterpretableName());
            // The interpretable part is whatever follows the uninterpretable prefix.
            String parsedName = result.getUnparsedName().substring(result.getWorkingName().length());
            if (isWordParsableInReverse(parseTokens, result)) {
                parseWord(result, parseTokens, parsedName, true);
                firstFailure = null;
                continue;
            }
            if (firstFailure == null) {
                firstFailure = new TokenizationResult(str);
                firstFailure.setErrorFields(result.getUnparsedName(), result.getWorkingName(), parses.getUnparseableName());
            }
            if (!fixWordInReverse(result, parsedName, z)) {
                // Report the earliest failure rather than the state after attempted repairs.
                result.setErrorFields(firstFailure.getUnparsedName(), firstFailure.getUninterpretableName(), firstFailure.getUnparsableName());
                break;
            }
        }
        Collections.reverse(result.getParse().getWords());
        return result;
    }

    /**
     * Reverse-direction word check: there must be at least one parse and either the result is
     * fully interpretable or the uninterpretable prefix ends with a space or hyphen.
     */
    private boolean isWordParsableInReverse(List<ParseTokens> parseTokens, TokenizationResult result) {
        if (parseTokens.isEmpty()) {
            return false;
        }
        if (result.isFullyInterpretable()) {
            return true;
        }
        String workingName = result.getWorkingName();
        char lastChar = workingName.charAt(workingName.length() - 1);
        return lastChar == ' ' || lastChar == '-';
    }

    /**
     * Forward-direction word check: there must be at least one parse and either the result is
     * fully interpretable or the uninterpretable suffix starts with a space or hyphen.
     */
    private boolean isWordParsable(List<ParseTokens> parseTokens, TokenizationResult result) {
        if (parseTokens.isEmpty()) {
            return false;
        }
        if (result.isFullyInterpretable()) {
            return true;
        }
        char firstChar = result.getWorkingName().charAt(0);
        return firstChar == ' ' || firstChar == '-';
    }

    /**
     * Records the words for the parsed portion and advances the unparsed name past the
     * separator that delimited it.
     *
     * <p>The separator is a single character, except that a {@code " - "} at the relevant edge
     * of a working name longer than three characters is dropped as a whole.
     *
     * @param result      the tokenisation state to update
     * @param parseTokens parses of {@code parsedName}
     * @param parsedName  the text that was interpreted
     * @param reverse     {@code true} when tokenising right to left
     */
    private void parseWord(TokenizationResult result, List<ParseTokens> parseTokens, String parsedName, boolean reverse) throws ParsingException {
        addParseWords(parseTokens, parsedName, result.getParse(), reverse);
        if (result.isFullyInterpretable()) {
            result.setUnparsedName(result.getWorkingName());
            return;
        }
        String workingName = result.getWorkingName();
        String remaining;
        if (reverse) {
            // Separator sits at the end of the remaining (left-hand) text.
            boolean spacedHyphen = workingName.length() > 3 && workingName.endsWith(" - ");
            remaining = workingName.substring(0, workingName.length() - (spacedHyphen ? 3 : 1));
        } else {
            // Separator sits at the start of the remaining (right-hand) text.
            boolean spacedHyphen = workingName.length() > 3 && workingName.startsWith(" - ");
            remaining = workingName.substring(spacedHyphen ? 3 : 1);
        }
        result.setUnparsedName(remaining);
    }

    /**
     * Splits the parsed text into words via {@code WordTools.splitIntoParseWords} and appends
     * them to the parse, in reversed order when tokenising right to left.
     */
    private void addParseWords(List<ParseTokens> parseTokens, String parsedName, Parse parse, boolean reverse) throws ParsingException {
        List<ParseWord> words = WordTools.splitIntoParseWords(parseTokens, parsedName);
        if (reverse) {
            Collections.reverse(words);
        }
        for (ParseWord word : words) {
            parse.addWord(word);
        }
    }

    /**
     * Attempts to repair the unparsed name after a forward parse failure. Strategies are tried
     * in order:
     * <ol>
     *   <li>drop a leading "compd. with "/"compound with "/"and " phrase, provided the last
     *       parsed word was a full or functional-term word;</li>
     *   <li>drop a working name that is entirely a collective-index style token;</li>
     *   <li>if whitespace removal is allowed, re-parse the last word joined to the unparsed
     *       text ({@link #reverseSpaceRemoval});</li>
     *   <li>if whitespace removal is allowed, delete the first space in the working name.</li>
     * </ol>
     *
     * @param result                    the tokenisation state to update
     * @param parsedName                the interpretable prefix of the current unparsed name
     * @param allowRemovalOfWhiteSpace  whether strategies 3 and 4 may be used
     * @return {@code true} if the unparsed name was changed and parsing should be retried
     */
    private boolean fixWord(TokenizationResult result, String parsedName, boolean allowRemovalOfWhiteSpace) throws ParsingException {
        String workingName = result.getWorkingName();
        Matcher compoundWith = MATCH_COMPOUND_WITH_PHRASE.matcher(workingName);
        if (compoundWith.lookingAt() && lastParsedWordWasFullOrFunctionalTerm(result)) {
            result.setUnparsedName(parsedName + workingName.substring(compoundWith.group().length()));
            return true;
        }
        if (MATCH_CAS_COLLECTIVE_INDEX.matcher(workingName).matches()) {
            result.setUnparsedName(parsedName);
            return true;
        }
        if (!allowRemovalOfWhiteSpace) {
            return false;
        }
        if (reverseSpaceRemoval(result.getParse().getWords(), result)) {
            return true;
        }
        int spaceIndex = workingName.indexOf(' ');
        if (spaceIndex == -1) {
            return false;
        }
        result.setUnparsedName(parsedName + workingName.substring(0, spaceIndex) + workingName.substring(spaceIndex + 1));
        return true;
    }

    /**
     * @return {@code true} if any parse of the most recently added word has word type
     *         {@code full} or {@code functionalTerm}; {@code false} if no word has been added
     */
    private boolean lastParsedWordWasFullOrFunctionalTerm(TokenizationResult result) throws ParsingException {
        List<ParseWord> words = result.getParse().getWords();
        if (words.isEmpty()) {
            return false;
        }
        ParseWord lastWord = words.get(words.size() - 1);
        for (ParseTokens parseTokens : lastWord.getParseTokens()) {
            WordType wordType = OpsinTools.determineWordType(parseTokens.getAnnotations());
            if (wordType.equals(WordType.full) || wordType.equals(WordType.functionalTerm)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Attempts to repair the unparsed name after a reverse parse failure by deleting the last
     * space in the working name and re-appending the already interpreted suffix.
     *
     * @param result                    the tokenisation state to update
     * @param parsedName                the interpretable suffix of the current unparsed name
     * @param allowRemovalOfWhiteSpace  whether the repair may be attempted at all
     * @return {@code true} if a space was removed and parsing should be retried
     */
    private boolean fixWordInReverse(TokenizationResult result, String parsedName, boolean allowRemovalOfWhiteSpace) {
        if (!allowRemovalOfWhiteSpace) {
            return false;
        }
        String workingName = result.getWorkingName();
        int spaceIndex = workingName.lastIndexOf(' ');
        if (spaceIndex == -1) {
            return false;
        }
        result.setUnparsedName(workingName.substring(0, spaceIndex) + workingName.substring(spaceIndex + 1) + parsedName);
        return true;
    }

    /**
     * Tries to undo a word break by parsing the last recorded word concatenated directly with
     * the unparsed name. If that parse extends beyond the last word and stops at the end of the
     * text, a space or a hyphen, the last word is replaced by the words of the joined text and
     * the unparsed name is advanced past the single separator character.
     *
     * @param words  the words parsed so far
     * @param result the tokenisation state to update
     * @return {@code true} if the last word was replaced; {@code false} (with {@code result}
     *         left unmodified by this method) otherwise
     */
    private boolean reverseSpaceRemoval(List<ParseWord> words, TokenizationResult result) throws ParsingException {
        if (words.isEmpty()) {
            return false;
        }
        ParseWord lastWord = words.get(words.size() - 1);
        String unparsedName = result.getUnparsedName();
        ParseRulesResults parses = this.parseRules.getParses(lastWord.getWord() + unparsedName);
        List<ParseTokens> parseTokens = parses.getParseTokensList();
        String uninterpretable = parses.getUninterpretableName();
        String joinedParsedName = lastWord.getWord() + unparsedName.substring(0, unparsedName.length() - uninterpretable.length());

        // Only worthwhile if the joined parse consumed more than the last word already did.
        boolean extendsLastWord = joinedParsedName.length() > lastWord.getWord().length();
        if (!extendsLastWord || parseTokens.isEmpty()) {
            return false;
        }
        boolean nothingLeft = uninterpretable.isEmpty();
        if (!nothingLeft && uninterpretable.charAt(0) != ' ' && uninterpretable.charAt(0) != '-') {
            return false;
        }

        result.getParse().removeWord(lastWord);
        for (ParseWord word : WordTools.splitIntoParseWords(parseTokens, joinedParsedName)) {
            result.getParse().addWord(word);
        }
        // Skip the separator character when text remains.
        result.setUnparsedName(nothingLeft ? uninterpretable : uninterpretable.substring(1));
        return true;
    }
}
