package uk.ac.cam.ch.wwmm.chemicaltagger;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang.StringUtils;
import uk.ac.cam.ch.wwmm.oscar.document.Token;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/chemicaltagger/PostProcessTags.class */
/**
 * Rule-based clean-up pass over the combined tag list of a {@link POSContainer}.
 *
 * <p>For every token the rules look at the token's own tag and surface form and at the tags
 * (occasionally the surfaces) of its immediate neighbours, and may replace the tag. Neighbouring
 * tags are always read from the tag list captured at construction time, so a correction made for
 * one token never influences the rules applied to the tokens around it.
 *
 * <p>All tag comparisons are case-insensitive; lower-casing uses {@link Locale#ROOT} so that the
 * result does not depend on the JVM's default locale.
 */
public class PostProcessTags {
    /** Surfaces loaded through {@code Utils.loadsTagsFromFile}; matching tokens are lower-cased. */
    private static final HashSet<String> tagSet = Utils.loadsTagsFromFile(PostProcessTags.class);

    /** Tags that the rules treat as "something noun-like or adjectival follows". */
    private static final String[] NOUN_LIKE_TAGS = {
        "nn", "oscar-cm", "nns", "nn-chementity", "oscar-cj", "jj-chem", "jj", "nnp", "nn-state", "nn-apparatus"
    };

    /** Tags that the rules treat as "a chemical name or chemical adjective follows". */
    private static final String[] CHEMICAL_TAGS = {"oscar-cm", "nn-chementity", "oscar-cj", "jj-chem"};

    /** Colour words that are re-tagged as adjectives when followed by an {@code nn-state} tag. */
    private static final List<String> COLOUR_WORDS =
            Arrays.asList("amber", "bronze", "cream", "fawn", "gold", "ivory", "lavender", "tan");

    private final POSContainer posContainer;
    private final List<Token> tokenList;
    private final List<String> combinedTags;

    /**
     * Captures the token list and combined tag list of the given container.
     *
     * @param pOSContainer container whose tags are to be corrected
     */
    public PostProcessTags(POSContainer pOSContainer) {
        this.posContainer = pOSContainer;
        this.tokenList = pOSContainer.getWordTokenList();
        this.combinedTags = pOSContainer.getCombinedTagsList();
    }

    /** Corrects the combined tag list without suppressing any corrected tag. */
    public void correctCombinedTagsList() {
        correctCombinedTagsList(new ArrayList<String>());
    }

    /**
     * Runs the correction rules over every token and stores the resulting tag list on the
     * container via {@code setCombinedTagsList}.
     *
     * <p>Tokens whose surface is contained in {@link #tagSet} additionally have their surface
     * lower-cased in place.
     *
     * @param list corrected tags that must not be applied: when a rule produces one of these, the
     *             token keeps its original tag. {@code null} is treated as an empty list.
     */
    public void correctCombinedTagsList(List<String> list) {
        List<String> correctedTags = new ArrayList<String>(this.combinedTags.size());
        for (int i = 0; i < this.combinedTags.size(); i++) {
            String originalTag = this.combinedTags.get(i);
            Token token = this.tokenList.get(i);
            String surface = token.getSurface();

            String correctedTag = applyCorrections(i, originalTag, surface);

            // The rules above work on the untouched surface; normalise it only afterwards.
            if (tagSet.contains(surface)) {
                token.setSurface(lower(surface));
            }

            boolean suppressed = list != null && list.contains(correctedTag);
            correctedTags.add(suppressed ? originalTag : correctedTag);
        }
        this.posContainer.setCombinedTagsList(correctedTags);
    }

    /**
     * Tries the rule groups in a fixed order (nouns, verbs, digits, units, misc) and stops at the
     * first group that returns a tag different from the original one.
     */
    private String applyCorrections(int i, String tag, String surface) {
        String result = correctMisTaggedNouns(i, tag, surface);
        if (result.equals(tag)) {
            result = correctMisTaggedVerbs(i, tag, surface);
        }
        if (result.equals(tag)) {
            result = correctMisTaggedDigits(i, tag, surface);
        }
        if (result.equals(tag)) {
            result = correctMisTaggedUnits(i, tag, surface);
        }
        if (result.equals(tag)) {
            result = correctMisTaggedMisc(i, tag, surface);
        }
        return result;
    }

    /**
     * Noun rules: mixtures not followed by {@code in-of}, "acid"/"base", "formula" followed by a
     * number or identifier, and colour words followed by a state.
     *
     * @return the corrected tag, or {@code tag} unchanged when no rule applies
     */
    private String correctMisTaggedNouns(int i, String tag, String surface) {
        if (lower(tag).startsWith("nn-mixture") && !tagAfter(i, "in-of")) {
            return "NN-CHEMENTITY";
        }
        boolean acidOrBase = surface.equalsIgnoreCase("acid") || surface.equalsIgnoreCase("base");
        // Note: this prefix test is deliberately case-sensitive, unlike the other tag checks.
        if (acidOrBase && tag.startsWith("NN")) {
            return "NN-CHEMENTITY";
        }
        if (surface.equalsIgnoreCase("formula") && tagAfter(i, "cd", "cd-alphanum", "nn-identifier")) {
            return "NN-CHEMENTITY";
        }
        if (COLOUR_WORDS.contains(lower(surface)) && tagAfter(i, "nn-state")) {
            return "JJ";
        }
        return tag;
    }

    /**
     * Verb rules: re-tags verbs (and some generic nouns) that are used as nouns or adjectives,
     * judged from the neighbouring tags. Rules are evaluated top to bottom; the first match wins.
     *
     * @return the corrected tag, or {@code tag} unchanged when no rule applies
     */
    private String correctMisTaggedVerbs(int i, String tag, String surface) {
        String tagLc = lower(tag);
        String surfaceLc = lower(surface);

        if (surface.equalsIgnoreCase("yield")) {
            if (tagBefore(i, "nn-percent") || tagAfter(i, "in-of", "colon")) {
                return "NN-YIELD";
            }
            if (tagAfter(i, "nn-chementity") && tagBefore(i, "dt", "dt-the")) {
                return "JJ-COMPOUND";
            }
        }
        if (tagLc.startsWith("vb") && surface.equalsIgnoreCase("form")) {
            return "VB-YIELD";
        }
        if (tagLc.startsWith("vb-filter") && tagBefore(i, "oscar-cj", "jj-chem")) {
            return "NN";
        }
        if (isDilutionOrDrynessWord(surface) && tagAfter(i, CHEMICAL_TAGS)) {
            return "JJ-CHEM";
        }

        if (tagLc.startsWith("vb-") || isGenericNounTag(tagLc)) {
            boolean afterDeterminerOrAdjective = tagBefore(i, "dt", "jj", "jj-chem", "dt-the");
            // A token at the very end of the list counts as being followed by a boundary.
            boolean atBoundary = isLastToken(i) || tagAfter(i, "stop", "comma");
            if (afterDeterminerOrAdjective && atBoundary && !tagLc.startsWith("nn-")) {
                return "NN";
            }
            if (afterDeterminerOrAdjective
                    && tagAfter(i, "jj", "nn-chementity", "nn-mixture", "nn-apparatus", "nn", "jj-chem")
                    && !tagLc.startsWith("nn-chementity")
                    && adjObjectExists(i)) {
                return "JJ-CHEM";
            }
        }

        if (tagLc.startsWith("vb-") || tagLc.startsWith("nn-synthesize")) {
            if (tagBefore(i, "dt-the", "dt") && tagAfterStartsWith(i, "vb")) {
                return "NN-CHEMENTITY";
            }
        }

        if (tagLc.startsWith("vb") && Utils.containsNumber(surface)) {
            return "NN";
        }

        if (tagLc.startsWith("vbn") || tagLc.startsWith("vbg") || tagLc.startsWith("vb-")) {
            if (tagAfter(i, "oscar-cm", "nns", "nn-chementity", "oscar-cj", "jj-chem", "nnp")
                    && tagBefore(i, "dt", "rb", "rb-conj", "dt-the", "stop", "in-with", "in-of", "in-under")) {
                return "JJ-CHEM";
            }
        }

        if (tagLc.startsWith("vb")) {
            // Pattern: <vb-heat> <to> <this token> <stop>.
            if (tagBefore(i, "to") && tag2Before(i, "vb-heat") && tagAfter(i, "stop")) {
                return "NN";
            }
        }

        if (tagLc.startsWith("vb") && (surfaceLc.endsWith("ing") || surfaceLc.endsWith("ed"))) {
            boolean betweenChemicals = tagAfter(i, "oscar-cm") && tagBefore(i, "oscar-cm", "nn-chementity");
            if (!betweenChemicals) {
                boolean afterDeterminerNumberOrChemical = tagBefore(i, "dt", "dt-the", "cd", "oscar-cm");
                if (tagAfter(i, NOUN_LIKE_TAGS) && afterDeterminerNumberOrChemical) {
                    return "JJ";
                }
                if (surfaceLc.endsWith("ing") && afterDeterminerNumberOrChemical && !tagAfter(i, "in-of")) {
                    return "JJ-CHEM";
                }
            }
        }

        if (tagLc.startsWith("vb") && !surfaceLc.endsWith("ing")) {
            // Skipped for the first token and for anything directly following the word "that".
            if (i != 0 && !this.tokenList.get(i - 1).getSurface().equals("that")) {
                boolean afterModifier = tagBefore(i, "dt", "dt-the", "in-in", "in-of", "rb");
                if (afterModifier && tagAfter(i, CHEMICAL_TAGS)) {
                    return "JJ-CHEM";
                }
                if (afterModifier && tagAfter(i, NOUN_LIKE_TAGS)) {
                    return "JJ";
                }
                if (afterModifier && tagAfterStartsWith(i, "nn")) {
                    return "JJ";
                }
            }
        }
        return tag;
    }

    /**
     * Unit rule: a single lower-case character followed by a {@code sym} tag becomes {@code NN}.
     *
     * @return the corrected tag, or {@code tag} unchanged when the rule does not apply
     */
    private String correctMisTaggedUnits(int i, String tag, String surface) {
        boolean singleLowerCaseChar = surface.length() == 1 && Character.isLowerCase(surface.charAt(0));
        if (singleLowerCaseChar && tagAfter(i, "sym")) {
            return "NN";
        }
        return tag;
    }

    /**
     * Digit rules: switches between {@code CD-ALPHANUM} and {@code CD} depending on the token's
     * shape and its neighbours.
     *
     * @return the corrected tag, or {@code tag} unchanged when no rule applies
     */
    private String correctMisTaggedDigits(int i, String tag, String surface) {
        String tagLc = lower(tag);
        if (tagLc.startsWith("nn-") && Utils.containsNumber(surface)) {
            // A token at the very end of the list counts as being followed by a boundary.
            boolean atBoundary = tagAfter(i, "-lrb-", "stop", "comma") || isLastToken(i);
            if (tagBefore(i, "in-of", "jj", "nn-chementity", "comma") && atBoundary) {
                return "CD-ALPHANUM";
            }
        }
        if (tagLc.equals("cd-alphanum")
                && (tagAfter(i, "nn-vol", "nn-mass") || surface.contains(".") || surface.length() > 4)) {
            return "CD";
        }
        return tag;
    }

    /**
     * Miscellaneous rules (units written as single letters, proper nouns, identifiers,
     * "precipitate", ...). Rules are evaluated top to bottom; the first match wins.
     *
     * @return the corrected tag, or {@code tag} unchanged when no rule applies
     */
    private String correctMisTaggedMisc(int i, String tag, String surface) {
        String tagLc = lower(tag);
        String surfaceLc = lower(surface);

        if (tagLc.equals("nnp") && StringUtils.equalsIgnoreCase(surface, "M")) {
            return "NN-MOLAR";
        }
        if (i != 0 && tagLc.equals("nns")) {
            if (surface.endsWith("s") && startsWithUpperCase(surface) && !tagBefore(i, "stop")) {
                return "NNPS";
            }
        }
        // Exactly one character: an empty surface has no first character to inspect.
        if (tagLc.equals("rb") && surface.length() == 1) {
            return Character.isUpperCase(surface.charAt(0)) ? "NNP" : "NN";
        }
        if (surface.equals("M") && tagBefore(i, "cd")) {
            return "NN-MOLAR";
        }
        if (surface.equals("K") && tagBefore(i, "cd")) {
            return "NN-TEMP";
        }
        if (tagLc.equals("''")) {
            return "FW";
        }
        if (tagLc.equals("nn-mixture") && tagAfter(i, "vbd")) {
            return "NN-CHEMENTITY";
        }
        if (tagLc.startsWith("nn-concentrate")) {
            if (!tagBefore(i, "jj", "oscar-cj", "jj-chem") && tagAfter(i, "in-of")) {
                return "NN";
            }
        }
        if (i != 0
                && tagLc.startsWith("nn-add")
                && !tagBefore(i, "stop", "comma", "colon")
                && startsWithUpperCase(surface)) {
            return "NNP";
        }
        if (tagLc.startsWith("jj") || tagLc.startsWith("nnp")) {
            boolean campaignFollows = tagAfter(i, "nn-campaign") || tag2After(i, "nn-campaign");
            if (campaignFollows && startsWithUpperCase(surface)) {
                return "NNP";
            }
        }
        if (surfaceLc.equals("addition")) {
            if (tagBefore(i, "in-in") && tagAfter(i, "comma", "stop")) {
                return "NN";
            }
        }
        if (surfaceLc.startsWith("obtain")) {
            if (tagAfter(i, "in-from") && tag2After(i, "nnp")) {
                return "VB";
            }
        }
        if (tagLc.startsWith("nn-synthesize")) {
            if (tagAfter(i, "nn-apparatus") && tagBefore(i, "dt", "nn-apparatus", "rb-conj", "dt-the")) {
                return "JJ-CHEM";
            }
        }
        if (surface.equals("D") && tagLc.equals("nn-time") && tagBefore(i, "in-in")) {
            return "NN";
        }

        if (surface.length() == 1 && Character.isLetter(surface.charAt(0))) {
            char letter = surface.charAt(0);
            boolean bracketed = (tagBefore(i, "-lrb-") || i == 0) && tagAfter(i, "-rrb-");
            boolean leadingLabel = i == 0 && surfaceAfter(i, ".", ":");
            if (bracketed || leadingLabel) {
                return tagForIsolatedLetter(letter, i);
            }
            if (tagBefore(i, "nn-example", "nn-method", "nn-chementity", "in-of")
                    && !isEnglishUseOfAorI(letter, i)) {
                return "NN-IDENTIFIER";
            }
        }

        if (surface.equalsIgnoreCase("precipitate")) {
            if (tagLc.startsWith("nn")) {
                return "NN-CHEMENTITY";
            }
            if (tagLc.startsWith("vb")) {
                return "VB-PRECIPITATE";
            }
        }

        if (i != 0 && tagLc.equals("nn")) {
            boolean capitalisedMidSentence = startsWithUpperCase(surface) && !tagBefore(i, "stop");
            boolean containsUpperCase = !surfaceLc.equals(surface);
            if (capitalisedMidSentence || containsUpperCase) {
                return "NNP";
            }
        }
        return tag;
    }

    /**
     * Chooses the tag for a single letter that stands in brackets or acts as a leading label.
     * The tag two positions back decides whether the letter is read as a unit or state;
     * otherwise it is an identifier.
     */
    private String tagForIsolatedLetter(char letter, int i) {
        if ((letter == 'd' || letter == 'D' || letter == 'h' || letter == 's') && tag2Before(i, "nn-time")) {
            return "NN-TIME";
        }
        if (letter == 'g' && tag2Before(i, "nn-mass")) {
            return "NN-MASS";
        }
        if (letter == 'K' && tag2Before(i, "nn-temp")) {
            return "NN-TEMP";
        }
        if ((letter == 'l' || letter == 'L') && tag2Before(i, "nn-vol")) {
            return "NN-VOL";
        }
        if ((letter == 'g' || letter == 'l' || letter == 's') && tag2Before(i, "oscar-cm")) {
            return "NN-STATE";
        }
        return "NN-IDENTIFIER";
    }

    /**
     * Decides whether a single-letter token is treated as the English word rather than an
     * identifier: 'A' or 'I' at index 0 or directly after a {@code stop}/{@code colon} tag; 'a'
     * anywhere except index 0 and not adjacent to a {@code stop}/{@code colon} tag. Every other
     * letter yields {@code false}.
     */
    private boolean isEnglishUseOfAorI(char c, int i) {
        if (c == 'A' || c == 'I') {
            return i == 0 || tagBefore(i, "stop", "colon");
        }
        if (c == 'a') {
            return i != 0 && !tagBefore(i, "stop", "colon") && !tagAfter(i, "stop", "colon");
        }
        return false;
    }

    /**
     * Scans forward from the token after {@code i} and reports whether a tag starting with
     * {@code nn} occurs before the first tag starting with {@code to} or {@code in}.
     */
    private boolean adjObjectExists(int i) {
        for (int j = i + 1; j < this.combinedTags.size(); j++) {
            String nextTag = lower(this.combinedTags.get(j));
            if (nextTag.startsWith("to") || nextTag.startsWith("in")) {
                return false;
            }
            if (nextTag.startsWith("nn")) {
                return true;
            }
        }
        return false;
    }

    /**
     * True for tags starting with {@code nn} except the specialised noun tags (state, apparatus,
     * cycle, temp, pressure, time, molar, vacuum) and proper nouns ({@code nnp}).
     */
    private static boolean isGenericNounTag(String tagLc) {
        return tagLc.startsWith("nn")
                && !tagLc.startsWith("nn-state")
                && !tagLc.startsWith("nn-apparatus")
                && !tagLc.startsWith("nn-cycle")
                && !tagLc.startsWith("nn-temp")
                && !tagLc.startsWith("nn-pressure")
                && !tagLc.startsWith("nn-time")
                && !tagLc.startsWith("nn-molar")
                && !tagLc.startsWith("nn-vacuum")
                && !tagLc.startsWith("nnp");
    }

    /** True when the surface ends (ignoring case) with dilute(d), concentrated, dry or dried. */
    private static boolean isDilutionOrDrynessWord(String surface) {
        return endsWithCaseInsensitive(surface, "dilute")
                || endsWithCaseInsensitive(surface, "diluted")
                || endsWithCaseInsensitive(surface, "concentrated")
                || endsWithCaseInsensitive(surface, "dry")
                || endsWithCaseInsensitive(surface, "dried");
    }

    /** True when {@code i} is the index of the final entry of the combined tag list. */
    private boolean isLastToken(int i) {
        return i == this.combinedTags.size() - 1;
    }

    /** True when the lower-cased surface of the token after {@code i} equals one of the candidates. */
    private boolean surfaceAfter(int i, String... candidates) {
        int next = i + 1;
        return next < this.tokenList.size()
                && Arrays.asList(candidates).contains(lower(this.tokenList.get(next).getSurface()));
    }

    /** True when the tag directly before {@code i} equals one of the (lower-case) candidates. */
    private boolean tagBefore(int i, String... candidates) {
        return i > 0 && Arrays.asList(candidates).contains(lower(this.combinedTags.get(i - 1)));
    }

    /** True when the tag two positions before {@code i} equals one of the (lower-case) candidates. */
    private boolean tag2Before(int i, String... candidates) {
        return i > 1 && Arrays.asList(candidates).contains(lower(this.combinedTags.get(i - 2)));
    }

    /** True when the tag directly after {@code i} equals one of the (lower-case) candidates. */
    private boolean tagAfter(int i, String... candidates) {
        int next = i + 1;
        return next < this.combinedTags.size()
                && Arrays.asList(candidates).contains(lower(this.combinedTags.get(next)));
    }

    /** True when the tag directly after {@code i} starts with one of the (lower-case) prefixes. */
    private boolean tagAfterStartsWith(int i, String... prefixes) {
        int next = i + 1;
        if (next >= this.combinedTags.size()) {
            return false;
        }
        String nextTag = lower(this.combinedTags.get(next));
        for (String prefix : prefixes) {
            if (nextTag.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /** True when the tag two positions after {@code i} equals one of the (lower-case) candidates. */
    private boolean tag2After(int i, String... candidates) {
        int next = i + 2;
        return next < this.combinedTags.size()
                && Arrays.asList(candidates).contains(lower(this.combinedTags.get(next)));
    }

    /** True when {@code str} ends with {@code suffix}, ignoring case. */
    private static boolean endsWithCaseInsensitive(String str, String suffix) {
        if (suffix.length() > str.length()) {
            return false;
        }
        return str.regionMatches(true, str.length() - suffix.length(), suffix, 0, suffix.length());
    }

    /** True when {@code s} is non-empty and its first character is upper-case. */
    private static boolean startsWithUpperCase(String s) {
        return s.length() > 0 && Character.isUpperCase(s.charAt(0));
    }

    /** Lower-cases independently of the default locale so tag matching is stable everywhere. */
    private static String lower(String s) {
        return s.toLowerCase(Locale.ROOT);
    }
}
