package uk.ac.cam.ch.wwmm.chemicaltagger;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang.StringUtils;
import org.xmlcml.cml.element.CMLBond;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/chemicaltagger/PostProcessTags.class */
public class PostProcessTags {
    private static HashSet<String> tagSet = Utils.loadsTagsFromFile(PostProcessTags.class);

    public POSContainer correctCombinedTagsList(POSContainer pOSContainer) {
        return correctCombinedTagsList(pOSContainer, new ArrayList());
    }

    public POSContainer correctCombinedTagsList(POSContainer pOSContainer, List<String> list) {
        List<String> wordTokenList = pOSContainer.getWordTokenList();
        List<String> combinedTagsList = pOSContainer.getCombinedTagsList();
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        for (int i = 0; i < combinedTagsList.size(); i++) {
            String str = combinedTagsList.get(i);
            String str2 = wordTokenList.get(i);
            String correctMisTaggedMisc = correctMisTaggedMisc(combinedTagsList, i, str, str2, correctMisTaggedUnits(combinedTagsList, i, str, str2, correctMisTaggedDigits(combinedTagsList, i, str, str2, correctMisTaggedVerbs(wordTokenList, combinedTagsList, i, str, str2, correctMisTaggedNouns(wordTokenList, combinedTagsList, i, str, str2, combinedTagsList.get(i))))));
            if (tagSet.contains(str2)) {
                str2 = str2.toLowerCase();
            }
            if (list.contains(correctMisTaggedMisc)) {
                arrayList2.add(str);
            } else {
                arrayList2.add(correctMisTaggedMisc);
            }
            arrayList.add(str2);
        }
        pOSContainer.setWordTokenList(arrayList);
        pOSContainer.setCombinedTagsList(arrayList2);
        return pOSContainer;
    }

    private String correctMisTaggedNouns(List<String> list, List<String> list2, int i, String str, String str2, String str3) {
        if (str.toLowerCase().startsWith("nn-mixture") && !stringAfter(Arrays.asList("in-of"), i, list2)) {
            str3 = "NN-CHEMENTITY";
        }
        return str3;
    }

    private String correctMisTaggedVerbs(List<String> list, List<String> list2, int i, String str, String str2, String str3) {
        String lowerCase = str.toLowerCase();
        if (lowerCase.startsWith("vb-") || (lowerCase.startsWith("nn") && !lowerCase.startsWith("nn-state") && !lowerCase.startsWith("nn-apparatus") && !lowerCase.startsWith("nn-cycle") && !lowerCase.startsWith("nn-temp") && !lowerCase.startsWith("nn-pressure") && !lowerCase.startsWith("nn-time") && !lowerCase.startsWith("nn-molar") && !lowerCase.startsWith("nn-vacuum") && !lowerCase.startsWith("nnp"))) {
            List<String> asList = Arrays.asList("dt", "jj", "jj-chem", "dt-the");
            List<String> asList2 = Arrays.asList("jj", "nn-chementity", "nn-mixture", "nn-apparatus", "nn", "jj-chem");
            List<String> asList3 = Arrays.asList("stop", "comma");
            if (stringBefore(asList, i, list2) && ((i == list2.size() || stringAfter(asList3, i, list2)) && !lowerCase.startsWith("nn-"))) {
                str3 = "NN";
            } else if (stringBefore(asList, i, list2) && stringAfter(asList2, i, list2) && !lowerCase.startsWith("nn-chementity") && adjObjectExists(list2, i)) {
                str3 = "JJ-CHEM";
            }
        }
        if (lowerCase.startsWith("vb-") || lowerCase.startsWith("nn-synthesize")) {
            List<String> asList4 = Arrays.asList("dt-the", "dt");
            List<String> asList5 = Arrays.asList("vb");
            if (stringBefore(asList4, i, list2) && stringAfterStartsWith(asList5, i, list2)) {
                str3 = "NN-CHEMENTITY";
            }
        }
        if (lowerCase.startsWith("vb") && Utils.containsNumber(str2)) {
            str3 = "NN";
        }
        if (lowerCase.startsWith("vbn") || lowerCase.startsWith("vbg") || lowerCase.startsWith("vb-")) {
            List<String> asList6 = Arrays.asList("oscar-cm", "nns", "nn-chementity", "oscar-cj", "jj-chem", "nnp");
            List<String> asList7 = Arrays.asList("dt", "rb", "rb-conj", "dt-the", "stop", "in-with", "in-of", "in-under");
            if (stringAfter(asList6, i, list2) && stringBefore(asList7, i, list2)) {
                str3 = "JJ-CHEM";
            }
        }
        if (str2.equalsIgnoreCase("yield")) {
            str3 = correctTaggingOfVbYield(list2, i, str3);
        }
        if (lowerCase.startsWith("vb") && str2.equalsIgnoreCase("form")) {
            str3 = "VB-YIELD";
        }
        if (lowerCase.startsWith("vb-filter") && stringBefore(Arrays.asList("oscar-cj", "jj-chem"), i, list2)) {
            str3 = "NN";
        }
        if (lowerCase.startsWith("vb")) {
            List<String> asList8 = Arrays.asList("to");
            List<String> asList9 = Arrays.asList("vb-heat");
            List<String> asList10 = Arrays.asList("stop");
            if (stringBefore(asList8, i, list2) && stringBefore(asList9, i - 1, list2) && stringAfter(asList10, i, list2)) {
                str3 = "NN";
            }
        }
        if (lowerCase.startsWith("vb-dilute") && stringAfter(Arrays.asList("oscar-cm"), i, list2)) {
            str3 = "JJ-CHEM";
        }
        if (lowerCase.startsWith("vb") && (str2.toLowerCase().endsWith("ing") || str2.toLowerCase().endsWith("ed"))) {
            List<String> asList11 = Arrays.asList("nn", "oscar-cm", "nns", "nn-chementity", "oscar-cj", "jj-chem", "jj", "nnp", "nn-state", "nn-apparatus");
            List<String> asList12 = Arrays.asList("dt", "dt-the", "cd", "oscar-cm");
            List<String> asList13 = Arrays.asList("in-of");
            if (stringAfter(asList11, i, list2) && stringBefore(asList12, i, list2)) {
                str3 = "JJ";
            } else if (str2.toLowerCase().endsWith("ing") && stringBefore(asList12, i, list2) && !stringAfter(asList13, i, list2)) {
                str3 = "JJ-CHEM";
            }
        }
        if (lowerCase.startsWith("vb") && !str2.toLowerCase().endsWith("ing")) {
            List<String> asList14 = Arrays.asList("dt", "dt-the", "in-in", "in-of", "rb");
            List<String> asList15 = Arrays.asList("nn", "oscar-cm", "nns", "nn-chementity", "oscar-cj", "jj-chem", "jj", "nnp", "nn-state", "nn-apparatus");
            List<String> asList16 = Arrays.asList("oscar-cm", "nn-chementity", "oscar-cj", "jj-chem");
            if (i != 0 && !list.get(i - 1).equals("that")) {
                if (stringAfter(asList16, i, list2) && stringBefore(asList14, i, list2)) {
                    str3 = "JJ-CHEM";
                } else if (stringBefore(asList14, i, list2) && stringAfter(asList15, i, list2)) {
                    str3 = "JJ";
                } else if (stringBefore(asList14, i, list2) && i + 1 < list2.size() && list2.get(i + 1).toLowerCase().startsWith("nn")) {
                    str3 = "JJ";
                }
            }
        }
        return str3;
    }

    private String correctTaggingOfVbYield(List<String> list, int i, String str) {
        if (stringBefore(Arrays.asList("nn-percent"), i, list)) {
            return "NN-YIELD";
        }
        return (stringAfter(Arrays.asList("nn-chementity"), i, list) && stringBefore(Arrays.asList("dt", "dt-the"), i, list)) ? "JJ-COMPOUND" : str;
    }

    private String correctMisTaggedUnits(List<String> list, int i, String str, String str2, String str3) {
        List<String> asList = Arrays.asList("sym");
        if (str2.length() == 1 && Character.isLowerCase(str2.charAt(0)) && stringAfter(asList, i, list)) {
            str3 = "NN";
        }
        return str3;
    }

    private String correctMisTaggedDigits(List<String> list, int i, String str, String str2, String str3) {
        String lowerCase = str.toLowerCase();
        if (lowerCase.startsWith("nn-") && Utils.containsNumber(str2)) {
            List<String> asList = Arrays.asList("in-of", "jj", "nn-chementity", "comma");
            List<String> asList2 = Arrays.asList("-lrb-", "stop", "comma");
            if (stringBefore(asList, i, list) && (stringAfter(asList2, i, list) || i == list.size())) {
                str3 = "CD-ALPHANUM";
            }
        }
        if (lowerCase.equals("cd-alphanum") && (stringAfter(Arrays.asList("nn-vol", "nn-mass"), i, list) || str2.contains(".") || str2.length() > 4)) {
            str3 = "CD";
        }
        return str3;
    }

    private String correctMisTaggedMisc(List<String> list, int i, String str, String str2, String str3) {
        String lowerCase = str.toLowerCase();
        if (lowerCase.equals("nnp") && StringUtils.equalsIgnoreCase(str2, "M")) {
            str3 = "NN-MOLAR";
        }
        if (i != 0 && lowerCase.equals("nns")) {
            List<String> asList = Arrays.asList("stop");
            if (str2.endsWith("s") && Character.isUpperCase(str2.charAt(0)) && !stringBefore(asList, i, list)) {
                str3 = "NNPS";
            }
        }
        if (lowerCase.equals("rb") && str2.length() < 2) {
            str3 = Character.isUpperCase(str2.charAt(0)) ? "NNP" : "NN";
        }
        if (i != 0 && lowerCase.equals("nn")) {
            List<String> asList2 = Arrays.asList("stop");
            if ((Character.isUpperCase(str2.charAt(0)) && !stringBefore(asList2, i, list)) || !str2.toLowerCase().equals(str2)) {
                str3 = "NNP";
            }
        }
        if (i != 0 && (lowerCase.startsWith("vb") || lowerCase.startsWith("nn-"))) {
            List<String> asList3 = Arrays.asList("stop", "rrb", "comma");
            List<String> asList4 = Arrays.asList("nnp", "nns", "nn", "nnp-acronym");
            if (Character.isUpperCase(str2.charAt(0)) && !stringBefore(asList3, i, list) && stringAfter(asList4, i, list)) {
                str3 = "JJ-CHEM";
            }
        }
        if (str2.equals("M") && stringBefore(Arrays.asList("cd"), i, list)) {
            str3 = "NN-MOLAR";
        }
        if (str2.equals("K") && stringBefore(Arrays.asList("cd"), i, list)) {
            str3 = "NN-TEMP";
        }
        if (lowerCase.equals("''")) {
            str3 = "FW";
        }
        if (lowerCase.equals("nn-mixture") && stringAfter(Arrays.asList("vbd"), i, list)) {
            str3 = "NN-CHEMENTITY";
        }
        if (lowerCase.startsWith("nn-concentrate")) {
            List<String> asList5 = Arrays.asList("jj", "oscar-cj", "jj-chem");
            List<String> asList6 = Arrays.asList("in-of");
            if (!stringBefore(asList5, i, list) && stringAfter(asList6, i, list)) {
                str3 = "NN";
            }
        }
        if (i != 0 && lowerCase.startsWith("nn-add") && !stringBefore(Arrays.asList("stop", "comma", "colon"), i, list) && Character.isUpperCase(str2.charAt(0))) {
            str3 = "NNP";
        }
        if (lowerCase.startsWith("jj") || lowerCase.startsWith("nnp")) {
            List<String> asList7 = Arrays.asList("nn-campaign");
            if ((stringAfter(asList7, i, list) || string2after(asList7, i, list)) && Character.isUpperCase(str2.charAt(0))) {
                str3 = "NNP";
            }
        }
        if (str2.toLowerCase().equals("addition")) {
            List<String> asList8 = Arrays.asList("in-in");
            List<String> asList9 = Arrays.asList("comma", "stop");
            if (stringBefore(asList8, i, list) && stringAfter(asList9, i, list)) {
                str3 = "NN";
            }
        }
        if (str2.toLowerCase().startsWith("obtain")) {
            List<String> asList10 = Arrays.asList("in-from");
            List<String> asList11 = Arrays.asList("nnp");
            if (stringAfter(asList10, i, list) && string2after(asList11, i, list)) {
                str3 = "VB";
            }
        }
        if (lowerCase.startsWith("nn-synthesize")) {
            List<String> asList12 = Arrays.asList("nn-apparatus");
            List<String> asList13 = Arrays.asList("dt", "nn-apparatus", "rb-conj", "dt-the");
            if (stringAfter(asList12, i, list) && stringBefore(asList13, i, list)) {
                str3 = "JJ-CHEM";
            }
        }
        if (str2.equals(CMLBond.DOUBLE_D) && lowerCase.equals("nn-time") && stringBefore(Arrays.asList("in-in"), i, list)) {
            str3 = "NN";
        }
        if (str2.length() == 1 && ((Character.isUpperCase(str2.charAt(0)) || isValidSingleLetterRomanNumber(str2.charAt(0))) && (stringBefore(Arrays.asList("nn-example", "nn-method"), i, list) || (stringBefore(Arrays.asList("-lrb-"), i, list) && stringAfter(Arrays.asList("-rrb-"), i, list))))) {
            str3 = "NN-IDENTIFIER";
        }
        return str3;
    }

    private boolean isValidSingleLetterRomanNumber(char c) {
        return c == 'i' || c == 'I' || c == 'v' || c == 'V' || c == 'x' || c == 'X';
    }

    private boolean adjObjectExists(List<String> list, int i) {
        for (int i2 = i + 1; i2 < list.size() && !list.get(i2).toLowerCase().startsWith("to") && !list.get(i2).toLowerCase().startsWith("in"); i2++) {
            if (list.get(i2).toLowerCase().startsWith("nn")) {
                return true;
            }
        }
        return false;
    }

    private boolean stringBefore(List<String> list, int i, List<String> list2) {
        return i != 0 && list.contains(list2.get(i - 1).toLowerCase());
    }

    private boolean stringAfter(List<String> list, int i, List<String> list2) {
        int i2 = i + 1;
        return i2 < list2.size() && list.contains(list2.get(i2).toLowerCase());
    }

    private boolean stringAfterStartsWith(List<String> list, int i, List<String> list2) {
        int i2 = i + 1;
        if (i2 >= list2.size()) {
            return false;
        }
        Iterator<String> it = list.iterator();
        while (it.hasNext()) {
            if (list2.get(i2).toLowerCase().startsWith(it.next())) {
                return true;
            }
        }
        return false;
    }

    private boolean string2after(List<String> list, int i, List<String> list2) {
        int i2 = i + 2;
        return i2 < list2.size() && list.contains(list2.get(i2).toLowerCase());
    }
}
