package edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer;

import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.MapUtils;
import edu.northwestern.at.utils.PatternReplacer;
import edu.northwestern.at.utils.RomanNumeralUtils;
import edu.northwestern.at.utils.StringUtils;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/northwestern/at/morphadorner/corpuslinguistics/spellingstandardizer/EnglishDecruftifier.class */
public class EnglishDecruftifier {
    /** Ordered macron/tilde expansion rules passed to {@link #replaceCombiningMacrons} by {@link #decruftifyStep1}; filled in the static initializer. */
    protected static List<PatternReplacer> cmList1 = ListFactory.createNewList();
    /** Ordered macron/tilde expansion rules passed to {@link #replaceCombiningMacrons} by {@link #decruftifyStep2}; filled in the static initializer. */
    protected static List<PatternReplacer> cmList2 = ListFactory.createNewList();
    /** A {@code v} followed by any character that is not a lowercase vowel or {@code y}; rewritten as {@code u} plus that character. */
    protected static Pattern vConsonantPattern = Pattern.compile("v([^aeiouy])");
    protected static String vConsonantPatternReplacement = "u$1";
    /** A {@code u} between two vowels; rewritten as {@code v}. */
    protected static Pattern vuvPattern = Pattern.compile("([aeiouy])u([aeiouy])");
    protected static String vuvPatternReplacement = "$1v$2";
    /** A word-initial {@code u} followed by a vowel; rewritten as {@code v}. */
    protected static Pattern initUVowelPattern = Pattern.compile("^u([aeiouy])");
    protected static String initUVowelPatternReplacement = "v$1";
    /**
     * A word-initial prefix (ab, ad, con, in, per, re, sub) followed by {@code u} and a vowel;
     * the {@code u} is rewritten as {@code v}.
     *
     * <p>The prefixes are a real alternation group. Writing them inside square brackets would
     * form a character class matching a single letter, which turns "built" into "bvilt" and
     * never repairs "conuey".
     */
    protected static Pattern initPrefixvVowelPattern = Pattern.compile("^(ab|ad|con|in|per|re|sub)u([aeiouy])");
    protected static String initPrefixvVowelPatternReplacement = "$1v$2";
    /** A word-initial capital {@code I} followed by a lowercase vowel; rewritten as lowercase {@code j}. */
    protected static Pattern initIVowelPattern = Pattern.compile("^I([aeiouy])");
    protected static String initIVowelPatternReplacement = "j$1";
    /** Same rule as {@link #initPrefixvVowelPattern}; kept as a separate field because it is applied at a later point. */
    protected static Pattern prefixUVowelPattern = Pattern.compile("^(ab|ad|con|in|per|re|sub)u([aeiouy])");
    protected static String prefixUVowelPatternReplacement = "$1v$2";
    /**
     * A word-initial prefix, a capital {@code I}, then one of the listed syllable starts
     * (ect, ud, ur, uu, uv, oin, oyn); the {@code I} is rewritten as {@code j}.
     * Both groups are alternations, not character classes.
     */
    protected static Pattern syllableIsyllablePattern = Pattern.compile("^(ab|ad|con|in|per|re|sub)I(ect|ud|ur|uu|uv|oin|oyn)");
    protected static String syllableIsyllablePatternReplacement = "$1j$2";
    /** A leading underscore followed by two ASCII capital letters. */
    protected static Pattern underlineCapCap = Pattern.compile("^_([ABCDEFGHIJKLMNOPQRSTUVWXYZ])([ABCDEFGHIJKLMNOPQRSTUVWXYZ])");
    protected static String underlineCapCapPatternReplacement = "$1$2";
    /** Whole-word macron expansions; consulted before the pattern rules and loaded in the static initializer. */
    protected static Map<String, String> macronMap;
    /** Class-relative resource path from which {@link #macronMap} is loaded. */
    protected static final String macronMapResourceName = "resources/macronmap.tab";

    /* loaded from: input_file:edu/northwestern/at/morphadorner/corpuslinguistics/spellingstandardizer/EnglishDecruftifier$CruftySpelling.class */
    /**
     * Intermediate result of one decruftification step: the spelling so far, whether
     * processing is already finished, and whether the input was entirely capital letters.
     */
    public static class CruftySpelling {
        /** The spelling produced by the step. */
        String spelling;
        /** True when no further steps should be applied. */
        boolean done;
        /** True when the source spelling was all capitals and the result must be upper-cased again. */
        boolean isAllCaps;

        /**
         * @param str the spelling produced so far
         * @param z   whether processing is complete
         * @param z2  whether the original spelling was all capitals
         */
        public CruftySpelling(String str, boolean z, boolean z2) {
            spelling = str;
            done = z;
            isAllCaps = z2;
        }

        /**
         * Creates a result whose all-capitals flag is false.
         *
         * @param str the spelling produced so far
         * @param z   whether processing is complete
         */
        public CruftySpelling(String str, boolean z) {
            this(str, z, false);
        }
    }

    /**
     * Appends one regex replacement rule to a rule list.
     *
     * @param list the rule list to extend
     * @param str  the regular expression to look for
     * @param str2 the replacement text
     */
    protected static void addCombiningMacronPattern(List<PatternReplacer> list, String str, String str2) {
        PatternReplacer rule = new PatternReplacer(str, str2);
        list.add(rule);
    }

    /**
     * Expands macron abbreviations in a spelling.
     *
     * <p>A spelling present as a key in {@code macronMap} is replaced by its mapped value.
     * Otherwise the rules in {@code list} are applied in order, stopping as soon as the
     * text no longer contains the combining macron.
     *
     * @param str  the spelling to expand
     * @param list the ordered replacement rules
     * @return the expanded spelling
     */
    public static String replaceCombiningMacrons(String str, List<PatternReplacer> list) {
        if (macronMap.containsKey(str)) {
            return macronMap.get(str);
        }
        String expanded = str;
        for (PatternReplacer rule : list) {
            // Nothing left to expand once the macron is gone.
            if (expanded.indexOf(CharUtils.COMBINING_MACRON_STRING) < 0) {
                break;
            }
            expanded = rule.replace(expanded);
        }
        return expanded;
    }

    /**
     * Applies every rule in {@code list}, in order, each to the output of the previous one.
     *
     * @param str  the text to transform
     * @param list the ordered replacement rules
     * @return the text after all rules have been applied
     */
    public static String replacePatterns(String str, List<PatternReplacer> list) {
        String transformed = str;
        for (PatternReplacer rule : list) {
            transformed = rule.replace(transformed);
        }
        return transformed;
    }

    /**
     * Swaps a literal ending for another one.
     *
     * @param str  the word to examine
     * @param str2 the ending to look for
     * @param str3 the text that replaces the ending
     * @return {@code str} with a trailing {@code str2} replaced by {@code str3}, or
     *         {@code str} unchanged when it does not end with {@code str2}
     */
    public static String fixEnd(String str, String str2, String str3) {
        if (!str.endsWith(str2)) {
            return str;
        }
        int stemLength = str.length() - str2.length();
        return str.substring(0, stemLength) + str3;
    }

    /**
     * Drops the first character of a word when that character is one of a given set.
     * At most one character is removed.
     *
     * @param str  the word to examine
     * @param str2 the characters that may be stripped from the front
     * @return {@code str} without its first character when that character occurs in
     *         {@code str2}; otherwise {@code str} unchanged
     */
    public static String fixStart1(String str, String str2) {
        boolean startsWithStrippable = !str.isEmpty() && str2.indexOf(str.charAt(0)) >= 0;
        return startsWithStrippable ? str.substring(1) : str;
    }

    /**
     * Applies a compiled pattern to a string.
     *
     * <p>With a non-empty replacement this is {@code Matcher.replaceAll}. With an empty
     * replacement the whole string is repeatedly replaced by the concatenation of the
     * capture groups of the first match, until the pattern no longer matches or the
     * string stops changing.
     *
     * @param pattern the compiled pattern
     * @param str     the text to transform
     * @param str2    the replacement; an empty string selects the capture-group mode
     * @return the transformed text
     */
    public static String patternReplacer(Pattern pattern, String str, String str2) {
        Matcher matcher = pattern.matcher(str);
        if (str2.length() != 0) {
            return matcher.replaceAll(str2);
        }
        String current = str;
        while (matcher.find()) {
            StringBuilder kept = new StringBuilder();
            for (int group = 1; group <= matcher.groupCount(); group++) {
                kept.append(matcher.group(group));
            }
            String rebuilt = kept.toString();
            // A fixed point would match identically on every later pass and never terminate.
            if (rebuilt.equals(current)) {
                break;
            }
            current = rebuilt;
            matcher = pattern.matcher(current);
        }
        return current;
    }

    /**
     * Convenience overload that compiles {@code str} as a regular expression and
     * delegates to the {@link Pattern}-based overload.
     *
     * @param str  the regular expression source
     * @param str2 the text to transform
     * @param str3 the replacement
     * @return the transformed text
     */
    public static String patternReplacer(String str, String str2, String str3) {
        Pattern compiled = Pattern.compile(str);
        return patternReplacer(compiled, str2, str3);
    }

    /**
     * First cleanup pass over a raw spelling.
     *
     * <p>Spellings that are a bare possessive ending, all hyphens, all asterisks, or
     * possessive asterisks/dashes are returned untouched and marked done. Otherwise
     * leading hyphens and marker characters are stripped, an all-capitals word is reduced
     * to an initial capital (and flagged so callers can upper-case it again), long s
     * becomes {@code s}, and the first macron rule list is applied.
     *
     * @param str the raw spelling
     * @return the cleaned spelling with its done and all-capitals flags
     */
    public static CruftySpelling decruftifyStep1(String str) {
        boolean leaveUntouched = str.matches("'s|'S")
                || CharUtils.isAllHyphens(str)
                || CharUtils.isAllAsterisks(str)
                || CharUtils.isPossessiveAsterisks(str)
                || CharUtils.isPossessiveDashes(str);
        if (leaveUntouched) {
            return new CruftySpelling(str, true, false);
        }

        String word = str;
        while (word.startsWith("-")) {
            word = word.substring(1);
        }
        word = StringUtils.replaceAll(word, "[\\+\\*\\^\\|]", "");
        word = StringUtils.replaceAll(word, "ᣂ", "u");

        boolean wasAllCaps = CharUtils.allLettersCapital(word);
        if (wasAllCaps) {
            word = CharUtils.capitalizeFirstLetter(word);
        }

        if (underlineCapCap.matcher(word).find()) {
            // Drop the underscore, keep the first capital, lower-case the second.
            // The match guarantees at least three characters.
            StringBuilder rebuilt = new StringBuilder();
            rebuilt.append(word.charAt(1));
            rebuilt.append(Character.toLowerCase(word.charAt(2)));
            rebuilt.append(word.substring(3));
            word = rebuilt.toString();
        }

        word = StringUtils.replaceAll(word, CharUtils.LONG_S_STRING, "s");
        word = replaceCombiningMacrons(word, cmList1);
        if (word.length() > 1) {
            // Strip one leading asterisk, slash, underscore or caret.
            word = fixStart1(word, "*/_^");
        }
        return new CruftySpelling(word, false, wasAllCaps);
    }

    /**
     * Second pass: applies the second macron rule list.
     *
     * @param str the spelling to process
     * @return the expanded spelling, not marked done
     */
    public static CruftySpelling decruftifyStep2(String str) {
        String expanded = replaceCombiningMacrons(str, cmList2);
        return new CruftySpelling(expanded, false);
    }

    /**
     * Third pass: ending, letter-cluster and u/v/i/j rewrites.
     *
     * <p>Each rule works on the output of the previous one, so the order of the
     * statements below is significant.
     *
     * @param str the spelling to process
     * @return the rewritten spelling, not marked done
     */
    public static CruftySpelling decruftifyStep3(String str) {
        // Abbreviated endings.
        String word = str.replaceAll("yo[~̄]$", "ion").replaceAll("cio[~̄]$", "cion");
        word = fixEnd(word, "cion", "tion");
        word = fixEnd(word, "cions", "tions");
        word = fixEnd(word, "bld", "bled");
        word = fixEnd(word, "ynge", "ing");
        word = fixEnd(word, "yng", "ing");

        // i written for j.
        word = StringUtils.replaceAll(word, "ioin", "join");
        word = StringUtils.replaceAll(word, "ioyn", "join");
        word = StringUtils.replaceAll(word, "nioi", "njoy");
        word = StringUtils.replaceAll(word, "nioy", "njoy");

        word = fixEnd(word, "cyal", "cial");
        word = StringUtils.replaceAll(word, "quut", "cut");

        // Doubled v or u written for w.
        word = StringUtils.replaceAll(word, "vv", "w");
        word = StringUtils.replaceAll(word, "VV", "W");
        word = StringUtils.replaceAll(word, "Vv", "W");
        word = patternReplacer("([aeiouy])uu", word, "$1w");
        word = patternReplacer("uu([aeiouy])", word, "w$1");

        // u/v interchange.
        word = patternReplacer(vConsonantPattern, word, vConsonantPatternReplacement);
        word = patternReplacer(vuvPattern, word, vuvPatternReplacement);
        word = patternReplacer(initUVowelPattern, word, initUVowelPatternReplacement);
        word = patternReplacer(initPrefixvVowelPattern, word, initPrefixvVowelPatternReplacement);

        // The initial I-to-j rewrite is skipped for loose Roman numerals.
        if (!RomanNumeralUtils.isLooseRomanNumeral(word)) {
            word = patternReplacer(initIVowelPattern, word, initIVowelPatternReplacement);
        }

        word = patternReplacer(prefixUVowelPattern, word, prefixUVowelPatternReplacement);
        word = patternReplacer(syllableIsyllablePattern, word, syllableIsyllablePatternReplacement);
        word = fixEnd(word, "mente", "ment");
        word = fixEnd(word, "ynde", "ind");
        word = patternReplacer("^([^aeiouy])vu", word, "$1uu");
        word = patternReplacer("^('a)", word, "ha");
        return new CruftySpelling(word, false);
    }

    /**
     * Full cleanup of a spelling: step 1, apostrophe-ending expansion, steps 2 and 3,
     * then removal of any remaining tilde, underscore and combining macron.
     * A spelling that was all capitals on input is upper-cased again on output.
     *
     * @param str the raw spelling
     * @return the cleaned spelling
     */
    public static String decruftify(String str) {
        CruftySpelling first = decruftifyStep1(str);
        String result = first.spelling;
        boolean wasAllCaps = first.isAllCaps;

        if (!first.done) {
            String word = fixEnd(result, "e'd", "ed");
            word = fixEnd(word, "y'd", "ied");
            word = fixEnd(word, "'d", "ed");
            word = fixEnd(word, "'t", "ed");
            word = fixEnd(word, "'st", "est");
            word = fixEnd(word, "'red", "ered");
            // NOTE(review): a word ending in ts't has already been rewritten by the 't rule
            // above, so this rule looks unreachable - confirm the intended order.
            word = fixEnd(word, "ts't", "test");
            word = fixEnd(word, "'ry", "ery");

            word = decruftifyStep2(word).spelling;
            word = decruftifyStep3(word).spelling;

            word = StringUtils.replaceAll(word, "~", "");
            word = StringUtils.replaceAll(word, "_", "");
            result = StringUtils.replaceAll(word, CharUtils.COMBINING_MACRON_STRING, "");
        }

        return wasAllCaps ? result.toUpperCase() : result;
    }

    /**
     * Lighter cleanup of a spelling: step 1, step 2, a handful of ending rewrites, then
     * removal of any remaining tilde, underscore and combining macron. Step 3 is not run.
     * A spelling that was all capitals on input is upper-cased again on output.
     *
     * @param str the raw spelling
     * @return the cleaned spelling
     */
    public static String simpleDecruftify(String str) {
        CruftySpelling first = decruftifyStep1(str);
        String result = first.spelling;
        boolean wasAllCaps = first.isAllCaps;

        if (!first.done) {
            String word = decruftifyStep2(result).spelling;
            word = word.replaceAll("cio[~̄]$", "cion");
            word = word.replaceAll("yo[~̄]$", "yon");
            word = word.replaceAll("'d$", "ed");
            word = word.replaceAll("'t$", "ed");
            word = word.replaceAll("in'$", "ing");

            word = StringUtils.replaceAll(word, "~", "");
            word = StringUtils.replaceAll(word, "_", "");
            result = StringUtils.replaceAll(word, CharUtils.COMBINING_MACRON_STRING, "");
        }

        return wasAllCaps ? result.toUpperCase() : result;
    }

    /**
     * Produces two cleaned variants of a spelling from the same step 1 result.
     *
     * <p>Element 0 goes through three apostrophe-ending expansions, then steps 2 and 3.
     * Element 1 goes through step 2 and two ending rewrites only. Both then lose any
     * remaining tilde, underscore and combining macron, and both are upper-cased when
     * the input was all capitals.
     *
     * @param str the raw spelling
     * @return a two-element array holding the two variants
     */
    public static String[] decruftify2(String str) {
        CruftySpelling first = decruftifyStep1(str);
        String heavy = first.spelling;
        String light = first.spelling;
        boolean wasAllCaps = first.isAllCaps;

        if (!first.done) {
            String word = fixEnd(heavy, "'d", "ed");
            word = fixEnd(word, "'st", "est");
            word = fixEnd(word, "'red", "ered");
            word = decruftifyStep2(word).spelling;
            word = decruftifyStep3(word).spelling;
            word = StringUtils.replaceAll(word, "~", "");
            word = StringUtils.replaceAll(word, "_", "");
            heavy = StringUtils.replaceAll(word, CharUtils.COMBINING_MACRON_STRING, "");

            String simple = decruftifyStep2(light).spelling;
            simple = simple.replaceAll("cio[~̄]$", "cion");
            simple = simple.replaceAll("yo[~̄]$", "yon");
            simple = StringUtils.replaceAll(simple, "~", "");
            simple = StringUtils.replaceAll(simple, "_", "");
            light = StringUtils.replaceAll(simple, CharUtils.COMBINING_MACRON_STRING, "");
        }

        if (wasAllCaps) {
            heavy = heavy.toUpperCase();
            light = light.toUpperCase();
        }
        return new String[] {heavy, light};
    }

    /** Protected no-op constructor; the methods declared in this class are all static. */
    protected EnglishDecruftifier() {
    }

    /*
     * Fills the two macron rule lists and loads the whole-word macron map.
     * Rules are consulted in the order they are added, so specific rules are listed
     * before the general ones that would otherwise shadow them.
     */
    static {
        // First rule list, used by decruftifyStep1.
        addCombiningMacronPattern(cmList1, "a[~̄]n", "amn");
        addCombiningMacronPattern(cmList1, "so[~̄]times", "sometimes");
        addCombiningMacronPattern(cmList1, "so[~̄]what", "somewhat");
        addCombiningMacronPattern(cmList1, "instrue[~̄]mt", "instruement");
        addCombiningMacronPattern(cmList1, "conte[~̄]n", "contemn");
        addCombiningMacronPattern(cmList1, "do[~̄]$", "dom");
        addCombiningMacronPattern(cmList1, "cu[~̄]$", "cum");
        addCombiningMacronPattern(cmList1, "iu[~̄]$", "ium");
        addCombiningMacronPattern(cmList1, "unu[~̄]$", "unum");
        addCombiningMacronPattern(cmList1, "who[~̄]e$", "whom");
        // Any letter before a final u+macron. The letters must form a character class:
        // a parenthesised run of the whole alphabet only matches that literal 52-letter
        // sequence and so never fires.
        addCombiningMacronPattern(cmList1, "([A-Za-z])u[~̄]$", "$1um");
        addCombiningMacronPattern(cmList1, "tyra[~̄]n", "tyrann");
        addCombiningMacronPattern(cmList1, "tira[~̄]n", "tirann");
        addCombiningMacronPattern(cmList1, "tu[~̄]$", "tum");
        addCombiningMacronPattern(cmList1, "tuu[~̄]$", "tuum");
        addCombiningMacronPattern(cmList1, "ua[~̄]", "uam");
        addCombiningMacronPattern(cmList1, "uu[~̄]$", "uum");
        addCombiningMacronPattern(cmList1, "^([tT])he[~̄]n", "$1henn");
        addCombiningMacronPattern(cmList1, "^([tT])he[~̄]c", "$1henc");
        addCombiningMacronPattern(cmList1, "^([tT])he[~̄]s", "$1hems");
        // Same [tT] class as the three rules above; "(tT)" would only match the literal "tT".
        addCombiningMacronPattern(cmList1, "^([tT])he[~̄]", "$1hem");
        addCombiningMacronPattern(cmList1, "Lo[~̄]do[~̄]", "London");
        addCombiningMacronPattern(cmList1, "Macedo[~̄]", "Macedon");
        addCombiningMacronPattern(cmList1, "Abraha[~̄]", "Abraham");
        addCombiningMacronPattern(cmList1, "circu[~̄]", "circum");
        addCombiningMacronPattern(cmList1, "[~̄]([dgknst])", "n$1");
        addCombiningMacronPattern(cmList1, "[~̄]([bmp])", "m$1");
        addCombiningMacronPattern(cmList1, "so[~̄]time", "sometime");
        addCombiningMacronPattern(cmList1, "^who[~̄]$", "whom");
        addCombiningMacronPattern(cmList1, "ee[~̄]", "eme");
        addCombiningMacronPattern(cmList1, "pav[~̄]e", "pave");
        addCombiningMacronPattern(cmList1, "invo[~̄]cat", "invocat");

        // Second rule list, used by decruftifyStep2.
        addCombiningMacronPattern(cmList2, "fro[~̄]$", "from");
        addCombiningMacronPattern(cmList2, "ice[~̄]$", "icem");
        addCombiningMacronPattern(cmList2, "ma[~̄]$", "man");
        addCombiningMacronPattern(cmList2, "me[~̄]$", "men");
        addCombiningMacronPattern(cmList2, "co[~̄]e", "come");
        addCombiningMacronPattern(cmList2, "co[~̄]for", "comfor");
        addCombiningMacronPattern(cmList2, "co[~̄]l", "coml");
        addCombiningMacronPattern(cmList2, "sco[~̄]f", "scomf");
        addCombiningMacronPattern(cmList2, "co[~̄]", "con");
        addCombiningMacronPattern(cmList2, "dde[~̄]$", "dden");
        addCombiningMacronPattern(cmList2, "ke[~̄]$", "ken");
        addCombiningMacronPattern(cmList2, "wha[~̄]$", "whan");
        addCombiningMacronPattern(cmList2, "whe[~̄]$", "when");
        addCombiningMacronPattern(cmList2, "o[~̄]u", "onv");
        addCombiningMacronPattern(cmList2, "ione[~̄]$", "ionem");
        addCombiningMacronPattern(cmList2, "tate[~̄]$", "tatem");
        addCombiningMacronPattern(cmList2, "nte[~̄]", "ntem");
        addCombiningMacronPattern(cmList2, "ale[~̄]", "alem");
        addCombiningMacronPattern(cmList2, "ile[~̄]", "ilem");
        addCombiningMacronPattern(cmList2, "dine[~̄]", "dinem");
        addCombiningMacronPattern(cmList2, "gine[~̄]", "ginem");
        addCombiningMacronPattern(cmList2, "ore[~̄]", "orem");
        addCombiningMacronPattern(cmList2, "[~̄]q", "nq");
        addCombiningMacronPattern(cmList2, "[~̄]ly", "nly");
        addCombiningMacronPattern(cmList2, "ou[~̄]", "oun");
        addCombiningMacronPattern(cmList2, "io[~̄]", "ion");
        addCombiningMacronPattern(cmList2, "to[~̄]", "ton");
        addCombiningMacronPattern(cmList2, "([aeiou])[~̄]", "$1n");

        // Start from an empty map so lookups work even when the resource cannot be loaded.
        macronMap = MapFactory.createNewMap();
        try {
            Map<String, String> loadedMap = MapUtils.loadMap(EnglishDecruftifier.class.getResource(macronMapResourceName), "\t", "", "utf-8");
            if (loadedMap != null) {
                macronMap = loadedMap;
            }
        } catch (Exception ignored) {
            // Best effort: a missing or unreadable resource leaves the empty map in place,
            // and the pattern rules above still apply. Throwing here would make the whole
            // class unusable.
        }
    }
}
