package uk.ac.cam.ch.wwmm.chemicaltagger;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.xmlcml.euclid.EuclidConstants;
import org.xmlcml.euclid.Util;
import uk.ac.cam.ch.wwmm.oscar.tools.StringTools;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/chemicaltagger/Formatter.class */
/**
 * Static helper that normalises free text by inserting spaces around
 * punctuation, brackets, units and similar constructs, so that the result is a
 * single-space separated sequence of tokens.
 *
 * <p>The class holds no state and cannot be instantiated.
 */
public class Formatter {
    /** Lower-case tokens whose trailing period is kept attached to the token. */
    private static final List<String> ABV_LIST = Arrays.asList("et.", "al.", "etc.", "e.g.", "i.e.", "vol.", "ca.", "wt.", "aq.", "mt.", "e.g.:");
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile(EuclidConstants.S_WHITEREGEX);
    /** Capitalised token ending in a period (e.g. "Fig."); its period is not split off. */
    private static final Pattern ABBREVIATION_PATTERN = Pattern.compile("-?[A-Z]+[a-z]*\\.");
    /** Number directly followed by a unit at the end of the token (e.g. "5mL", "10g"). */
    private static final Pattern CONCAT_AMOUNT_PATTERN = Pattern.compile("(\\d(\\d+|\\.\\d+|\\d*[mkµ])(g|l|hPa)[s]?|(\\d+[mnkµ]?([LMN]|[eE][qQ][\\.]?|[cCdD][mM]3|[gG][rR][aA][mM][mM]?[eE]?|[mM][oO][lL][eE]?)[sS]?))$");
    private static final Pattern CONCAT_PH_PATTERN = Pattern.compile("^pH-?\\d+");
    private static final Pattern CONCAT_TEMP_PATTERN = Pattern.compile("\\d+(o|°|º)[cCfF][\\.]?");
    private static final Pattern CONCAT_HYPHENED_DIRECTION_PATTERN = Pattern.compile("^[A-Z]\\-\\d+");
    private static final Pattern CONCAT_SLASH_DIRECTION_PATTERN = Pattern.compile("^[A-Z]\\/\\d*$");
    private static final Pattern CONCAT_EQUATION_PATTERN = Pattern.compile("([a-z]*)([=\\u00d7])(\\d+)");
    /**
     * Clock time such as "9:05", "10:30" or "23:59pm"; such tokens keep their
     * colon. The hour part accepts 0-23 (with an optional leading zero).
     */
    private static final Pattern TIME_EXPRESSION = Pattern.compile("^([01]?\\d|2[0-3]):[0-5]\\d([ap]m)?$", Pattern.CASE_INSENSITIVE);
    /** A token that starts with C or F followed by punctuation or nothing. */
    private static final Pattern TEMPERATURE_UNITS = Pattern.compile("[cCfF]([.,;:()\\[\\]{}]|$)");
    private static final Pattern MATCH_SULPH = Pattern.compile("sulph", Pattern.CASE_INSENSITIVE);
    /** Digit run that is immediately followed by a degree marker and C/F. */
    private static final Pattern NUMBER_BEFORE_TEMP_UNIT = Pattern.compile("(\\d+)(?=[o°º][cCfF])");

    private Formatter() {
    }

    /**
     * Normalises a piece of text into space-separated tokens.
     *
     * <p>The text is first pre-processed (dash variants mapped to "-", spaces
     * inserted around {@code % ; < > /}), then split on whitespace. Each token
     * is examined in turn and spaces are inserted around trailing periods and
     * commas, unbalanced or wrapping parentheses, concatenated amounts, pH
     * values, temperatures, hyphen/slash directions, equations and colons
     * (except in clock times). Every case-insensitive occurrence of "sulph" is
     * replaced by "sulf". Finally all whitespace runs are collapsed to a single
     * space and the result is trimmed.
     *
     * @param str the text to normalise; must not be {@code null}
     * @return the normalised text
     */
    public static String normaliseText(String str) {
        String preprocessed = str
                .replace(EuclidConstants.S_PERCENT, " %")
                .replace(EuclidConstants.S_SEMICOLON, " ;")
                .replace("‐", "-")
                .replace("‑", "-")
                .replace("‒", "-")
                .replace(StringTools.enDash, "-")
                .replace(StringTools.emDash, "-")
                .replace("―", "-")
                // NOTE(review): as written this replaces "-" with itself; presumably
                // another dash variant was intended here - confirm against upstream.
                .replace("-", "-")
                .replace("−", "-")
                .replace(EuclidConstants.S_LANGLE, " < ")
                .replace(EuclidConstants.S_RANGLE, " > ")
                .replace("/", " / ");
        String[] tokens = WHITESPACE_PATTERN.split(preprocessed);

        StringBuilder result = new StringBuilder();
        for (int index = 0; index < tokens.length; index++) {
            String token = tokens[index];
            String prefix = EuclidConstants.S_SPACE;
            String suffix = EuclidConstants.S_SPACE;

            // "25°" followed by "C": move the degree sign onto the next token so
            // that the unit ends up as "°C". This deliberately rewrites the
            // following array element before it is visited.
            boolean endsWithDegree = token.endsWith("°") || token.endsWith("º");
            if (endsWithDegree && index + 1 < tokens.length
                    && TEMPERATURE_UNITS.matcher(tokens[index + 1]).lookingAt()) {
                char degree = token.charAt(token.length() - 1);
                token = token.substring(0, token.length() - 1);
                tokens[index + 1] = degree + tokens[index + 1];
            }

            // Split off a sentence-final period unless the token looks like an
            // abbreviation.
            if (token.endsWith(".")
                    && !ABBREVIATION_PATTERN.matcher(token).find()
                    && !ABV_LIST.contains(token.toLowerCase())) {
                token = token.substring(0, token.length() - 1);
                suffix = " ." + suffix;
            }

            // Put spaces around "=" / "×" in things like "x=5". Extra spaces
            // produced by repeated replacement are collapsed at the end.
            Matcher equation = CONCAT_EQUATION_PATTERN.matcher(token);
            while (equation.find()) {
                String operator = equation.group(2);
                token = token.replace(operator, EuclidConstants.S_SPACE + operator + EuclidConstants.S_SPACE);
            }

            // A temperature such as "25°C." is kept by the abbreviation check
            // above (it matches "C."), so its period is split off here.
            if (token.endsWith(".") && (token.contains("°") || token.contains("º"))) {
                token = token.substring(0, token.length() - 1);
                suffix = " ." + suffix;
            }

            // "K." right after a numeric token: split the period off.
            if (token.equals("K.") && index > 0
                    && StringUtils.isNumeric(tokens[index - 1].replace(".", ""))) {
                token = "K .";
            }

            if (token.endsWith(EuclidConstants.S_COMMA)) {
                token = token.substring(0, token.length() - 1);
                suffix = " ," + suffix;
            }

            if (token.startsWith("(")) {
                // Leading "(" is split off when Util.indexOfBalancedBracket
                // reports a negative index (presumably: no matching close
                // bracket inside this token).
                if (Util.indexOfBalancedBracket('(', token) < 0) {
                    token = token.substring(1);
                    prefix = prefix + "( ";
                }
            } else if (token.trim().endsWith(")")) {
                // Trailing ")" with no "(" before it in the token is split off.
                String withoutClose = token.substring(0, token.length() - 1);
                if (withoutClose.indexOf('(') < 0) {
                    token = withoutClose;
                    suffix = " )" + suffix;
                }
            }

            // Token still wrapped in parentheses: split both off.
            if (token.startsWith("(") && token.endsWith(")")) {
                token = token.substring(1, token.length() - 1);
                prefix = prefix + "( ";
                suffix = " )" + suffix;
            }

            if (CONCAT_AMOUNT_PATTERN.matcher(token).find()) {
                token = splitAmounts(token);
            }
            if (CONCAT_PH_PATTERN.matcher(token).find()) {
                token = token.substring(0, 2) + EuclidConstants.S_SPACE + token.substring(2);
            }
            if (CONCAT_TEMP_PATTERN.matcher(token).find()) {
                token = splitTemperature(token);
            }
            if (CONCAT_HYPHENED_DIRECTION_PATTERN.matcher(token).find()) {
                token = token.replace("-", " - ");
            }
            if (!TIME_EXPRESSION.matcher(token).find()) {
                token = token.replace(EuclidConstants.S_COLON, " : ");
            }
            if (CONCAT_SLASH_DIRECTION_PATTERN.matcher(token).find()) {
                token = token.replace("/", " / ");
            }

            result.append(prefix)
                    .append(MATCH_SULPH.matcher(token).replaceAll("sulf"))
                    .append(suffix);
        }
        return WHITESPACE_PATTERN.matcher(result.toString()).replaceAll(EuclidConstants.S_SPACE).trim();
    }

    /**
     * Inserts a space in front of the first letter of the token, e.g.
     * "5mL" becomes "5 mL". If the token contains no letter the space is
     * appended at the end.
     *
     * @param str token containing a concatenated amount
     * @return the token with one space inserted
     */
    private static String splitAmounts(String str) {
        int splitAt = str.length();
        for (int i = 0; i < str.length(); i++) {
            if (Character.isLetter(str.charAt(i))) {
                splitAt = i;
                break;
            }
        }
        return str.substring(0, splitAt) + EuclidConstants.S_SPACE + str.substring(splitAt);
    }

    /**
     * Inserts a space between a number and the degree marker that follows it,
     * e.g. "100°C" becomes "100 °C".
     *
     * <p>Only the digit run directly in front of the degree marker is
     * considered, so tokens whose digits are not contiguous ("0.5°C",
     * "25-30°C") are split as well.
     *
     * @param str token containing a concatenated temperature
     * @return the token with a space before the degree marker
     */
    private static String splitTemperature(String str) {
        return NUMBER_BEFORE_TEMP_UNIT.matcher(str).replaceAll("$1" + EuclidConstants.S_SPACE);
    }
}
