package edu.northwestern.at.morphadorner.tools.fixquotes;

import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.SingleTagTaggedStrings;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.TaggedStrings;
import edu.northwestern.at.utils.TextFile;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/northwestern/at/morphadorner/tools/fixquotes/FixQuotes.class */
/**
 * Converts typewriter-style quotation marks (<code>'</code>, <code>"</code>,
 * <code>`</code> and <code>``</code>) in a text into the XML entities
 * <code>&amp;lsquo;</code>, <code>&amp;rsquo;</code>, <code>&amp;ldquo;</code>,
 * <code>&amp;rdquo;</code> and <code>&amp;apos;</code>.
 *
 * <p>All functionality is exposed through static methods; {@link #main} is a
 * small command line driver around {@link #repairQuotes}.</p>
 */
public class FixQuotes {
    /** Entity written for an opening (left) single quote. */
    protected static final String lsquo = "&lsquo;";
    /** Entity written for an opening (left) double quote. */
    protected static final String ldquo = "&ldquo;";
    /** Entity written for a closing (right) single quote. */
    protected static final String rsquo = "&rsquo;";
    /** Entity written for a closing (right) double quote. */
    protected static final String rdquo = "&rdquo;";
    /** Entity written for an apostrophe inside a word or contraction. */
    protected static final String apos = "&apos;";
    /** Private-use placeholder for a single quote that has whitespace on both sides. */
    protected static final String sq = "\ue060";
    /** Private-use placeholder for a double quote that has whitespace on both sides. */
    protected static final String dq = "\ue061";

    /**
     * Matches a token that starts or ends with an apostrophe: group 1 is the
     * non-word character in front of it, group 2 the token itself. Compiled
     * once because the expression never changes.
     */
    private static final Pattern CONTRACTION_PATTERN =
            Pattern.compile("(\\W)('\\w*|\\w*')(?=\\W)");

    /**
     * Command line entry point: reads the file named by the first argument,
     * repairs its quotes and writes the result to the file named by the
     * second argument. Both files are handled as utf-8.
     *
     * @param strArr {@code [0]} input file name, {@code [1]} output file name
     */
    public static void main(String[] strArr) {
        if (strArr == null || strArr.length < 2) {
            System.err.println("Usage: FixQuotes <input file> <output file>");
            return;
        }
        String text;
        try {
            text = FileUtils.readTextFile(strArr[0], "utf-8");
        } catch (Exception e) {
            e.printStackTrace();
            // Stop here: carrying on would overwrite the output file with
            // empty content even though nothing was actually processed.
            return;
        }
        TaggedStrings contractions = loadContractions("resources/contractions.txt");
        try {
            Matcher contractionMatcher = buildContractionsPattern(contractions).matcher("");
            String repaired = repairQuotes(text, contractionMatcher, contractions);
            FileUtils.writeTextFile(strArr[1], false, repaired, "utf-8");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces typewriter quotes in {@code str} with quote entities.
     *
     * <p>The rewrite rules are applied strictly in sequence; later rules only
     * see the quote characters that earlier rules left untouched, so their
     * order must not be changed.</p>
     *
     * @param str           the text to repair; must not be {@code null}
     * @param matcher       matcher used to locate contraction candidates, with
     *                      group 1 = preceding character and group 2 = token
     *                      (see {@link #buildContractionsPattern}); when
     *                      {@code null} the contraction step is skipped
     * @param taggedStrings the known contractions; consulted only when
     *                      {@code matcher} is not {@code null}
     * @return the text with quote characters replaced by entities
     */
    public static String repairQuotes(String str, Matcher matcher, TaggedStrings taggedStrings) {
        // Backticks are unambiguous opening quotes. Quotes with whitespace on
        // both sides give no hint about direction, so they are parked as
        // placeholders and resolved by alternation at the very end.
        String text = str.replaceAll("``", ldquo)
                .replaceAll("`", lsquo)
                .replaceAll("(\\s)\"(?=\\s)", "$1" + dq)
                .replaceAll("(\\s)'(?=\\s)", "$1" + sq);

        if (matcher != null) {
            text = protectContractions(text, matcher, taggedStrings);
        }

        // --- single quotes (and single/double pairs) ---
        // A quote at the start of the input followed by punctuation opens.
        text = text.replaceAll("^'(?=\\p{Punct}\\B)", lsquo);
        text = text.replaceAll("^\"(?=\\p{Punct}\\B)", ldquo);
        // Adjacent double+single pairs: opening before a word character,
        // closing before a non-word character.
        text = text.replaceAll("\"'(?=\\w)", "&ldquo;&lsquo;");
        text = text.replaceAll("\"'(?=\\W)", "&rdquo;&rsquo;");
        text = text.replaceAll("'\"(?=\\w)", "&lsquo;&ldquo;");
        text = text.replaceAll("'\"(?=\\W)", "&rsquo;&rdquo;");
        // Abbreviated decades such as '60s take an apostrophe.
        text = text.replaceAll("'(?=\\d{2}s)", apos);
        // After whitespace or a run of dashes and before a word: opening quote.
        text = text.replaceAll("(\\s|-{1,}|–{1,}|—{1,})'(?=\\w)", "$1&lsquo;");
        // Directly in front of a word character (typically word-internal): apostrophe.
        text = text.replaceAll("(\\w)?'(\\w)", "$1&apos;$2");
        // NOTE(review): the leading group is optional, so this appears to match
        // every apostrophe still present, leaving nothing for the next two
        // steps - confirm whether that is intended.
        text = text.replaceAll("([^\\ \\t\\r\\n\\[\\{\\(\\-–—])?'", "$1&rsquo;");
        text = text.replaceAll("'(?=\\s|s\\b|S\\b)", rsquo);
        // Presumably a literal (non-regex) replacement - verify against StringUtils.
        text = StringUtils.replaceAll(text, "'", lsquo);

        // --- double quotes, mirroring the single quote rules ---
        text = text.replaceAll("(\\s|-{1,}|–{1,}|—{1,})\"(?=\\w)", "$1&ldquo;");
        // NOTE(review): same optional-group remark as for the single quote rule.
        text = text.replaceAll("([^\\ \\t\\r\\n\\[\\{\\(\\-–—])?\"", "$1&rdquo;");
        text = text.replaceAll("\"(?=\\s)", rdquo);
        text = StringUtils.replaceAll(text, "\"", ldquo);

        // Resolve the parked free-standing quotes: first opens, next closes, ...
        text = alternateQuotes(text, sq, lsquo, rsquo);
        return alternateQuotes(text, dq, ldquo, rdquo);
    }

    /**
     * Turns the apostrophe of every matched token that is a known contraction
     * into {@link #apos}, so the later quote rules leave it alone.
     */
    private static String protectContractions(String text, Matcher matcher, TaggedStrings contractions) {
        matcher.reset(text);
        // StringBuffer rather than StringBuilder: the StringBuilder overload of
        // appendReplacement only exists from Java 9 onwards.
        StringBuffer rebuilt = new StringBuffer(text.length());
        while (matcher.find()) {
            String lead = matcher.group(1);
            String token = matcher.group(2);
            if (contractions.containsString(token)) {
                token = token.replace("'", apos);
            }
            // The matched text is data, not a replacement template: group 1 is
            // any non-word character and may be '$' or '\', which
            // appendReplacement would otherwise interpret (throwing or
            // silently dropping the character).
            matcher.appendReplacement(rebuilt, Matcher.quoteReplacement(lead + token));
        }
        matcher.appendTail(rebuilt);
        return rebuilt.toString();
    }

    /**
     * Replaces successive occurrences of {@code placeholder} alternately with
     * {@code open} and {@code close}, starting with {@code open}, in a single
     * pass over the text.
     */
    private static String alternateQuotes(String text, String placeholder, String open, String close) {
        int hit = text.indexOf(placeholder);
        if (hit < 0) {
            return text;
        }
        StringBuilder out = new StringBuilder(text.length() + 16);
        int copiedUpTo = 0;
        boolean closing = false;
        while (hit >= 0) {
            out.append(text, copiedUpTo, hit).append(closing ? close : open);
            closing = !closing;
            copiedUpTo = hit + placeholder.length();
            hit = text.indexOf(placeholder, copiedUpTo);
        }
        out.append(text, copiedUpTo, text.length());
        return out.toString();
    }

    /**
     * Loads the contractions list from a class path resource, read as utf-8.
     *
     * @param str resource name, resolved through
     *            {@code DefaultWordTokenizer.class.getResourceAsStream}
     * @return the resource's entries as tagged strings, all created with the tag "1"
     */
    public static TaggedStrings loadContractions(String str) {
        TextFile contractionsFile =
                new TextFile(DefaultWordTokenizer.class.getResourceAsStream(str), "utf-8");
        return new SingleTagTaggedStrings(contractionsFile.toArray(), "1");
    }

    /**
     * Returns the pattern that finds contraction candidates: a non-word
     * character (group 1) followed by a token that starts or ends with an
     * apostrophe (group 2), with a non-word character after it.
     *
     * @param taggedStrings not used; the pattern does not depend on the
     *                      contents of the contractions list
     * @return the shared, precompiled pattern
     */
    public static Pattern buildContractionsPattern(TaggedStrings taggedStrings) {
        return CONTRACTION_PATTERN;
    }

    /** Protected because the class only offers static methods. */
    protected FixQuotes() {
    }
}
