package edu.northwestern.at.morphadorner.tools.tcp;

import edu.northwestern.at.morphadorner.tools.compareadornedfiles.AdornedWordsLoader;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.CountMapUtils;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.MapUtils;
import edu.northwestern.at.utils.SetFactory;
import edu.northwestern.at.utils.SetUtils;
import edu.northwestern.at.utils.StringUtils;
import java.io.PrintStream;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/northwestern/at/morphadorner/tools/tcp/CountDividedWords.class */
/**
 * Command line tool that counts words in adorned files and collects the
 * words that contain a division marker.
 *
 * <p>Usage: {@code CountDividedWords dividedWordsFile wordsAndCountsFile inputFile ...}</p>
 *
 * <ul>
 *   <li>The first parameter names the output file receiving the sorted set of
 *       divided words.</li>
 *   <li>The second parameter names the output file receiving the sorted
 *       word/count map (tab separated).</li>
 *   <li>The remaining parameters are input file names; wildcards are expanded
 *       with {@code FileNameUtils.expandFileNameWildcards}.</li>
 * </ul>
 *
 * <p>For each word, the texts of all of its parts are joined, non-breaking
 * hyphens are replaced by vertical bars, and the result is counted. Words
 * whose resulting text contains a vertical bar are also recorded as divided
 * words.</p>
 *
 * <p>All state is held in static fields, including a shared {@link Matcher},
 * so this class is not safe for concurrent use.</p>
 */
public class CountDividedWords {
    /** Number of leading command line parameters that precede the input file names. */
    protected static final int INITPARAMS = 2;

    /** Highest word part number probed when collecting the parts of a split word. */
    protected static final int MAX_WORD_PARTS = 100;

    /** Output stream; not referenced by the code in this class. */
    protected static PrintStream printStream;

    /** Number of input files found after wildcard expansion. */
    protected static int filesToProcess = 0;

    /** 1-based number of the file currently being processed. */
    protected static int currentFileNumber = 0;

    /** Total number of adorned word IDs seen across all processed files. */
    protected static int totalWords = 0;

    /** Name of the output file for the divided words set. */
    protected static String dividedWordsFileName = null;

    /** Name of the output file for the word/count map. */
    protected static String wordsAndCountsFileName = null;

    /** Maps each word text to the number of times it was seen. */
    protected static Map<String, Number> wordsAndCounts = MapFactory.createNewMap();

    /** Word texts that contain at least one vertical bar. */
    protected static Set<String> dividedWords = SetFactory.createNewSet();

    /**
     * Matches a trailing {@code .N} word part suffix on a word ID and captures
     * the part number. One or more digits are accepted so that part numbers of
     * 10 and above are recognised, in line with {@link #getWordPartIDs}, which
     * probes part numbers up to {@link #MAX_WORD_PARTS}.
     */
    protected static Matcher partWordMatcher = Pattern.compile("\\.(\\d+)$").matcher("");

    /**
     * Program entry point.
     *
     * <p>Exits with status 1 when the parameters are insufficient or when
     * processing throws an exception.</p>
     *
     * @param strArr command line parameters (see the class description)
     */
    public static void main(String[] strArr) {
        int filesProcessed = 0;
        long elapsedSeconds = 0;
        try {
            if (!initialize(strArr)) {
                System.exit(1);
            }
            long startTime = System.currentTimeMillis();
            filesProcessed = processFiles(strArr);
            // Round the elapsed time up to whole seconds.
            elapsedSeconds = ((System.currentTimeMillis() - startTime) + 999) / 1000;
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
        terminate(filesProcessed, elapsedSeconds);
    }

    /**
     * Validates the command line and stores the two output file names.
     *
     * @param strArr command line parameters
     * @return {@code true} when both output file names and at least one input
     *         file name were supplied, {@code false} otherwise
     * @throws Exception declared for callers; not thrown by this implementation
     */
    protected static boolean initialize(String[] strArr) throws Exception {
        // Need the fixed leading parameters plus at least one input file.
        if (strArr.length < INITPARAMS + 1) {
            System.err.println("Not enough parameters.");
            return false;
        }
        dividedWordsFileName = strArr[0];
        wordsAndCountsFileName = strArr[1];
        return true;
    }

    /**
     * Loads one adorned file and updates the word counts and divided words.
     *
     * <p>Any exception raised while handling the file is reported and
     * swallowed so that the remaining files can still be processed.</p>
     *
     * @param str name of the adorned file to process
     */
    protected static void processOneFile(String str) {
        currentFileNumber++;
        System.out.println("Processing " + str + " (" + currentFileNumber + "/" + filesToProcess + ")");
        try {
            AdornedWordsLoader adornedWordsLoader = new AdornedWordsLoader(str);
            List<String> adornedWordIDs = adornedWordsLoader.getAdornedWordIDs();
            totalWords += adornedWordIDs.size();
            for (String wordID : adornedWordIDs) {
                // Later parts of a split word are folded into the word that
                // starts at its first part, so only first parts are handled.
                if (!isFirstWordPart(wordID)) {
                    continue;
                }
                String wordText = getWordText(adornedWordsLoader, wordID);
                if (CharUtils.isPunctuationOrSymbol(wordText) || wordID.endsWith("-eos")) {
                    continue;
                }
                // Turn non-breaking hyphens into vertical bars, then collapse
                // doubled markers into a single vertical bar.
                String normalized = StringUtils.replaceAll(wordText, CharUtils.NONBREAKING_HYPHEN_STRING, CharUtils.VERTICAL_BAR_STRING);
                normalized = StringUtils.replaceAll(normalized, "‑‑", CharUtils.VERTICAL_BAR_STRING);
                normalized = StringUtils.replaceAll(normalized, "||", CharUtils.VERTICAL_BAR_STRING);
                if (normalized.length() == 0) {
                    // Report the anomaly; the empty text is still counted below.
                    System.out.println("   Empty word at " + wordID);
                }
                CountMapUtils.updateWordCountMap(normalized, 1, wordsAndCounts);
                if (normalized.indexOf(CharUtils.VERTICAL_BAR_STRING) >= 0) {
                    dividedWords.add(normalized);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("   *** Processing of " + str + " failed.");
        }
    }

    /**
     * Tells whether a word ID denotes an unsplit word or the first part of a
     * split word.
     *
     * @param str the word ID
     * @return {@code true} when the ID has no {@code .N} suffix or when the
     *         suffix is {@code .1}; {@code false} for any other part number
     */
    protected static boolean isFirstWordPart(String str) {
        partWordMatcher.reset(str);
        if (!partWordMatcher.find()) {
            return true;
        }
        return "1".equals(partWordMatcher.group(1));
    }

    /**
     * Returns the full text of a word by joining the texts of all its parts.
     *
     * @param adornedWordsLoader loader holding the adorned word data
     * @param str                ID of the word, or of one of its parts
     * @return the concatenated text of every part, in part order
     */
    protected static String getWordText(AdornedWordsLoader adornedWordsLoader, String str) {
        List<String> wordPartIDs = getWordPartIDs(adornedWordsLoader, str);
        StringBuilder text = new StringBuilder();
        for (String partID : wordPartIDs) {
            text.append(adornedWordsLoader.getAdornedWordData(partID).getWordText());
        }
        return text.toString();
    }

    /**
     * Collects the IDs of all parts belonging to the same word as the given ID.
     *
     * <p>When the ID carries a {@code .N} part suffix, part IDs are generated
     * from {@code .1} upwards until the loader has no data for a part or
     * {@link #MAX_WORD_PARTS} is reached. Otherwise the ID itself is the only
     * entry.</p>
     *
     * @param adornedWordsLoader loader used to check which part IDs exist
     * @param str                ID of the word, or of one of its parts
     * @return the list of part IDs in ascending part order
     */
    protected static List<String> getWordPartIDs(AdornedWordsLoader adornedWordsLoader, String str) {
        List<String> partIDs = ListFactory.createNewList();
        partWordMatcher.reset(str);
        if (!partWordMatcher.find()) {
            partIDs.add(str);
            return partIDs;
        }
        String baseID = str.substring(0, str.lastIndexOf("."));
        for (int part = 1; part <= MAX_WORD_PARTS; part++) {
            String partID = baseID + "." + part;
            if (adornedWordsLoader.getAdornedWordData(partID) == null) {
                break;
            }
            partIDs.add(partID);
        }
        return partIDs;
    }

    /**
     * Processes every input file named on the command line and writes the
     * two output files.
     *
     * @param strArr command line parameters; entries from index
     *               {@link #INITPARAMS} onward are input file names or wildcards
     * @return the number of input files after wildcard expansion
     * @throws Exception if wildcard expansion or saving the results fails
     */
    protected static int processFiles(String[] strArr) throws Exception {
        // Strip the leading output file parameters to leave only input names.
        String[] inputSpecs = new String[strArr.length - INITPARAMS];
        System.arraycopy(strArr, INITPARAMS, inputSpecs, 0, inputSpecs.length);
        String[] fileNames = FileNameUtils.expandFileNameWildcards(inputSpecs);
        filesToProcess = fileNames.length;
        for (String fileName : fileNames) {
            processOneFile(fileName);
        }
        SetUtils.saveSortedSet(dividedWords, dividedWordsFileName, "utf-8");
        MapUtils.saveSortedMap(wordsAndCounts, wordsAndCountsFileName, "\t", "", "utf-8");
        return fileNames.length;
    }

    /**
     * Prints the final summary line.
     *
     * @param i number of files processed
     * @param j elapsed processing time in seconds
     */
    protected static void terminate(int i, long j) {
        System.out.println("Processed " + Formatters.formatLongWithCommas(totalWords) + StringUtils.pluralize(totalWords, " word in ", " words in ") + Formatters.formatIntegerWithCommas(i) + StringUtils.pluralize(i, " file in ", " files in ") + Formatters.formatLongWithCommas(j) + StringUtils.pluralize(j, " second.", " seconds."));
    }
}
