package edu.northwestern.at.morphadorner.tools.findteitextlanguage;

import edu.northwestern.at.morphadorner.corpuslinguistics.languagerecognizer.DefaultLanguageRecognizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.languagerecognizer.LanguageRecognizer;
import edu.northwestern.at.utils.Compare;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.ScoredString;
import edu.northwestern.at.utils.SetFactory;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.xml.TEITextExtractorHandler;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Set;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

/* loaded from: input_file:edu/northwestern/at/morphadorner/tools/findteitextlanguage/FindTEITextLanguage.class */
/**
 * Command-line tool that reports the most likely languages of TEI XML texts.
 *
 * <p>Usage: {@code FindTEITextLanguage outputFile input1 [input2 ...]}. The
 * first argument names a tab-separated output file; the remaining arguments
 * are input file names, which are passed through
 * {@code FileNameUtils.expandFileNameWildcards} before processing. For each
 * input the extracted text is handed to a {@link LanguageRecognizer}, and the
 * results are written both to standard output (column-aligned) and to the
 * output file (tab-separated). Progress and errors go to standard error.
 */
public class FindTEITextLanguage {
    /** Number of leading command-line arguments that precede the input file names (the output file name). */
    protected static final int INITPARAMS = 1;

    /** Factory used to create a fresh SAX parser for each input file; set by {@link #initialize}. */
    protected static SAXParserFactory parserFactory;

    /** Language recognizer applied to each extracted text; set by {@link #initialize}. */
    protected static LanguageRecognizer recognizer = null;

    /** Results for all successfully processed files, ordered by {@link DocData#compareTo}. */
    protected static Set<DocData> outputSet = SetFactory.createNewSortedSet();

    /** Total number of input files after wildcard expansion. */
    protected static int filesToProcess = 0;

    /** One-based index of the file currently being processed (for progress messages). */
    protected static int currentFileNumber = 0;

    /** Length of the longest document title seen so far; used to align console output. */
    protected static int longestTitle = 0;

    /**
     * Result record for one processed document: its file name, display title,
     * text length, and a fixed number of scored language guesses.
     */
    public static class DocData implements Comparable {
        /** Number of language slots stored per document; missing entries are padded. */
        private static final int LANGUAGE_SLOTS = 3;

        /** Input file name exactly as it was passed to the parser. */
        public String docFileName;

        /** Display title (the file name with its path stripped, as supplied by the caller). */
        public String docTitle;

        /** Length in characters of the whitespace-normalized extracted text. */
        public int docLength;

        /** Exactly {@code LANGUAGE_SLOTS} scored languages, padded with empty-name/zero-score entries. */
        public ScoredString[] docLanguages;

        /**
         * Creates a result record, keeping at most the first three scored languages.
         *
         * @param fileName  input file name
         * @param title     display title for the document
         * @param length    length of the extracted text
         * @param languages scored language guesses in the order returned by the
         *                  recognizer; may be {@code null} or shorter than three
         *                  entries, in which case the remaining slots are filled
         *                  with an empty-name, zero-score placeholder
         */
        public DocData(String fileName, String title, int length, ScoredString[] languages) {
            this.docFileName = fileName;
            this.docTitle = title;
            this.docLength = length;
            ScoredString[] slots = new ScoredString[LANGUAGE_SLOTS];
            for (int slot = 0; slot < LANGUAGE_SLOTS; slot++) {
                ScoredString candidate =
                    (languages != null && slot < languages.length) ? languages[slot] : null;
                // A null entry would make compareTo and the printers fail later,
                // so it is treated the same as a missing entry.
                slots[slot] = (candidate != null) ? candidate : new ScoredString("", 0.0d);
            }
            this.docLanguages = slots;
        }

        /**
         * Orders documents by the reverse of their language entries' natural
         * order, slot by slot, and then by the reverse of the file name order.
         *
         * @param obj the object to compare against
         * @return a negative, zero, or positive value following the
         *         {@link Comparable} convention; {@link Integer#MIN_VALUE} when
         *         {@code obj} is {@code null} or not a {@code DocData}
         */
        @Override // java.lang.Comparable
        public int compareTo(Object obj) {
            if (!(obj instanceof DocData)) {
                // Historical behavior: non-DocData (including null) arguments
                // yield Integer.MIN_VALUE rather than an exception.
                return Integer.MIN_VALUE;
            }
            DocData other = (DocData) obj;
            int result = 0;
            // docLanguages is a public mutable field, so bound the loop by the
            // shorter array instead of assuming both hold LANGUAGE_SLOTS entries.
            int slots = Math.min(this.docLanguages.length, other.docLanguages.length);
            for (int slot = 0; slot < slots && result == 0; slot++) {
                result = this.docLanguages[slot].compareTo(other.docLanguages[slot]);
            }
            if (result == 0) {
                result = Compare.compare(this.docFileName, other.docFileName);
            }
            // Reverse the order by sign only: plain negation overflows when the
            // delegate returns Integer.MIN_VALUE.
            return result < 0 ? 1 : (result > 0 ? -1 : 0);
        }
    }

    /**
     * Program entry point.
     *
     * @param strArr command-line arguments: the output file name followed by
     *               one or more input file names or wildcards
     */
    public static void main(String[] strArr) {
        if (!initialize(strArr)) {
            System.exit(1);
        }
        long startMillis = System.currentTimeMillis();
        int fileCount = processFiles(strArr);
        try {
            printResults();
        } catch (Exception e) {
            System.err.println("Unable to print results.");
            System.err.println("   " + e);
        }
        try {
            outputResults(strArr[0]);
        } catch (Exception e2) {
            System.err.println("Unable to output results to " + strArr[0]);
            System.err.println("   " + e2);
        }
        // Round the elapsed time up to whole seconds.
        long elapsedSeconds = ((System.currentTimeMillis() - startMillis) + 999) / 1000;
        terminate(fileCount, elapsedSeconds);
    }

    /**
     * Validates the argument count and creates the parser factory and the
     * language recognizer.
     *
     * @param strArr command-line arguments
     * @return {@code true} if processing can proceed; {@code false} if there
     *         are too few arguments or the recognizer could not be created
     */
    protected static boolean initialize(String[] strArr) {
        // Need the leading output file name plus at least one input file.
        if (strArr == null || strArr.length < INITPARAMS + 1) {
            System.err.println("Not enough parameters.");
            return false;
        }
        parserFactory = SAXParserFactory.newInstance();
        try {
            recognizer = new DefaultLanguageRecognizer();
            return true;
        } catch (Exception e) {
            System.err.println("Unable to create language recognizer.");
            return false;
        }
    }

    /**
     * Parses one TEI file, runs language recognition on its extracted text,
     * and records the result in {@link #outputSet}. Any failure is reported on
     * standard error and the file is skipped.
     *
     * @param str name of the file to process
     */
    protected static void processOneFile(String str) {
        currentFileNumber++;
        System.err.println("Processing " + str + " (" + currentFileNumber + "/" + filesToProcess + ")");
        try {
            // NOTE(review): the parser is created with factory defaults, so DTD and
            // external-entity handling is whatever the platform parser does by
            // default. Harden the factory if inputs may be untrusted -- confirm
            // first whether the TEI inputs depend on their DTDs.
            SAXParser parser = parserFactory.newSAXParser();
            TEITextExtractorHandler handler = new TEITextExtractorHandler();
            parser.parse(str, handler);
            // Collapse every run of whitespace to a single space before recognition.
            String text = handler.getExtractedText().replaceAll("(\\s+)", " ");
            ScoredString[] languages = recognizer.recognizeLanguage(text);
            String title = FileNameUtils.stripPathName(str);
            outputSet.add(new DocData(str, title, text.length(), languages));
            longestTitle = Math.max(longestTitle, title.length());
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("   *** " + str + " failed");
        }
    }

    /**
     * Expands the input file arguments and processes each resulting file.
     *
     * @param strArr command-line arguments; the first {@link #INITPARAMS}
     *               entries are skipped
     * @return the number of files found after wildcard expansion (including
     *         files that subsequently failed to process)
     */
    protected static int processFiles(String[] strArr) {
        int inputCount = Math.max(0, strArr.length - INITPARAMS);
        String[] inputSpecs = new String[inputCount];
        if (inputCount > 0) {
            System.arraycopy(strArr, INITPARAMS, inputSpecs, 0, inputCount);
        }
        String[] fileNames = FileNameUtils.expandFileNameWildcards(inputSpecs);
        filesToProcess = fileNames.length;
        for (String fileName : fileNames) {
            processOneFile(fileName);
        }
        return fileNames.length;
    }

    /**
     * Prints a column-aligned report of all results to standard output,
     * encoded as UTF-8.
     *
     * @throws Exception if the UTF-8 print stream cannot be created
     */
    protected static void printResults() throws Exception {
        OutputStream buffered = new BufferedOutputStream(System.out);
        PrintStream out = new PrintStream(buffered, true, "utf-8");
        for (DocData doc : outputSet) {
            out.print(doc.docTitle);
            // Pad titles to the longest one plus a four-space gutter.
            out.print(StringUtils.dupl(" ", (longestTitle - doc.docTitle.length()) + 4));
            out.print(StringUtils.lpad(Formatters.formatIntegerWithCommas(doc.docLength), 9));
            out.print(" ");
            for (ScoredString language : doc.docLanguages) {
                String name = language.getString();
                out.print(name);
                out.print(StringUtils.dupl(" ", 8 - name.length()));
                String score = Formatters.formatDouble(language.getScore(), 4);
                out.print(score);
                out.print(StringUtils.dupl(" ", 8 - score.length()));
            }
            out.println();
        }
        // Flush but do not close: closing would also close System.out.
        out.flush();
    }

    /**
     * Writes all results to a UTF-8, tab-separated file: title, text length,
     * then a name/score pair for each language slot.
     *
     * @param str name of the output file to create or overwrite
     * @throws Exception if the file cannot be opened or a write error occurs
     */
    protected static void outputResults(String str) throws Exception {
        OutputStream buffered = new BufferedOutputStream(new FileOutputStream(str));
        PrintStream out = new PrintStream(buffered, true, "utf-8");
        boolean failed;
        try {
            for (DocData doc : outputSet) {
                out.print(doc.docTitle);
                out.print("\t");
                out.print(doc.docLength);
                for (ScoredString language : doc.docLanguages) {
                    out.print("\t");
                    out.print(language.getString());
                    out.print("\t");
                    out.print(Formatters.formatDouble(language.getScore(), 4));
                }
                out.println();
            }
            // PrintStream never throws on write failures; checkError() flushes
            // and reports whether any write failed.
            failed = out.checkError();
        } finally {
            out.close();
        }
        if (failed) {
            throw new java.io.IOException("Error writing results to " + str);
        }
    }

    /**
     * Prints the closing summary line to standard error.
     *
     * @param i number of files processed
     * @param j elapsed time in seconds
     */
    protected static void terminate(int i, long j) {
        System.err.println("Processed " + Formatters.formatIntegerWithCommas(i) + " files in " + Formatters.formatLongWithCommas(j) + " seconds.");
    }
}
