package edu.northwestern.at.morphadorner.tools.adornedtosimpleteip5;

import edu.northwestern.at.morphadorner.gate.MorphAdornerGateWrapperBase;
import edu.northwestern.at.morphadorner.tools.AdornedXMLWriter;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.SetFactory;
import edu.northwestern.at.utils.SetUtils;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.xml.JDOMFragmentParser;
import edu.northwestern.at.utils.xml.JDOMUtils;
import edu.northwestern.at.utils.xml.jdom.ElementsFilter;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import org.jdom2.Attribute;
import org.jdom2.Content;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.filter.Filters;
import org.jdom2.util.IteratorIterable;

/* loaded from: input_file:edu/northwestern/at/morphadorner/tools/adornedtosimpleteip5/AdornedToSimpleTEIP5.class */
/**
 * Command-line tool that rewrites adorned XML files into a simpler TEI P5 form.
 *
 * <p>Program arguments, in order:</p>
 * <ol>
 *   <li>output directory</li>
 *   <li>{@code usereg} or {@code usechoice} (case-insensitive; any other value
 *       leaves the default, which is {@code usereg})</li>
 *   <li>name of a file holding interpGrp XML text to append to the {@code text}
 *       element (an unreadable file is treated as "no interpGrp")</li>
 *   <li>name of the file that receives the list of successfully converted inputs</li>
 *   <li>name of the file that receives the list of inputs with conversion problems</li>
 *   <li>one or more input file names/wildcards</li>
 * </ol>
 *
 * <p>All state is held in static fields, so the class is not safe for concurrent use.</p>
 */
public class AdornedToSimpleTEIP5 {
    /** Directory that receives the converted files (program argument 0). */
    protected static String outputDirectory;

    /** UTF-8 print stream wrapping {@code System.out}; created by {@link #initialize}. */
    protected static PrintStream printStream;

    /** Number of leading program arguments that precede the input file specifications. */
    protected static final int INITPARAMS = 5;

    /** Number of input files found after wildcard expansion; set by {@link #processFiles}. */
    protected static int docsToProcess = 0;

    /** Not updated anywhere in this class; retained for subclasses. */
    protected static int currentDocNumber = 0;

    /** Trimmed interpGrp XML text read from program argument 2, or empty if none could be read. */
    protected static String interpGrpXMLText = "";

    /** True when {@link #interpGrpXMLText} is non-empty. */
    protected static boolean haveInterpGrp = false;

    /** When true, part-of-speech values are always emitted as {@code ana="#..."}. */
    protected static boolean forceAna = true;

    /** True to keep the {@code reg} attribute on words; false to emit choice/orig/reg children. */
    protected static boolean useReg = true;

    /** xml:id of the most recently cleaned word; used to build ids for gaps and sentence markers. */
    protected static String lastID = "";

    /** Number of gap ids generated since the last cleaned word. */
    protected static int gapCount = 0;

    /** Number of sentence markers generated for the word currently being cleaned. */
    protected static int sentenceCount = 0;

    /** Name of the file that receives {@link #badWorksSet} (program argument 4). */
    protected static String badWorksFileName = "";

    /** Input files for which at least one word id went missing during conversion. */
    protected static Set<String> badWorksSet = null;

    /** Name of the file that receives {@link #goodWorksSet} (program argument 3). */
    protected static String goodWorksFileName = "";

    /** Input files whose word ids all survived conversion. */
    protected static Set<String> goodWorksSet = null;

    /** TEI namespace used for every element this class creates. */
    protected static Namespace teiNamespace = Namespace.getNamespace("http://www.tei-c.org/ns/1.0");

    /**
     * Program entry point: initializes, converts every input file and reports the
     * number of files and the elapsed time in whole seconds (rounded up).
     *
     * @param strArr program arguments as described on the class
     */
    public static void main(String[] strArr) {
        try {
            if (!initialize(strArr)) {
                System.exit(1);
            }
            // The start time must be captured BEFORE processing; otherwise the
            // reported elapsed time is always zero.
            long startTime = System.currentTimeMillis();
            int filesProcessed = processFiles(strArr);
            long elapsedSeconds = ((System.currentTimeMillis() - startTime) + 999) / 1000;
            terminate(filesProcessed, elapsedSeconds);
        } catch (Exception e) {
            // getMessage() may be null, so print the full trace like the rest of this class does.
            e.printStackTrace();
        }
    }

    /**
     * Reads the leading program arguments into the static configuration fields.
     *
     * @param strArr program arguments; at least {@code INITPARAMS + 1} are required
     * @return true if initialization succeeded, false if too few arguments were given
     * @throws Exception if the UTF-8 print stream cannot be created
     */
    protected static boolean initialize(String[] strArr) throws Exception {
        printStream = new PrintStream(new BufferedOutputStream(System.out), true, "utf-8");
        if (strArr.length < INITPARAMS + 1) {
            System.err.println("Not enough parameters.");
            return false;
        }
        outputDirectory = strArr[0];

        // equalsIgnoreCase avoids the locale-sensitive toLowerCase() (e.g. the
        // dotless i produced for "USECHOICE" under a Turkish default locale).
        String regMode = strArr[1];
        if (regMode.equalsIgnoreCase("usereg")) {
            useReg = true;
        } else if (regMode.equalsIgnoreCase("usechoice")) {
            useReg = false;
        }

        interpGrpXMLText = "";
        try {
            String interpGrpText = FileUtils.readTextFile(strArr[2], "utf-8");
            if (interpGrpText != null) {
                interpGrpXMLText = interpGrpText;
            }
        } catch (Exception ignored) {
            // Best effort: an unreadable interpGrp file simply means no interpGrp is appended.
        }
        interpGrpXMLText = interpGrpXMLText.trim();
        haveInterpGrp = interpGrpXMLText.length() > 0;

        goodWorksFileName = strArr[3];
        goodWorksSet = SetFactory.createNewSortedSet();
        badWorksFileName = strArr[4];
        badWorksSet = SetFactory.createNewSortedSet();
        return true;
    }

    /**
     * Converts one adorned XML file and writes the result to the output directory
     * under the input's base name with an {@code .xml} extension.
     *
     * <p>The word ids present before conversion are compared with those present
     * afterwards; the file is recorded in {@link #badWorksSet} if any are missing and
     * in {@link #goodWorksSet} otherwise. Any exception is reported on
     * {@link #printStream} and does not propagate.</p>
     *
     * @param str name of the input file
     */
    protected static void processOneFile(String str) {
        String outputFileName = "";
        try {
            String baseName = FileNameUtils.changeFileExtension(FileNameUtils.stripPathName(str), "");
            outputFileName = new File(outputDirectory, baseName + ".xml").getAbsolutePath();
            FileUtils.createPathForFile(outputFileName);

            Document document = JDOMUtils.parse(str);
            Element rootElement = document.getRootElement();
            rootElement.removeChild("monkHeader", Namespace.getNamespace("http://monk.at.northwestern.edu/ns/1.0"));

            // Snapshot the <sup> elements before renaming them so the descendant
            // iterator is never used while the tree is being modified.
            IteratorIterable<Element> supIterator = rootElement.getDescendants(Filters.element("sup"));
            List<Element> supElements = ListFactory.createNewList();
            while (supIterator.hasNext()) {
                supElements.add(supIterator.next());
            }
            for (Element supElement : supElements) {
                replaceSupWithHi(supElement);
            }

            // Snapshot all content in document order and remember the id of the first <w>.
            IteratorIterable<Content> contentIterator = rootElement.getDescendants(Filters.content());
            List<Content> allContent = ListFactory.createNewList();
            String firstWordID = "";
            while (contentIterator.hasNext()) {
                Content content = contentIterator.next();
                allContent.add(content);
                if (firstWordID.length() == 0 && elementName(content).equals("w")) {
                    String id = JDOMUtils.getAttributeValue((Element) content, "xml:id", true);
                    firstWordID = (id == null) ? "" : id;
                }
            }

            // Seed lastID so that gaps preceding the first word still get an id.
            lastID = baseName + "-" + StringUtils.dupl("0", firstWordID.length());

            // Word ids before conversion.
            Set<String> originalWordIDs = SetFactory.createNewSortedSet();
            for (Content content : allContent) {
                if (isWordName(elementName(content))) {
                    addWordID((Element) content, originalWordIDs);
                }
            }

            // Convert words and gaps. handleW returns the index to continue from.
            int index = 0;
            while (index < allContent.size()) {
                Content content = allContent.get(index);
                String name = elementName(content);
                if (isWordName(name)) {
                    index = handleW(allContent, index);
                } else if (name.equals("gap")) {
                    handleGap(content, false, null);
                }
                index++;
            }

            // Word ids after conversion.
            Set<String> convertedWordIDs = SetFactory.createNewSortedSet();
            IteratorIterable<?> wordIterator = rootElement.getDescendants(new ElementsFilter(new String[]{"w", "pc"}));
            while (wordIterator.hasNext()) {
                addWordID((Element) wordIterator.next(), convertedWordIDs);
            }

            // Whatever remains in the original set was lost by the conversion.
            originalWordIDs.removeAll(convertedWordIDs);
            if (originalWordIDs.size() > 0) {
                printStream.println("*** Error *** in " + str + ": " + originalWordIDs.size() + " words not properly converted.");
                for (String missingID : originalWordIDs) {
                    printStream.println(missingID);
                }
                badWorksSet.add(str);
            } else {
                goodWorksSet.add(str);
            }

            if (haveInterpGrp) {
                // Prefer the namespaced <text>; fall back to a <text> without namespace.
                Element textElement = rootElement.getChild("text", teiNamespace);
                if (textElement == null) {
                    textElement = rootElement.getChild("text");
                }
                JDOMFragmentParser fragmentParser = new JDOMFragmentParser(new Namespace[]{teiNamespace});
                for (Element interpGrpElement : fragmentParser.parseFragment(interpGrpXMLText)) {
                    if (textElement != null) {
                        textElement.addContent(interpGrpElement);
                    }
                }
            }

            // Constructing the writer performs the output (no further call is made on it here).
            new AdornedXMLWriter(document, outputFileName);
            printStream.println("Reformatted " + str + " to " + outputFileName);
        } catch (Exception e) {
            e.printStackTrace();
            printStream.println("Problem reformatting " + str + " to " + outputFileName + ": " + e.getMessage());
        }
    }

    /**
     * Returns the element name of {@code content}, or the empty string when the
     * content is not an element.
     */
    private static String elementName(Content content) {
        return (content instanceof Element) ? ((Element) content).getName() : "";
    }

    /** Returns true when {@code name} is the name of a word ({@code w}) or punctuation ({@code pc}) element. */
    private static boolean isWordName(String name) {
        return name.equals("w") || name.equals("pc");
    }

    /**
     * Adds the {@code xml:id} of an element to a set.
     *
     * @param element element whose id is read
     * @param set     set receiving the id
     */
    protected static void addWordID(Element element, Set<String> set) {
        set.add(JDOMUtils.getAttributeValue(element, "xml:id", true));
    }

    /**
     * Cleans the word element at position {@code i} and, when {@link #useReg} is
     * false and the word's {@code reg} value differs from its trimmed text,
     * replaces the text with a choice/orig/reg structure.
     *
     * @param list document content in document order; entry {@code i} must be an element
     * @param i    index of the word element to handle
     * @return the index of the last entry consumed (always {@code i})
     */
    protected static int handleW(List<Content> list, int i) {
        Element wordElement = (Element) list.get(i);
        // Read "reg" first: cleanWElement may remove the attribute.
        String regularized = wordElement.getAttributeValue("reg");
        Element cleaned = cleanWElement(wordElement);
        String wordText = cleaned.getText().trim();
        if (!useReg && regularized != null && !regularized.equals(wordText)) {
            generateChoice(cleaned, wordText, regularized);
        }
        return i;
    }

    /**
     * Normalizes the attributes of a word or punctuation element in place.
     *
     * <ul>
     *   <li>{@code lem} is renamed to {@code MorphAdornerGateWrapperBase.TOKEN_LEMMA_FEATURE_NAME}.</li>
     *   <li>The part of speech ({@code pos}, or else {@code ana} with any leading
     *       {@code #} stripped) is emitted as {@code ana="#value"} when
     *       {@link #haveInterpGrp} or {@link #forceAna} is set, otherwise as
     *       {@code pos="value"}.</li>
     *   <li>{@code ord}, {@code spe}, {@code tok}, {@code eos} and {@code ms} are removed;
     *       {@code part} is removed when absent or {@code N}; {@code reg} is removed
     *       when {@link #useReg} is false.</li>
     *   <li>An element named {@code pc}, or whose {@code spe} value is punctuation,
     *       becomes {@code pc} and loses its lemma, {@code ana}, {@code type} and
     *       {@code reg} attributes; at end of sentence it gets {@code unit="sentence"}.</li>
     *   <li>A non-punctuation element at end of sentence gets an empty
     *       {@code pc unit="sentence"} sibling inserted immediately after it.</li>
     * </ul>
     *
     * <p>Side effects: sets {@link #lastID} to the element's id and resets
     * {@link #gapCount} and {@link #sentenceCount}.</p>
     *
     * @param element the element to clean
     * @return the same element
     */
    protected static Element cleanWElement(Element element) {
        lastID = JDOMUtils.getAttributeValue(element, "xml:id", true);
        gapCount = 0;
        sentenceCount = 0;

        boolean endOfSentence = "1".equals(element.getAttributeValue("eos"));
        // NOTE(review): "spe" may be absent (null); null handling depends on CharUtils.isPunctuation.
        String spelling = element.getAttributeValue("spe");
        String part = element.getAttributeValue("part");
        if (part == null) {
            part = "N";
        }

        Attribute lemmaAttribute = element.getAttribute("lem");
        if (lemmaAttribute != null) {
            lemmaAttribute.setName(MorphAdornerGateWrapperBase.TOKEN_LEMMA_FEATURE_NAME);
        }

        // Determine the bare part-of-speech value (without a leading '#').
        Attribute posAttribute = element.getAttribute("pos");
        String pos = null;
        if (posAttribute == null) {
            posAttribute = element.getAttribute("ana");
            if (posAttribute != null) {
                pos = posAttribute.getValue();
                // startsWith is safe for an empty value, unlike charAt(0).
                if (pos.startsWith("#")) {
                    pos = pos.substring(1);
                }
            }
        } else {
            pos = posAttribute.getValue();
        }
        if (posAttribute != null) {
            if (haveInterpGrp || forceAna) {
                posAttribute.setName("ana");
                // Use the stripped value so an existing "#x" does not become "##x".
                posAttribute.setValue("#" + pos);
            } else {
                posAttribute.setName("pos");
                posAttribute.setValue(pos);
            }
        }

        element.removeAttribute("ord");
        if (part.equals("N")) {
            element.removeAttribute("part");
        }
        element.removeAttribute("spe");
        element.removeAttribute("tok");
        if (!useReg) {
            element.removeAttribute("reg");
        }
        element.removeAttribute("eos");
        element.removeAttribute("ms");

        if (element.getName().equals("pc") || CharUtils.isPunctuation(spelling)) {
            element.setName("pc");
            element.removeAttribute(MorphAdornerGateWrapperBase.TOKEN_LEMMA_FEATURE_NAME);
            element.removeAttribute("ana");
            element.removeAttribute("type");
            element.removeAttribute("reg");
            if (endOfSentence) {
                // The punctuation itself marks the sentence end; no extra marker needed.
                element.setAttribute("unit", "sentence");
                endOfSentence = false;
            }
        }

        if (endOfSentence) {
            Element parentElement = element.getParentElement();
            if (parentElement != null) {
                int wordIndex = parentElement.indexOf(element);
                Element sentenceMarker = createElement("pc");
                sentenceCount++;
                JDOMUtils.setAttributeValue(sentenceMarker, "xml:id", lastID + "-" + sentenceCount);
                sentenceMarker.setAttribute("unit", "sentence");
                // Insert after the word. setContent(index, child) would REPLACE the
                // following sibling and throws when the word is the last child.
                parentElement.addContent(wordIndex + 1, sentenceMarker);
            }
        }
        return element;
    }

    /**
     * Creates a new element in the TEI namespace.
     *
     * @param str element name
     * @return the new, parentless element
     */
    protected static Element createElement(String str) {
        return new Element(str, teiNamespace);
    }

    /**
     * Gives a gap element an {@code xml:id} of the form {@code lastID-gapN} when it
     * has none, and increments {@link #gapCount}.
     *
     * @param content the gap; must be an {@link Element}
     * @param z       unused
     * @param list    unused
     */
    protected static void handleGap(Content content, boolean z, List<Element> list) {
        Element gapElement = (Element) content;
        String existingID = JDOMUtils.getAttributeValue(gapElement, "xml:id", true);
        if (existingID == null || existingID.length() <= 0) {
            String generatedID = lastID + "-gap" + gapCount;
            gapCount++;
            JDOMUtils.setAttributeValue(gapElement, "xml:id", generatedID);
        }
    }

    /**
     * Converts a {@code sup} element to {@code hi rend="sup"}.
     *
     * @param content the element to convert; must be an {@link Element}
     */
    protected static void handleSup(Content content) {
        replaceSupWithHi((Element) content);
    }

    /**
     * Renames an element to {@code hi} and replaces all of its attributes with a
     * single {@code rend="sup"} attribute.
     *
     * @param element the element to modify in place
     */
    protected static void replaceSupWithHi(Element element) {
        element.setName("hi");
        List<Attribute> attributes = ListFactory.createNewList();
        attributes.add(new Attribute("rend", "sup"));
        element.setAttributes(attributes);
    }

    /**
     * Builds a short description of an element for diagnostics.
     *
     * @param element the element to describe
     * @return {@code "Name: <name>"}, followed by {@code ", id: <xml:id>"} when the id is non-empty
     */
    protected static String displayElement(Element element) {
        String id = JDOMUtils.getAttributeValue(element, "xml:id", true);
        StringBuilder description = new StringBuilder();
        description.append("Name: ").append(element.getName());
        if (id != null && id.length() > 0) {
            description.append(", id: ").append(id);
        }
        return description.toString();
    }

    /**
     * Replaces the text of an element with a {@code choice} child holding an
     * {@code orig} and a {@code reg} child.
     *
     * @param element the element whose text is replaced
     * @param str     original spelling for {@code orig}; null leaves {@code orig} empty
     * @param str2    regularized spelling for {@code reg}
     * @return the created {@code orig} element
     */
    protected static Element generateChoice(Element element, String str, String str2) {
        element.setText("");
        Element choiceElement = createElement("choice");
        element.addContent(choiceElement);

        Element origElement = createElement("orig");
        if (str != null) {
            origElement.setText(str);
        }
        choiceElement.addContent(origElement);

        Element regElement = createElement("reg");
        regElement.setText(str2);
        choiceElement.addContent(regElement);
        return origElement;
    }

    /**
     * Converts every input file named after the first {@link #INITPARAMS} program
     * arguments, then saves the good and bad works sets.
     *
     * @param strArr the full program argument array
     * @return the number of input files after wildcard expansion
     */
    protected static int processFiles(String[] strArr) {
        // Guard against short argument arrays instead of failing with a negative array size.
        int fileSpecCount = Math.max(0, strArr.length - INITPARAMS);
        String[] fileSpecs = new String[fileSpecCount];
        if (fileSpecCount > 0) {
            System.arraycopy(strArr, INITPARAMS, fileSpecs, 0, fileSpecCount);
        }

        String[] fileNames = FileNameUtils.expandFileNameWildcards(fileSpecs);
        docsToProcess = fileNames.length;
        for (String fileName : fileNames) {
            processOneFile(fileName);
        }

        try {
            SetUtils.saveSet(goodWorksSet, goodWorksFileName, "utf-8");
            SetUtils.saveSet(badWorksSet, badWorksFileName, "utf-8");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return fileNames.length;
    }

    /**
     * Prints the final summary line.
     *
     * @param i number of files processed
     * @param j elapsed time in seconds
     */
    protected static void terminate(int i, long j) {
        printStream.println("Processed " + Formatters.formatIntegerWithCommas(i) + " files in " + Formatters.formatLongWithCommas(j) + " seconds.");
    }
}
