package edu.northwestern.at.morphadorner;

import edu.northwestern.at.morphadorner.corpuslinguistics.abbreviations.Abbreviations;
import edu.northwestern.at.morphadorner.corpuslinguistics.adornedword.AdornedWord;
import edu.northwestern.at.morphadorner.corpuslinguistics.inputter.TextInputter;
import edu.northwestern.at.morphadorner.corpuslinguistics.inputter.TextInputterFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.Lemmatizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.LemmatizerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.morphadorner.corpuslinguistics.namerecognizer.Names;
import edu.northwestern.at.morphadorner.corpuslinguistics.namestandardizer.NameStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.outputter.AdornedWordOutputter;
import edu.northwestern.at.morphadorner.corpuslinguistics.outputter.AdornedWordOutputterFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTagsFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechRetagger;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechRetaggerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechTagger;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechTaggerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.guesser.PartOfSpeechGuesser;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.guesser.PartOfSpeechGuesserFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.smoothing.contextual.ContextualSmoother;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.smoothing.contextual.ContextualSmootherFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.smoothing.lexical.LexicalSmoother;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.smoothing.lexical.LexicalSmootherFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.transitionmatrix.TransitionMatrix;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitterFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingmapper.SpellingMapper;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer.SpellingStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PennTreebankTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PostTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PostTokenizerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PreTokenizerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizerFactory;
import edu.northwestern.at.morphadorner.tools.ExtendedAdornedWord;
import edu.northwestern.at.morphadorner.tools.ExtendedAdornedWordFilter;
import edu.northwestern.at.morphadorner.tools.FilterAdornedFile;
import edu.northwestern.at.morphadorner.xgtagger.XGMisc;
import edu.northwestern.at.morphadorner.xgtagger.XGParser;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.SortedArrayList;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.TaggedStrings;
import edu.northwestern.at.utils.TaggedStringsSet;
import edu.northwestern.at.utils.URLUtils;
import edu.northwestern.at.utils.logger.UsesLogger;
import edu.northwestern.at.utils.xml.DOMUtils;
import edu.northwestern.at.utils.xml.TEITagClassifier;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.XMLFilter;
import org.xml.sax.helpers.XMLReaderFactory;

/* loaded from: input_file:edu/northwestern/at/morphadorner/MorphAdorner.class */
public class MorphAdorner {
    /** Shared String-to-adorner map, created by {@code MapFactory.createNewSynchronizedMap()}; exposed via get/setStoredAdorners. */
    protected static Map<String, MorphAdorner> storedAdorners = MapFactory.createNewSynchronizedMap();
    /** Set to 80 by the constructors. NOTE(review): presumably a keyword-in-context display width; not read in this part of the file. */
    public int defaultKWICWidth;
    /** File name handed to {@code MorphAdornerUtils.getWordList} when the Latin word list is enabled; defaults to "resources/latinwords.txt". */
    public String latinWordsFileName;
    /** File name handed to {@code MorphAdornerUtils.getExtraWordsList}; defaults to "resources/extrawords.txt". */
    public String extraWordsFileName;
    /** Extra words loaded by {@code initializeAdornment} and registered with the guesser as an auxiliary word list; null until then. */
    public TaggedStrings extraWords;
    /** A {@code PennTreebankTokenizer} created by the constructors. */
    public WordTokenizer spellingTokenizer;
    /** Tag set created by {@code PartOfSpeechTagsFactory} during initialization. */
    public PartOfSpeechTags partOfSpeechTags;
    /** Tagger created by {@code PartOfSpeechTaggerFactory}; used by {@code adornText} to tag sentences. */
    public PartOfSpeechTagger tagger;
    /** Retagger created by {@code PartOfSpeechRetaggerFactory} and attached to {@link #tagger}. */
    public PartOfSpeechRetagger retagger;
    /** Word lexicon loaded by {@code MorphAdornerUtils.loadWordLexicon}; shared with tagger, guesser and lemmatizer. */
    public Lexicon wordLexicon;
    /** Guesser created by {@code PartOfSpeechGuesserFactory}; receives the lexicons and auxiliary word lists. */
    public PartOfSpeechGuesser partOfSpeechGuesser;
    /** Suffix lexicon loaded by {@code MorphAdornerUtils.loadSuffixLexicon} and given to the guesser. */
    public Lexicon suffixLexicon;
    /** Result of {@code MorphAdornerUtils.loadTransitionMatrix} for {@link #tagger}. */
    public TransitionMatrix transitionMatrix;
    /** Result of {@code MorphAdornerUtils.createSpellingStandardizer}; the code treats it as possibly null. */
    public SpellingStandardizer spellingStandardizer;
    /** Result of {@code MorphAdornerUtils.createSpellingMapper}; the code treats it as possibly null. */
    public SpellingMapper spellingMapper;
    /** Result of {@code MorphAdornerUtils.createNameStandardizer}. */
    public NameStandardizer nameStandardizer;
    /** Lemmatizer created by {@code LemmatizerFactory}; the code treats it as possibly null. */
    public Lemmatizer lemmatizer;
    /** Name lists (place names, first names, surnames) fed to the guesser as proper-noun word lists. */
    public Names names;
    /** Abbreviations loaded from {@code abbreviationsURL}; given to the guesser, sentence splitter and word tokenizer. */
    public Abbreviations abbreviations;
    /** Abbreviations loaded from {@code abbreviationsMainTextURL}. */
    public Abbreviations mainAbbreviations;
    /** Abbreviations loaded from {@code abbreviationsSideTextURL}; passed to {@code fixSideWords} by {@code adornXML}. */
    public Abbreviations sideAbbreviations;
    /** Starts as {@code CharUtils.VERTICAL_BAR_STRING}; replaced by {@code partOfSpeechTags.getTagSeparator()} during initialization. */
    public String tagSeparator;
    /** Starts as {@code CharUtils.VERTICAL_BAR_STRING}; replaced by {@code lemmatizer.getLemmaSeparator()} during initialization. */
    public String lemmaSeparator;
    /** Logger wrapper; null after the no-argument constructor, created by the three-argument constructor. */
    public MorphAdornerLogger morphAdornerLogger;
    /** Program settings; null after the no-argument constructor, created by the three-argument constructor. */
    public MorphAdornerSettings morphAdornerSettings;
    /** Left null by the constructors. NOTE(review): not assigned anywhere in this part of the file. */
    public MorphAdornerSettings tokenizationSettings;
    /** Classifier queried by {@code inSideText} to decide whether a tag name is a side-text tag. */
    public TEITagClassifier tagClassifier;

    /**
     * Creates an adorner holding only its built-in defaults.
     *
     * <p>The resource file names, KWIC width and separators receive their
     * default values and the helper objects are freshly constructed.
     * {@code extraWords}, {@code morphAdornerLogger},
     * {@code morphAdornerSettings} and {@code tokenizationSettings} are not
     * assigned here and therefore keep the field default of {@code null};
     * {@code initializeAdornment()} is not called.
     */
    public MorphAdorner() {
        // Scalar defaults.
        defaultKWICWidth = 80;
        latinWordsFileName = "resources/latinwords.txt";
        extraWordsFileName = "resources/extrawords.txt";

        // Helper objects, built in the same order as before.
        spellingTokenizer = new PennTreebankTokenizer();
        names = new Names();
        abbreviations = new Abbreviations();
        mainAbbreviations = new Abbreviations();
        sideAbbreviations = new Abbreviations();

        // Both separators start out as the vertical bar.
        tagSeparator = CharUtils.VERTICAL_BAR_STRING;
        lemmaSeparator = CharUtils.VERTICAL_BAR_STRING;

        tagClassifier = new TEITagClassifier();
    }

    /**
     * Creates a fully initialized adorner.
     *
     * <p>After applying the same defaults as the no-argument constructor,
     * this creates the settings object and the logger, lets the settings
     * parse {@code strArr}, prints the banner messages, runs
     * {@link #initializeAdornment()} and finally marks the settings as
     * initialized.
     *
     * @param strArr arguments handed to {@code MorphAdornerSettings.getSettings}
     * @param str    first argument of the {@code MorphAdornerLogger} constructor
     *               (the one-argument constructor passes "morphadornerlog.config")
     * @param str2   second argument of the {@code MorphAdornerLogger} constructor
     *               (the one-argument constructor passes "log")
     */
    public MorphAdorner(String[] strArr, String str, String str2) {
        // Same field defaults as the no-argument constructor.
        this();

        this.morphAdornerSettings = new MorphAdornerSettings();

        try {
            this.morphAdornerLogger = new MorphAdornerLogger(str, str2, this.morphAdornerSettings);
        } catch (Exception loggerFailure) {
            // NOTE(review): the logger stays null on failure, so the println
            // calls below will then fail with a NullPointerException.
            loggerFailure.printStackTrace();
        }

        this.morphAdornerSettings.initializeSettings(this.morphAdornerLogger);

        try {
            this.morphAdornerSettings.getSettings(strArr);
        } catch (Exception settingsFailure) {
            settingsFailure.printStackTrace();
        }

        this.morphAdornerLogger.println("programBanner");
        this.morphAdornerLogger.println("Initializing_please_wait");

        initializeAdornment();

        this.morphAdornerSettings.initialized = true;
    }

    /**
     * Creates a fully initialized adorner using the default logger arguments
     * "morphadornerlog.config" and "log".
     *
     * @param strArr arguments forwarded to the three-argument constructor
     */
    public MorphAdorner(String[] strArr) {
        this(strArr, "morphadornerlog.config", "log");
    }

    /**
     * Returns the shared map of stored adorners.
     *
     * @return the static map itself, not a copy
     */
    public static Map<String, MorphAdorner> getStoredAdorners() {
        return storedAdorners;
    }

    /**
     * Replaces the shared map of stored adorners.
     *
     * <p>The supplied map is stored as given; it is not wrapped or copied.
     *
     * @param map the map to use from now on
     */
    public static void setStoredAdorners(Map<String, MorphAdorner> map) {
        storedAdorners = map;
    }

    /**
     * Builds and wires together every component needed for adornment: the
     * part of speech tag set, tagger and retagger (with their smoothers), the
     * word and suffix lexicons, the part of speech guesser and its auxiliary
     * word lists, the abbreviation lists, tagger rules, transition matrix,
     * spelling/name standardizers, spelling mapper and lemmatizer.
     *
     * <p>Any exception stops the remaining steps and is reported with
     * {@code printStackTrace()}; it is not rethrown.
     *
     * <p>Two failure modes that used to abort initialization half-way are
     * handled explicitly: an abbreviations location that cannot be resolved
     * to a URL is logged and skipped, and the lemmatizer dictionary is only
     * set when a spelling standardizer was actually created.
     */
    protected void initializeAdornment() {
        MorphAdornerSettings settings = this.morphAdornerSettings;
        MorphAdornerLogger logger = this.morphAdornerLogger;
        try {
            // Tag set, tagger and retagger.
            this.partOfSpeechTags = PartOfSpeechTagsFactory.newPartOfSpeechTags(settings.properties);
            this.tagSeparator = this.partOfSpeechTags.getTagSeparator();
            PostTokenizer postTokenizer = PostTokenizerFactory.newPostTokenizer(settings.properties);
            this.tagger = PartOfSpeechTaggerFactory.newPartOfSpeechTagger(settings.properties);
            this.retagger = PartOfSpeechRetaggerFactory.newPartOfSpeechRetagger(settings.properties);
            this.tagger.setPostTokenizer(postTokenizer);
            this.retagger.setPostTokenizer(postTokenizer);
            ((UsesLogger) this.tagger).setLogger(logger.getLogger());
            ((UsesLogger) this.retagger).setLogger(logger.getLogger());

            // Each of tagger and retagger gets its own pair of smoothers.
            ContextualSmoother taggerContextualSmoother = ContextualSmootherFactory.newContextualSmoother(settings.properties);
            taggerContextualSmoother.setPartOfSpeechTagger(this.tagger);
            LexicalSmoother taggerLexicalSmoother = LexicalSmootherFactory.newLexicalSmoother(settings.properties);
            taggerLexicalSmoother.setPartOfSpeechTagger(this.tagger);
            this.tagger.setContextualSmoother(taggerContextualSmoother);
            this.tagger.setLexicalSmoother(taggerLexicalSmoother);
            ContextualSmoother retaggerContextualSmoother = ContextualSmootherFactory.newContextualSmoother(settings.properties);
            retaggerContextualSmoother.setPartOfSpeechTagger(this.retagger);
            LexicalSmoother retaggerLexicalSmoother = LexicalSmootherFactory.newLexicalSmoother(settings.properties);
            retaggerLexicalSmoother.setPartOfSpeechTagger(this.retagger);
            this.retagger.setContextualSmoother(retaggerContextualSmoother);
            this.retagger.setLexicalSmoother(retaggerLexicalSmoother);
            this.tagger.setRetagger(this.retagger);
            logger.println("Using", new Object[]{this.tagger.toString()});
            logger.println("Using", new Object[]{this.retagger.toString()});

            // Lexicons and part of speech guesser.
            this.wordLexicon = MorphAdornerUtils.loadWordLexicon(settings, logger);
            this.wordLexicon.setPartOfSpeechTags(this.partOfSpeechTags);
            this.partOfSpeechGuesser = PartOfSpeechGuesserFactory.newPartOfSpeechGuesser(settings.properties);
            this.partOfSpeechGuesser.setCheckPossessives(settings.getBooleanProperty("partofspeechguesser.check_possessives", false));
            this.tagger.setPartOfSpeechGuesser(this.partOfSpeechGuesser);
            this.partOfSpeechGuesser.setWordLexicon(this.wordLexicon);
            ((UsesLogger) this.partOfSpeechGuesser).setLogger(logger.getLogger());
            this.suffixLexicon = MorphAdornerUtils.loadSuffixLexicon(settings, logger);
            this.partOfSpeechGuesser.setSuffixLexicon(this.suffixLexicon);

            // Auxiliary word lists: extra words and the three name lists are
            // all tagged with the singular proper noun tag.
            String properNounTag = this.partOfSpeechTags.getSingularProperNounTag();
            this.extraWords = MorphAdornerUtils.getExtraWordsList(this.extraWordsFileName, properNounTag, "Loaded_extra_words", settings, logger);
            this.partOfSpeechGuesser.addAuxiliaryWordList(this.extraWords);
            this.partOfSpeechGuesser.addAuxiliaryWordList(new TaggedStringsSet(this.names.getPlaceNames().keySet(), this.partOfSpeechTags.getSingularProperNounTag()));
            this.partOfSpeechGuesser.addAuxiliaryWordList(new TaggedStringsSet(this.names.getFirstNames(), this.partOfSpeechTags.getSingularProperNounTag()));
            this.partOfSpeechGuesser.addAuxiliaryWordList(new TaggedStringsSet(this.names.getSurnames(), this.partOfSpeechTags.getSingularProperNounTag()));
            if (settings.useLatinWordList) {
                this.partOfSpeechGuesser.addAuxiliaryWordList(MorphAdornerUtils.getWordList(this.latinWordsFileName, this.partOfSpeechTags.getForeignWordTag("latin"), "Loaded_latin_words", settings, logger));
            }

            // Abbreviation lists (general, main text, side text).
            loadAbbreviationsIfConfigured(this.abbreviations, settings.abbreviationsURL);
            loadAbbreviationsIfConfigured(this.mainAbbreviations, settings.abbreviationsMainTextURL);
            loadAbbreviationsIfConfigured(this.sideAbbreviations, settings.abbreviationsSideTextURL);

            // Tagger data.
            this.tagger.setLexicon(this.wordLexicon);
            MorphAdornerUtils.loadTaggerRules(this.tagger, settings, logger);
            this.transitionMatrix = MorphAdornerUtils.loadTransitionMatrix(this.tagger, settings, logger);

            // Spelling and name standardization.
            this.spellingStandardizer = MorphAdornerUtils.createSpellingStandardizer(this.wordLexicon, this.names, settings, logger);
            this.spellingMapper = MorphAdornerUtils.createSpellingMapper(settings.properties);
            this.nameStandardizer = MorphAdornerUtils.createNameStandardizer(this.wordLexicon, settings, logger);
            if (this.spellingStandardizer != null) {
                this.partOfSpeechGuesser.setSpellingStandardizer(this.spellingStandardizer);
            }

            // Lemmatizer.
            this.lemmatizer = LemmatizerFactory.newLemmatizer(settings.properties);
            this.lemmaSeparator = this.lemmatizer.getLemmaSeparator();
            this.lemmatizer.setLexicon(this.wordLexicon);
            // The standardizer may be absent (see the check above); without
            // this guard the NPE skipped the two statements that follow.
            if (this.spellingStandardizer != null) {
                this.lemmatizer.setDictionary(this.spellingStandardizer.getStandardSpellings());
            }
            ((UsesLogger) this.lemmatizer).setLogger(logger.getLogger());
            this.partOfSpeechGuesser.setAbbreviations(this.abbreviations);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads one abbreviations list when a location is configured.
     *
     * <p>An empty or null location is ignored. A location that cannot be
     * resolved to a URL is logged with the "Bad_file_name_or_URL" message and
     * skipped instead of failing with a NullPointerException.
     *
     * @param target   abbreviations object to fill
     * @param location file name or URL taken from the settings
     * @throws Exception whatever URL resolution or {@code addAbbreviations} throws
     */
    private void loadAbbreviationsIfConfigured(Abbreviations target, String location) throws Exception {
        if (location == null || location.length() == 0) {
            return;
        }
        URL resolved = URLUtils.getURLFromFileNameOrURL(location);
        if (resolved == null) {
            this.morphAdornerLogger.println("Bad_file_name_or_URL", new Object[]{location});
            return;
        }
        addAbbreviations(target, resolved.toString(), "Loaded_abbreviations");
    }

    /**
     * Adorns every file listed in {@code morphAdornerSettings.fileNames}.
     *
     * <p>When the "adorner.handle_xml" property is true, a file that
     * {@code MorphAdornerUtils.isAdorned} reports as already adorned is passed
     * to {@code readorn}, and any other file to {@link #adornXML}. When the
     * property is false, each file is passed to {@link #adornFile}.
     *
     * <p>A failure while processing one file is reported with
     * {@code printStackTrace()} and processing continues with the next file.
     * This now also covers the {@code adornFile} branch, whose checked
     * {@code IOException} was previously not handled at all.
     *
     * @param z forwarded unchanged to {@code adornXML}
     */
    public void processInputFiles(boolean z) {
        long startTime = System.currentTimeMillis();

        // Announce how much work there is.
        switch (this.morphAdornerSettings.fileNames.length) {
            case 0:
                this.morphAdornerLogger.println("No_files_to_process");
                break;
            case 1:
                this.morphAdornerLogger.println("One_file_to_process");
                break;
            default:
                this.morphAdornerLogger.println("Number_of_files_to_process", new Object[]{Formatters.formatIntegerWithCommas(this.morphAdornerSettings.fileNames.length)});
                break;
        }

        boolean handleXML = this.morphAdornerSettings.getBooleanProperty("adorner.handle_xml", false);
        MorphAdornerUtils.logMemoryUsage(this.morphAdornerLogger, "Before processing input texts: ");

        for (int i = 0; i < this.morphAdornerSettings.fileNames.length; i++) {
            String fileName = this.morphAdornerSettings.fileNames[i];
            this.morphAdornerLogger.println("Processing_file", new Object[]{fileName});
            try {
                if (!handleXML) {
                    adornFile(fileName);
                } else if (MorphAdornerUtils.isAdorned(fileName, 500)) {
                    readorn(fileName);
                } else {
                    adornXML(fileName, z);
                }
            } catch (Exception e) {
                // One bad file must not stop the rest of the batch.
                e.printStackTrace();
            }
        }

        if (this.morphAdornerSettings.fileNames.length > 0) {
            this.morphAdornerLogger.println("All_files_adorned", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});
        }
    }

    /**
     * Adorns every configured input file; equivalent to
     * {@code processInputFiles(false)}.
     */
    public void processInputFiles() {
        processInputFiles(false);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Adorns one XML input file and writes the adorned XML to the output
     * directory.
     *
     * <p>The input is loaded through a {@code TextInputter}, which splits it
     * into segments. Every segment whose name starts with "text" (but is not
     * exactly "text") is parsed to a DOM, its text is extracted and adorned
     * with {@link #adornText}, and the adornments are merged back into the
     * DOM, which then replaces the segment text. Finally the segments are
     * merged into a temporary file and written to the output file by the
     * configured {@code MorphAdornerXMLWriter}.
     *
     * <p>If {@code adornExistingXMLFiles} is off and the output file already
     * exists, the file is skipped.
     *
     * <p>The text inputter is closed and the merged temporary file deleted
     * even when processing fails, and a per-segment temporary file is removed
     * when the DOM could not be written into it.
     *
     * @param str file name or URL of the XML input
     * @param z   passed unchanged as the last argument of {@code writeXML}
     * @throws Exception if loading, adorning, merging or writing fails
     */
    public void adornXML(String str, boolean z) throws Exception {
        if (!this.morphAdornerSettings.adornExistingXMLFiles && doesOutputFileNameExist(str)) {
            this.morphAdornerLogger.println("Skipping_file_which_is_already_adorned", new Object[]{str});
            return;
        }

        TextInputter inputter = TextInputterFactory.newTextInputter(this.morphAdornerSettings.properties);
        try {
            inputter.enableGapFixer(this.morphAdornerSettings.fixGapTags);
            inputter.enableOrigFixer(this.morphAdornerSettings.fixOrigTags);
            inputter.enableSplitWordsFixer(this.morphAdornerSettings.fixSplitWords, this.morphAdornerSettings.fixSplitWordsPatternReplacers);
            inputter.loadText(URLUtils.getURLFromFileNameOrURL(str), "utf-8", this.morphAdornerSettings.xmlSchema);

            int segmentCount = inputter.getSegmentCount();
            String segmentCountText = Formatters.formatIntegerWithCommas(segmentCount);
            this.morphAdornerLogger.println("Input_file_split", new Object[]{str, segmentCountText});

            int runningWordID = 0;
            int adornedWordCount = 0;
            int pageBreakCount = 0;
            // Entries of the per-segment merge results whose value exceeds 1.
            Map<Integer, Integer> multiCountEntries = MapFactory.createNewMap();

            for (int segmentIndex = 0; segmentIndex < segmentCount; segmentIndex++) {
                String segmentName = inputter.getSegmentName(segmentIndex);
                if (!segmentName.startsWith("text") || segmentName.equals("text")) {
                    continue;
                }
                this.morphAdornerLogger.println("Processing_segment", new Object[]{segmentName, Formatters.formatIntegerWithCommas(segmentIndex + 1), segmentCountText});

                Document segmentDocument = XGParser.textToDOM(this.morphAdornerSettings.xgOptions, inputter.getSegmentText(segmentName));
                MorphAdornerUtils.fixEmptySoftTags(this.morphAdornerSettings.xgOptions, segmentDocument);
                MorphAdornerUtils.fixSupTags(segmentDocument);
                pageBreakCount += MorphAdornerUtils.countPageBreaks(segmentDocument);

                // extractText returns {extracted text, parser}.
                Object[] extracted = XGParser.extractText(this.morphAdornerSettings.xgOptions, segmentDocument);
                XGParser parser = (XGParser) extracted[1];
                parser.setRunningWordID(runningWordID);
                AdornedWordOutputter adornedWords = adornText((String) extracted[0], null);

                this.morphAdornerLogger.println("Inserting_adornments_into_xml");
                long insertStartTime = System.currentTimeMillis();
                Map<Integer, Integer> mergeResult = XGParser.mergeAdornments(this.morphAdornerSettings.xgOptions, parser, segmentDocument, segmentName, adornedWords, inputter);
                fixSideWords(segmentDocument, this.sideAbbreviations);

                File segmentFile = File.createTempFile("mad", null);
                if (XGMisc.printNodeToFile(segmentDocument, segmentFile.getAbsolutePath()) == 1) {
                    inputter.setSegmentText(segmentName, segmentFile);
                    if (!inputter.usesSegmentFiles()) {
                        segmentFile.delete();
                    }
                } else {
                    // Nothing took over the file, so do not leave it behind.
                    segmentFile.delete();
                }

                for (Map.Entry<Integer, Integer> entry : mergeResult.entrySet()) {
                    if (entry.getValue().intValue() > 1) {
                        multiCountEntries.put(entry.getKey(), entry.getValue());
                    }
                }

                this.morphAdornerLogger.println("Inserted_adornments_into_xml", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, insertStartTime)});
                runningWordID = parser.getRunningWordID();
                adornedWordCount += parser.getNumberOfAdornedWords();
                if (!inputter.usesSegmentFiles()) {
                    FileUtils.deleteFile(adornedWords.getOutputFileName());
                }
                // Drop the array's references to the extracted text and parser.
                extracted[0] = null;
                extracted[1] = null;
            }

            String outputFileName = getOutputFileName(str);
            long mergeStartTime = System.currentTimeMillis();
            this.morphAdornerLogger.println("Merging_adorned");
            String mergedFileName = File.createTempFile("mad", null).getAbsolutePath();
            try {
                mergeXML(inputter, mergedFileName);
                this.morphAdornerLogger.println("Writing_merged", new Object[]{outputFileName});
                MorphAdornerXMLWriterFactory.newMorphAdornerXMLWriter(this.morphAdornerSettings.properties).writeXML(mergedFileName, outputFileName, runningWordID, this.partOfSpeechTags, multiCountEntries, adornedWordCount, pageBreakCount, this, z);
            } finally {
                FileUtils.deleteFile(mergedFileName);
            }
            this.morphAdornerLogger.println("Adorned_XML_written", new Object[]{outputFileName, MorphAdornerUtils.durationString(this.morphAdornerSettings, mergeStartTime)});
        } finally {
            ((IsCloseableObject) inputter).close();
        }

        MorphAdornerUtils.logMemoryUsage(this.morphAdornerLogger, "After completing " + str + ": ");
    }

    /**
     * Prints one line to standard output for every {@code w} and {@code pc}
     * node of the document: node name, {@code xml:id} value (empty when
     * absent), text content and the result of {@link #inSideText}, separated
     * by single spaces.
     *
     * @param document document whose word and punctuation nodes are listed
     */
    protected void printWords(Document document) {
        NodeList wordNodes = DOMUtils.getNodesByTagName(document, new String[]{"w", "pc"});
        if (wordNodes == null) {
            System.out.println("printWords: null node list found");
            return;
        }

        final int nodeCount = wordNodes.getLength();
        for (int index = 0; index < nodeCount; index++) {
            Node wordNode = wordNodes.item(index);

            // Look up the xml:id attribute, tolerating nodes without attributes.
            NamedNodeMap attributeMap = wordNode.getAttributes();
            Node idAttribute = (attributeMap == null) ? null : attributeMap.getNamedItem("xml:id");
            String wordID = (idAttribute == null) ? "" : idAttribute.getTextContent();

            StringBuilder line = new StringBuilder();
            line.append(wordNode.getNodeName()).append(" ");
            line.append(wordID).append(" ");
            line.append(wordNode.getTextContent()).append(" ");
            line.append(inSideText(wordNode));
            System.out.println(line.toString());
        }
    }

    /**
     * Rejoins abbreviations in side text that were split into a word followed
     * by a separate "." element.
     *
     * <p>The {@code w}/{@code pc} nodes are visited from last to first. When
     * a node lies in side text (see {@link #inSideText}), its text is exactly
     * "." and the preceding node's text plus "." is a known abbreviation, the
     * period is appended to the preceding node and the period node is removed
     * from the document.
     *
     * <p>If the removed period carried {@code eos="1"}, that flag is moved to
     * the merged word. It used to be written onto the period node just before
     * that node was removed, which lost it.
     *
     * @param document      document to repair in place
     * @param abbreviations abbreviations used to recognise word + "."
     */
    protected void fixSideWords(Document document, Abbreviations abbreviations) {
        NodeList wordNodes = DOMUtils.getNodesByTagName(document, new String[]{"w", "pc"});
        if (wordNodes == null) {
            System.out.println("fixSideWords: null node list found");
            return;
        }

        // Backwards so that removals do not disturb indices still to be
        // visited. Index 0 is skipped: it has no predecessor to merge into
        // (item(-1) is null and used to cause a NullPointerException).
        for (int index = wordNodes.getLength() - 1; index > 0; index--) {
            Element periodNode = (Element) wordNodes.item(index);
            if (!inSideText(periodNode)) {
                continue;
            }
            String periodText = periodNode.getTextContent();
            if (!".".equals(periodText)) {
                continue;
            }

            Element previousNode = (Element) wordNodes.item(index - 1);
            String mergedText = previousNode.getTextContent() + periodText;
            if (!abbreviations.isKnownAbbreviation(mergedText)) {
                continue;
            }

            // getAttribute yields "" for a missing attribute, never null.
            boolean endsSentence = "1".equals(periodNode.getAttribute("eos"));

            previousNode.setTextContent(mergedText);
            if (endsSentence) {
                previousNode.setAttribute("eos", "1");
            }

            Node parent = periodNode.getParentNode();
            if (parent != null) {
                parent.removeChild(periodNode);
            }
        }
    }

    /**
     * Tells whether a node is, or is nested inside, a side-text element.
     *
     * <p>The node's own name is tested first, then the names of its ancestors
     * from the nearest outwards. The climb ends at the first side-text tag,
     * when there is no further parent, or when an ancestor is a text node.
     *
     * @param node node to test
     * @return true if the node or one of the examined ancestors has a tag
     *         name that {@code tagClassifier} reports as a side-text tag
     */
    protected boolean inSideText(Node node) {
        if (this.tagClassifier.isSideTextTag(node.getNodeName())) {
            return true;
        }

        for (Node ancestor = node.getParentNode();
                ancestor != null && ancestor.getNodeType() != Node.TEXT_NODE;
                ancestor = ancestor.getParentNode()) {
            if (this.tagClassifier.isSideTextTag(ancestor.getNodeName())) {
                return true;
            }
        }

        return false;
    }

    /**
     * Derives the output file name for an input file.
     *
     * <p>The input name with its path stripped is placed in the configured
     * output directory, the directory path is created if necessary, and the
     * result of {@code FileNameUtils.createVersionedFileName} for that path is
     * returned.
     *
     * @param str input file name
     * @return the value produced by {@code createVersionedFileName}
     * @throws IOException if the path for the output file cannot be created
     */
    public String getOutputFileName(String str) throws IOException {
        String bareName = FileNameUtils.stripPathName(str);
        File target = new File(this.morphAdornerSettings.outputDirectoryName, bareName);
        String targetPath = target.getPath();

        if (!FileUtils.createPathForFile(targetPath)) {
            throw new IOException(this.morphAdornerSettings.getString("Unable_to_create_output_directory"));
        }

        return FileNameUtils.createVersionedFileName(targetPath);
    }

    /**
     * Checks whether the output directory already contains a file with the
     * same (path-stripped) name as the given input file.
     *
     * @param str input file name
     * @return true if such a file exists
     */
    public boolean doesOutputFileNameExist(String str) {
        String bareName = FileNameUtils.stripPathName(str);
        File candidate = new File(this.morphAdornerSettings.outputDirectoryName, bareName);
        return candidate.exists();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Loads the text of one file or URL and adorns it.
     *
     * <p>Segment 0 of the loaded text is passed to {@link #adornText}
     * together with the resolved URL.
     *
     * <p>The text inputter is now closed in a {@code finally} block, so it is
     * also released when loading the text fails.
     *
     * @param str file name or URL of the text to adorn
     * @return the outputter produced by {@code adornText}, or null when the
     *         name cannot be resolved to a URL or when loading or adorning
     *         throws (both cases are logged)
     * @throws IOException declared for callers; failures inside loading and
     *         adorning are caught and logged instead of being propagated
     */
    public AdornedWordOutputter adornFile(String str) throws IOException {
        this.morphAdornerLogger.println("Tagging", new Object[]{str});

        URL textURL = URLUtils.getURLFromFileNameOrURL(str);
        if (textURL == null) {
            this.morphAdornerLogger.println("Bad_file_name_or_URL", new Object[]{str});
            return null;
        }

        long startTime = System.currentTimeMillis();
        try {
            String text;
            TextInputter inputter = TextInputterFactory.newTextInputter(this.morphAdornerSettings.properties);
            try {
                inputter.enableGapFixer(this.morphAdornerSettings.fixGapTags);
                inputter.enableOrigFixer(this.morphAdornerSettings.fixOrigTags);
                inputter.loadText(textURL, "utf-8", this.morphAdornerSettings.xmlSchema);
                text = inputter.getSegmentText(0);
            } finally {
                // Release the inputter whether or not loading succeeded.
                ((IsCloseableObject) inputter).close();
            }
            this.morphAdornerLogger.println("Loaded_text", new Object[]{str, MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});
            return adornText(text, textURL);
        } catch (Exception e) {
            this.morphAdornerLogger.println("Unable_to_read_text", new Object[]{str});
            return null;
        }
    }

    public AdornedWordOutputter adornText(String str, URL url) throws IOException {
        String str2;
        long currentTimeMillis = System.currentTimeMillis();
        SentenceSplitter newSentenceSplitter = SentenceSplitterFactory.newSentenceSplitter(this.morphAdornerSettings.properties);
        ((UsesLogger) newSentenceSplitter).setLogger(this.morphAdornerLogger.getLogger());
        newSentenceSplitter.setPartOfSpeechGuesser(this.partOfSpeechGuesser);
        newSentenceSplitter.setAbbreviations(this.abbreviations);
        WordTokenizer newWordTokenizer = WordTokenizerFactory.newWordTokenizer(this.morphAdornerSettings.properties);
        newWordTokenizer.setPreTokenizer(PreTokenizerFactory.newPreTokenizer(this.morphAdornerSettings.properties));
        newWordTokenizer.setAbbreviations(this.abbreviations);
        List<List<String>> extractSentences = newSentenceSplitter.extractSentences(str, newWordTokenizer);
        int[] wordAndSentenceCounts = MorphAdornerUtils.getWordAndSentenceCounts(extractSentences);
        int i = wordAndSentenceCounts[1];
        this.morphAdornerLogger.println("Extracted_words", new Object[]{Formatters.formatIntegerWithCommas(i), Formatters.formatIntegerWithCommas(wordAndSentenceCounts[0]), MorphAdornerUtils.durationString(this.morphAdornerSettings, currentTimeMillis)});
        if (this.partOfSpeechGuesser != null) {
            this.partOfSpeechGuesser.setTryStandardSpellings(this.morphAdornerSettings.tryStandardSpellings);
        }
        boolean z = this.morphAdornerSettings.outputLemma && this.lemmatizer != null;
        boolean z2 = this.morphAdornerSettings.outputStandardSpelling && this.spellingStandardizer != null;
        boolean z3 = this.morphAdornerSettings.outputOriginalToken || this.morphAdornerSettings.useXMLHandler;
        this.morphAdornerSettings.setXMLWordAttributes(z3, z, z2);
        long currentTimeMillis2 = System.currentTimeMillis();
        List<List<AdornedWord>> tagSentences = this.tagger.tagSentences(extractSentences);
        this.morphAdornerLogger.println("Tagging_complete", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, currentTimeMillis2), Formatters.formatIntegerWithCommas((int) ((i / (System.currentTimeMillis() - currentTimeMillis2)) * 1000.0d))});
        this.morphAdornerLogger.println("Generating_other_adornments");
        long currentTimeMillis3 = System.currentTimeMillis();
        AdornedWordOutputter newAdornedWordOutputter = AdornedWordOutputterFactory.newAdornedWordOutputter(this.morphAdornerSettings.properties);
        newAdornedWordOutputter.setWordAttributeNames(this.morphAdornerSettings.getXMLWordAttributes());
        if (url != null) {
            newAdornedWordOutputter.createOutputFile(getOutputFileName(URLUtils.getFileNameFromURL(url, this.morphAdornerSettings.outputDirectoryName)), "utf-8", '\t');
        } else {
            newAdornedWordOutputter.createOutputFile(File.createTempFile("mad", null).getAbsolutePath(), "utf-8", '\t');
        }
        int i2 = 0;
        int i3 = 0;
        String str3 = "";
        String trim = this.morphAdornerSettings.xgOptions.getSurroundMarker().trim();
        String undeterminedTag = this.partOfSpeechTags.getUndeterminedTag();
        List<String> createNewList = ListFactory.createNewList();
        for (List<AdornedWord> list : tagSentences) {
            i2++;
            String str4 = i2 + "";
            int size = list.size() - 1;
            if (!this.morphAdornerSettings.outputRunningWordNumbers) {
                i3 = 0;
            }
            int i4 = 0;
            while (i4 < list.size()) {
                createNewList.clear();
                if (this.morphAdornerSettings.outputSentenceNumber) {
                    createNewList.add(str4);
                }
                i3++;
                if (this.morphAdornerSettings.outputWordNumber) {
                    createNewList.add(i3 + "");
                }
                AdornedWord adornedWord = list.get(i4);
                String token = adornedWord.getToken();
                if (z3) {
                    createNewList.add(token);
                }
                String spelling = adornedWord.getSpelling();
                String standardSpelling = adornedWord.getStandardSpelling();
                if (this.morphAdornerSettings.outputSpelling) {
                    createNewList.add(spelling);
                }
                String partsOfSpeech = adornedWord.getPartsOfSpeech();
                if (z2) {
                    standardSpelling = MorphAdornerUtils.getStandardizedSpelling(this, spelling, standardSpelling, partsOfSpeech);
                    if (this.spellingMapper != null) {
                        standardSpelling = this.spellingMapper.mapSpelling(standardSpelling);
                    }
                }
                if (z) {
                    str3 = !this.morphAdornerSettings.ignoreLexiconEntriesForLemmatization ? this.wordLexicon.getLemma(spelling, partsOfSpeech) : "*";
                    if (this.lemmatizer != null && (str3.equals("*") || this.partOfSpeechTags.countTags(partsOfSpeech) != this.lemmatizer.countLemmata(str3))) {
                        str3 = standardSpelling.length() > 0 ? MorphAdornerUtils.getLemma(this, standardSpelling, partsOfSpeech) : MorphAdornerUtils.getLemma(this, spelling, partsOfSpeech);
                    }
                    if (str3.indexOf(this.lemmaSeparator) < 0 && !this.partOfSpeechTags.isProperNounTag(partsOfSpeech)) {
                        str3 = str3.toLowerCase();
                    }
                }
                if (this.lemmatizer != null) {
                    if (this.partOfSpeechTags.countTags(partsOfSpeech) != this.lemmatizer.countLemmata(str3)) {
                        partsOfSpeech = undeterminedTag;
                    }
                    if (partsOfSpeech.equals(undeterminedTag) || str3.length() == 0) {
                        str3 = spelling.toLowerCase();
                        standardSpelling = spelling;
                        partsOfSpeech = undeterminedTag;
                    }
                }
                if (this.morphAdornerSettings.outputPartOfSpeech) {
                    createNewList.add(partsOfSpeech);
                }
                if (z2) {
                    createNewList.add(standardSpelling);
                }
                if (z) {
                    createNewList.add(str3);
                }
                if (this.morphAdornerSettings.outputEOSFlag || this.morphAdornerSettings.useXMLHandler) {
                    if (this.morphAdornerSettings.useXMLHandler) {
                        str2 = "0";
                        if (i4 >= size) {
                            str2 = "1";
                        } else if (list.get(i4 + 1).getToken().equals(trim) && (token.endsWith(".") || token.endsWith("!") || token.endsWith("?") || token.endsWith("'") || token.endsWith("\"") || token.endsWith(CharUtils.RSQUOTE_STRING) || token.endsWith(CharUtils.RDQUOTE_STRING) || token.endsWith("}") || token.endsWith("]") || token.endsWith(")"))) {
                            str2 = "1";
                        }
                    } else {
                        str2 = i4 >= size ? "1" : "0";
                    }
                    createNewList.add(str2);
                }
                if (this.morphAdornerSettings.outputKWIC) {
                    String[] kwic = MorphAdornerUtils.getKWIC(list, i4, this.morphAdornerSettings.outputKWICWidth);
                    createNewList.add(kwic[0]);
                    createNewList.add(kwic[2]);
                }
                newAdornedWordOutputter.outputWordAndAdornments(createNewList);
                i4++;
            }
        }
        newAdornedWordOutputter.close();
        if (url != null) {
            this.morphAdornerLogger.println("Adornments_written_to", new Object[]{getOutputFileName(URLUtils.getFileNameFromURL(url, this.morphAdornerSettings.outputDirectoryName)), MorphAdornerUtils.durationString(this.morphAdornerSettings, currentTimeMillis3)});
        } else {
            this.morphAdornerLogger.println("Adornments_generated", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, currentTimeMillis3)});
        }
        extractSentences.clear();
        tagSentences.clear();
        return newAdornedWordOutputter;
    }

    /**
     * Re-adorns a previously adorned XML file.
     *
     * <p>The input is first run through a {@code StripWordAttributesFilter} /
     * {@code ExtendedAdornedWordFilter} chain into a temporary file, which
     * also collects the existing sentences.  The sentences are then re-tagged,
     * their standard spellings and lemmata are regenerated, split-word parts
     * are synchronized, and finally the temporary file is filtered through an
     * {@code AddWordAttributesFilter} (optionally wrapped in a
     * {@code PseudoPageAdderFilter}) into the file named by
     * {@code getOutputFileName(str)}.</p>
     *
     * <p>While tagging, the retagger (when one is configured) is forbidden
     * from adding or deleting words; its previous setting is restored
     * afterwards, even when tagging fails.  The temporary file is removed
     * whether or not processing succeeds.</p>
     *
     * @param str name of the previously adorned XML file to read
     * @throws SAXException          if an XML reader cannot be created or a filter pass fails
     * @throws IOException           if the temporary file cannot be created or an I/O error occurs
     * @throws FileNotFoundException if a file needed by a filter pass cannot be found
     */
    public void readorn(String str) throws SAXException, IOException, FileNotFoundException {
        this.morphAdornerLogger.println("Loading_previously_adorned");
        long loadStart = System.currentTimeMillis();
        StripWordAttributesFilter stripWordAttributesFilter = new StripWordAttributesFilter(XMLReaderFactory.createXMLReader());
        ExtendedAdornedWordFilter extendedAdornedWordFilter = new ExtendedAdornedWordFilter(stripWordAttributesFilter);
        String tempFilePath = File.createTempFile("mad", null).getAbsolutePath();
        try {
            // First pass: constructing FilterAdornedFile performs the filtering
            // from the input file into the temporary file.
            new FilterAdornedFile(str, tempFilePath, extendedAdornedWordFilter);
            List<List<ExtendedAdornedWord>> sentences = extendedAdornedWordFilter.getSentences();
            this.morphAdornerLogger.println("Loaded_existing_words", new Object[]{Formatters.formatIntegerWithCommas(extendedAdornedWordFilter.getNumberOfWords()), Formatters.formatIntegerWithCommas(sentences.size()), MorphAdornerUtils.durationString(this.morphAdornerSettings, loadStart)});

            // Only touch the retagger when one exists; reading its flag
            // unconditionally would throw a NullPointerException.
            boolean savedCanAddOrDeleteWords = false;
            if (this.retagger != null) {
                savedCanAddOrDeleteWords = this.retagger.getCanAddOrDeleteWords();
                this.retagger.setCanAddOrDeleteWords(false);
            }
            long taggingStart = System.currentTimeMillis();
            try {
                this.tagger.tagAdornedWordSentences(sentences, stripWordAttributesFilter.getRegIDSet());
            } finally {
                // Restore the retagger setting even if tagging failed, since
                // this adorner instance may be reused for further files.
                if (this.retagger != null) {
                    this.retagger.setCanAddOrDeleteWords(savedCanAddOrDeleteWords);
                }
            }
            // Clamp to one millisecond so a very fast run cannot divide by
            // zero, and scale in floating point so the rate is not truncated
            // to whole words per millisecond before being multiplied.
            long taggingMillis = Math.max(1L, System.currentTimeMillis() - taggingStart);
            int wordsPerSecond = (int) ((extendedAdornedWordFilter.getNumberOfWords() * 1000.0d) / taggingMillis);
            this.morphAdornerLogger.println("Tagging_complete", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, taggingStart), Formatters.formatIntegerWithCommas(wordsPerSecond)});

            this.morphAdornerLogger.println("Generating_other_adornments");
            long adornStart = System.currentTimeMillis();
            updateAdornedSentences(sentences, stripWordAttributesFilter.getRegIDSet());
            updateSplitWordAdornments(extendedAdornedWordFilter);
            this.morphAdornerLogger.println("Adornments_generated", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, adornStart)});

            // Second pass: write the updated word attributes back, optionally
            // adding pseudo-page boundary milestones.
            XMLFilter addWordAttributesFilter = new AddWordAttributesFilter(XMLReaderFactory.createXMLReader(), extendedAdornedWordFilter, this.morphAdornerSettings);
            XMLFilter outputFilter = addWordAttributesFilter;
            if (this.morphAdornerSettings.outputPseudoPageBoundaryMilestones) {
                outputFilter = new PseudoPageAdderFilter(addWordAttributesFilter, this.morphAdornerSettings.pseudoPageSize, this.morphAdornerSettings.pseudoPageContainerDivTypes);
            }
            String outputFileName = getOutputFileName(str);
            this.morphAdornerLogger.println("Writing_merged", new Object[]{outputFileName});
            long writeStart = System.currentTimeMillis();
            new FilterAdornedFile(tempFilePath, outputFileName, outputFilter);
            this.morphAdornerLogger.println("Adorned_XML_written", new Object[]{outputFileName, MorphAdornerUtils.durationString(this.morphAdornerSettings, writeStart)});
        } finally {
            // Always remove the temporary file, including on failure.
            FileUtils.deleteFile(tempFilePath);
        }
    }

    /**
     * Updates the adornments of every sentence in a list by passing each one
     * to {@link #updateAdornedSentence(List, Set)}.
     *
     * @param list the sentences to update, each a list of adorned words
     * @param set  set of word IDs handed through unchanged to the
     *             per-sentence update
     */
    public void updateAdornedSentences(List<List<ExtendedAdornedWord>> list, Set<String> set) {
        for (List<ExtendedAdornedWord> sentence : list) {
            updateAdornedSentence(sentence, set);
        }
    }

    /**
     * Copies adornments from the first part of each split word to its
     * related parts.
     *
     * <p>For every word ID known to the filter, a word that reports itself as
     * both a split word and a first part has its parts of speech, lemmata,
     * spelling and standard spelling copied onto every word returned by
     * {@code getRelatedSplitWordIDs} for its ID.</p>
     *
     * @param extendedAdornedWordFilter filter holding the adorned words and
     *                                  their split-word relationships
     */
    protected void updateSplitWordAdornments(ExtendedAdornedWordFilter extendedAdornedWordFilter) {
        for (String wordID : extendedAdornedWordFilter.getAdornedWordIDs()) {
            ExtendedAdornedWord firstPart = extendedAdornedWordFilter.getExtendedAdornedWord(wordID);
            // Only the first part of a split word drives the update.
            if (!firstPart.isSplitWord() || !firstPart.isFirstPart()) {
                continue;
            }
            for (String relatedID : extendedAdornedWordFilter.getRelatedSplitWordIDs(firstPart.getID())) {
                ExtendedAdornedWord relatedPart = extendedAdornedWordFilter.getExtendedAdornedWord(relatedID);
                relatedPart.setPartsOfSpeech(firstPart.getPartsOfSpeech());
                relatedPart.setLemmata(firstPart.getLemmata());
                relatedPart.setSpelling(firstPart.getSpelling());
                relatedPart.setStandardSpelling(firstPart.getStandardSpelling());
            }
        }
    }

    /**
     * Regenerates the standard spelling and lemmata of each word in a
     * sentence.
     *
     * <p>For each word, a standardized spelling is computed from either its
     * existing standard spelling (when the word's ID is in {@code set}) or
     * its spelling, optionally passed through the spelling mapper, and stored
     * on the word.  The lemma is taken from the word lexicon unless lexicon
     * entries are configured to be ignored; a {@code "*"} lemma is replaced
     * by the lemmatizer result when a lemmatizer is present.  A lemma that
     * does not contain the lemma separator and whose tag is not a proper
     * noun tag is lower-cased before being stored.</p>
     *
     * @param list the words of one sentence, updated in place
     * @param set  IDs of words whose existing standard spelling, rather than
     *             their spelling, is fed to the spelling standardizer
     */
    public void updateAdornedSentence(List<ExtendedAdornedWord> list, Set<String> set) {
        for (ExtendedAdornedWord word : list) {
            String wordID = word.getID();
            // The token is fetched but its value is not used; the call is
            // kept exactly as in the existing code.
            word.getToken();
            String originalSpelling = word.getSpelling();
            String priorStandardSpelling = word.getStandardSpelling();
            String posTags = word.getPartsOfSpeech();

            String spellingToStandardize;
            if (set.contains(wordID)) {
                spellingToStandardize = priorStandardSpelling;
            } else {
                spellingToStandardize = originalSpelling;
            }
            String newStandardSpelling = MorphAdornerUtils.getStandardizedSpelling(this, spellingToStandardize, priorStandardSpelling, posTags);
            if (this.spellingMapper != null) {
                newStandardSpelling = this.spellingMapper.mapSpelling(newStandardSpelling);
            }
            word.setStandardSpelling(newStandardSpelling);

            String lemma;
            if (this.morphAdornerSettings.ignoreLexiconEntriesForLemmatization) {
                lemma = "*";
            } else {
                lemma = this.wordLexicon.getLemma(originalSpelling, posTags);
            }
            if (lemma.equals("*") && this.lemmatizer != null) {
                // Prefer the standardized spelling as lemmatizer input, falling
                // back to the original spelling when it is empty.
                String lemmaSource = newStandardSpelling.length() > 0 ? newStandardSpelling : originalSpelling;
                lemma = MorphAdornerUtils.getLemma(this, lemmaSource, posTags);
            }
            boolean hasNoSeparator = lemma.indexOf(this.lemmaSeparator) < 0;
            if (hasNoSeparator && !this.partOfSpeechTags.isProperNounTag(posTags)) {
                lemma = lemma.toLowerCase();
            }
            word.setLemmata(lemma);
        }
    }

    /**
     * Loads additional abbreviations and logs how many were added.
     *
     * @param abbreviations the abbreviations collection to load into
     * @param str           argument passed to {@code loadAbbreviations}
     *                      identifying the abbreviations to load
     * @param str2          logger message key; the message receives the
     *                      formatted number of added abbreviations and the
     *                      elapsed-time string
     */
    public void addAbbreviations(Abbreviations abbreviations, String str, String str2) {
        long startTime = System.currentTimeMillis();
        int countBeforeLoad = abbreviations.getAbbreviationsCount();
        abbreviations.loadAbbreviations(str);
        // The difference in counts is the number of abbreviations added.
        int addedCount = abbreviations.getAbbreviationsCount() - countBeforeLoad;
        Object[] messageArgs = {
            Formatters.formatIntegerWithCommas(addedCount),
            MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)
        };
        this.morphAdornerLogger.println(str2, messageArgs);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Merges the segments of a text inputter into a single UTF-8 encoded
     * output file.
     *
     * <p>Segment names are collected into a {@code SortedArrayList} and the
     * segments are written in that list's order.  Two segment names receive
     * special treatment:</p>
     * <ul>
     *   <li>{@code "head"}: everything from the first {@code </eebo}
     *       (case-insensitive), {@code </TEI} or {@code </tei.} onwards is
     *       held back as the closing text and written last.</li>
     *   <li>{@code "text"}: the segment is trimmed, every {@code "/>"} is
     *       replaced with {@code ">"}, a closing tag matching the segment's
     *       opening {@code group}/{@code text} element is put in front of the
     *       held-back closing text, and a trailing {@code </text>} or
     *       {@code </TEXT>} is removed.</li>
     * </ul>
     * <p>Any trailing {@code " >"} on a segment, and every {@code " >"} in
     * the closing text, is collapsed to {@code ">"}.</p>
     *
     * <p>Failures are reported with a stack trace on standard error and are
     * not propagated to the caller.  The output streams are closed whether
     * or not merging succeeds.</p>
     *
     * @param textInputter source of the segment names and segment texts
     * @param str          name of the output file; an existing file is
     *                     overwritten
     */
    protected static void mergeXML(TextInputter textInputter, String str) {
        FileOutputStream fileOutputStream = null;
        OutputStreamWriter outputStreamWriter = null;
        try {
            fileOutputStream = new FileOutputStream(new File(str), false);
            outputStreamWriter = new OutputStreamWriter(new BufferedOutputStream(fileOutputStream), "utf-8");

            SortedArrayList segmentNames = new SortedArrayList();
            int segmentCount = textInputter.getSegmentCount();
            for (int i = 0; i < segmentCount; i++) {
                segmentNames.add(textInputter.getSegmentName(i));
            }

            // Closing markup held back until all segments have been written.
            String closingText = "";
            for (int i = 0; i < segmentNames.size(); i++) {
                String segmentName = (String) segmentNames.get(i);
                String segmentText = textInputter.getSegmentText(segmentName);
                if (segmentName.equals("head")) {
                    int closingIndex = StringUtils.indexOfIgnoreCase(segmentText, "</eebo");
                    if (closingIndex < 0) {
                        closingIndex = segmentText.indexOf("</TEI");
                    }
                    if (closingIndex < 0) {
                        closingIndex = segmentText.indexOf("</tei.");
                    }
                    if (closingIndex >= 0) {
                        closingText = segmentText.substring(closingIndex);
                        segmentText = segmentText.substring(0, closingIndex);
                    }
                } else if (segmentName.equals("text")) {
                    segmentText = StringUtils.replaceAll(segmentText.trim(), "/>", ">");
                    if (segmentText.startsWith("<group")) {
                        closingText = "</group>" + closingText;
                    } else if (segmentText.startsWith("<GROUP")) {
                        closingText = "</GROUP>" + closingText;
                    } else if (segmentText.startsWith("<text")) {
                        closingText = "</text>" + closingText;
                    } else {
                        closingText = "</TEXT>" + closingText;
                    }
                    if (segmentText.endsWith("</text>") || segmentText.endsWith("</TEXT>")) {
                        // Both closing tags are seven characters long.
                        segmentText = segmentText.substring(0, segmentText.length() - 7);
                    }
                }
                while (segmentText.endsWith(" >")) {
                    segmentText = segmentText.substring(0, segmentText.length() - 2) + ">";
                }
                outputStreamWriter.write(segmentText, 0, segmentText.length());
            }
            String normalizedClosingText = StringUtils.replaceAll(closingText, " >", ">");
            outputStreamWriter.write(normalizedClosingText, 0, normalizedClosingText.length());
            // Close on the success path inside the try block so that a failed
            // final flush is still reported by the catch clause below.
            outputStreamWriter.close();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the streams on the failure path as well.  Closing an
            // already closed stream has no effect, so this is harmless after
            // a successful run.
            if (outputStreamWriter != null) {
                try {
                    outputStreamWriter.close();
                } catch (IOException ignored) {
                    // Any earlier failure has already been reported above.
                }
            }
            if (fileOutputStream != null) {
                try {
                    fileOutputStream.close();
                } catch (IOException ignored) {
                    // Any earlier failure has already been reported above.
                }
            }
        }
        // NOTE(review): explicit garbage collection request kept from the
        // existing code; its purpose is not evident here -- confirm whether
        // it is still needed.
        System.gc();
    }

    /**
     * Command-line entry point: builds an adorner from the program
     * arguments, processes the configured input files if there are any,
     * and then terminates the logger.
     *
     * @param strArr command-line arguments passed to the adorner constructor
     */
    public static void main(String[] strArr) {
        MorphAdorner adorner = new MorphAdorner(strArr);
        boolean nothingToProcess = adorner.morphAdornerSettings.fileNames.length <= 0;
        if (nothingToProcess) {
            adorner.morphAdornerLogger.println("No_files_found_to_process");
        } else {
            adorner.processInputFiles(adorner.morphAdornerSettings.tokenizeOnly);
        }
        adorner.morphAdornerLogger.terminate();
    }

    /**
     * Returns the adorner stored under a name, creating and storing a new
     * one when none exists or when a fresh instance is requested.
     *
     * <p>When an existing adorner is reused and its logger reports that it
     * is not enabled, a new wrapped logger built from {@code str2} and
     * {@code str3} is installed on it.</p>
     *
     * @param str    name under which the adorner is stored
     * @param z      {@code true} to always create a new adorner, replacing
     *               any stored one
     * @param strArr arguments passed to the adorner constructor
     * @param str2   first logger argument, passed to the constructor or to
     *               {@code createWrappedLogger}
     * @param str3   second logger argument, passed to the constructor or to
     *               {@code createWrappedLogger}
     * @return the stored or newly created adorner
     */
    public static MorphAdorner createAdorner(String str, boolean z, String[] strArr, String str2, String str3) {
        MorphAdorner cached = storedAdorners.get(str);
        if (!z && cached != null) {
            // Reuse the stored adorner, reviving its logger if necessary.
            if (!cached.morphAdornerLogger.getLogger().isLoggerEnabled()) {
                cached.morphAdornerLogger.setLogger(cached.morphAdornerLogger.createWrappedLogger(str2, str3));
            }
            return cached;
        }
        MorphAdorner fresh = new MorphAdorner(strArr, str2, str3);
        storedAdorners.put(str, fresh);
        return fresh;
    }

    /**
     * Runs an adorner over a set of input files.
     *
     * <p>The output directory and the wildcard-expanded file names are
     * stored in the adorner's settings before processing.  When the
     * expansion yields no files, a message is logged instead.</p>
     *
     * @param morphAdorner the adorner to run; may be {@code null}
     * @param str          output directory name
     * @param strArr       input file names, possibly containing wildcards
     * @param z            flag passed through to {@code processInputFiles}
     * @return the adorner that was passed in, or {@code null} if it was
     *         {@code null}
     */
    public static MorphAdorner runAdorner(MorphAdorner morphAdorner, String str, String[] strArr, boolean z) {
        if (morphAdorner != null) {
            morphAdorner.morphAdornerSettings.outputDirectoryName = str;
            morphAdorner.morphAdornerSettings.fileNames = FileNameUtils.expandFileNameWildcards(strArr);
            boolean haveFiles = morphAdorner.morphAdornerSettings.fileNames.length > 0;
            if (!haveFiles) {
                morphAdorner.morphAdornerLogger.println("No_files_found_to_process");
            } else {
                morphAdorner.processInputFiles(z);
            }
        }
        return morphAdorner;
    }

    /**
     * Runs the adorner stored under a name over a set of input files.
     *
     * @param str    name under which the adorner was stored
     * @param str2   output directory name
     * @param strArr input file names, possibly containing wildcards
     * @param z      flag passed through to {@code processInputFiles}
     * @return the stored adorner, or {@code null} if none is stored under
     *         that name
     */
    public static MorphAdorner runAdorner(String str, String str2, String[] strArr, boolean z) {
        MorphAdorner stored = storedAdorners.get(str);
        return runAdorner(stored, str2, strArr, z);
    }

    /**
     * Creates (or reuses) the adorner stored under a name and then runs it
     * over a set of input files.
     *
     * @param str     name under which the adorner is stored
     * @param z       {@code true} to always create a new adorner
     * @param strArr  arguments passed to the adorner constructor
     * @param str2    first logger argument for adorner creation
     * @param str3    second logger argument for adorner creation
     * @param str4    output directory name
     * @param strArr2 input file names, possibly containing wildcards
     * @param z2      flag passed through to {@code processInputFiles}
     * @return the adorner that was run, or {@code null} if none is stored
     *         under the name
     */
    public static MorphAdorner createAndRunAdorner(String str, boolean z, String[] strArr, String str2, String str3, String str4, String[] strArr2, boolean z2) {
        createAdorner(str, z, strArr, str2, str3);
        // The name-keyed overload looks the adorner up in the registry again
        // before running it.
        return runAdorner(str, str4, strArr2, z2);
    }

    /**
     * Terminates the logger when this adorner is finalized.
     *
     * <p>Logger termination is best effort: exceptions it raises are
     * ignored.  The superclass finalizer always runs, whatever happens
     * during logger termination.</p>
     *
     * @throws Throwable if the superclass finalizer fails
     */
    @Override
    public void finalize() throws Throwable {
        try {
            if (this.morphAdornerLogger != null) {
                this.morphAdornerLogger.terminate();
            }
        } catch (Exception ignored) {
            // Nothing useful can be done about a logger failure while the
            // object is being finalized.
        } finally {
            super.finalize();
        }
    }
}
