package edu.northwestern.at.morphadorner;

import com.megginson.sax.XMLWriter;
import edu.northwestern.at.morphadorner.MorphAdornerSettings;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencemelder.XMLSentenceMelder;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.QueueStack;
import edu.northwestern.at.utils.SetFactory;
import edu.northwestern.at.utils.SortedArrayList;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.math.ArithUtils;
import edu.northwestern.at.utils.xml.ExtendedXMLFilterImpl;
import java.text.NumberFormat;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: input_file:edu/northwestern/at/morphadorner/IDFixerFilter.class */
public class IDFixerFilter extends ExtendedXMLFilterImpl {
    // NOTE: the three formatters below are static (shared by every filter instance) yet their
    // minimum digit counts are reconfigured by the instance method setIDFormat.

    // Formats the numeric part of READING_CONTEXT_ORDER word IDs.
    protected static final NumberFormat ID_FORMATTER = NumberFormat.getInstance();
    // Formats the page-number part of WORD_WITHIN_PAGE_BLOCK word IDs.
    protected static final NumberFormat PAGE_FORMATTER = NumberFormat.getInstance();
    // Formats the word-within-page part of word IDs and token labels.
    protected static final NumberFormat WORD_FORMATTER = NumberFormat.getInstance();
    // Running ordinal written to the "ord" word attribute; incremented when a new incoming word ID is seen.
    protected int wordOrdinal;
    // Incoming (unmodified) ID attribute value of the most recently processed w/pc element.
    protected String lastID;
    // Generated ID string of the most recently processed w/pc element; gap IDs are derived from it.
    protected String lastIDString;
    // Incoming ID attribute value of the w/pc element currently being processed.
    protected String id;
    // Name of the ID attribute, taken from the settings' xgOptions.
    protected String idAttrName;
    // File name stripped of path and extension, with "." replaced by "_"; prefix of generated IDs.
    protected String baseFileName;
    // Part of speech tag set used to recognise number/symbol/punctuation tags and to build foreign word tags.
    protected PartOfSpeechTags posTags;
    // Namespace URI of the first element seen; null until then.
    protected String elementURI;
    // Output options copied from MorphAdornerSettings in the constructor.
    protected boolean outputWhitespace;
    protected boolean outputNonredundantAttributesOnly;
    protected boolean outputNonredundantTokenAttribute;
    protected boolean outputNonredundantPartAttribute;
    protected boolean outputNonredundantEosAttribute;
    protected boolean outputSentenceBoundaryMilestones;
    protected boolean usePCToMarkEndOfSentence;
    protected boolean outputPseudoPageBoundaryMilestones;
    // Number of emitted word elements after which a pseudo-page "end" milestone is written.
    protected int pseudoPageSize;
    // Number of pseudo-pages started so far; written as the milestone "n" attribute.
    protected int pseudoPageCount;
    // Word elements emitted since the last pseudo-page milestone was created.
    protected int pseudoPageWordCount;
    // True after a "start" milestone has been created and until the next "end" milestone.
    protected boolean pseudoPageStarted;
    // Total number of word elements emitted so far.
    protected int emittedWordCount;
    // Decides where blanks go between words; created in setWriter or supplied via setSentenceMelder.
    protected XMLSentenceMelder sentenceMelder;
    // True when the next word starts a sentence (after an eos="1" word or a hard tag boundary).
    protected boolean isFirstWord;
    // The w/pc element whose start tag has been seen but which has not been written out yet.
    protected PendingElement pendingWordElement;
    // Incoming word ID -> number of parts, for words split across several w elements.
    protected Map<Integer, Integer> splitWords;
    // Working copy of splitWords; counts down the parts still to come for each split word.
    protected Map<Integer, Integer> splitWordsCopy;
    // One foreign-language tag per open element (pushed in startElement, popped in endElement).
    protected QueueStack<String> foreignStack;
    // Saved sentence state for each open jump tag.
    protected QueueStack<XMLWriterState> jumpStack;
    // Lower-cased "type" of each open div element ("*div" when the div has no type).
    protected QueueStack<String> divStack;
    // Lower-cased div types whose end forces a pseudo-page boundary.
    protected Set<String> pseudoPageContainerDivTypes;
    // Receives one SentenceAndWordNumber entry per emitted word element.
    protected SortedArrayList<SentenceAndWordNumber> sortedWords;
    // Writer supplied through setWriter.
    protected XMLWriter writer;
    // Expected total number of words; reaching it closes the final pseudo-page.
    protected int totalWordsToEmit;
    // Count of pb elements seen so far.
    protected int pageNumber;
    // Count of words (and labelled gaps / generated pc elements) since the last pb element.
    protected int wordNumberWithinPage;
    // "n" attribute of the most recent pb element.
    protected String nFromPB;
    // "facs" (or "ref"/"REF") attribute of the most recent pb element; never null after a pb.
    protected String facsFromPB;
    // facsFromPB of the pb element before the most recent one.
    protected String prevFacsFromPB;
    // Zero-based count of consecutive pb elements sharing the same facs value; rendered as a letter in token labels.
    protected int pageColumn;
    // Multiplier applied to word numbers when generating IDs.
    protected int idSpacing;
    // ID generation scheme.
    protected MorphAdornerSettings.XMLIDType idType;
    // Token label options copied from MorphAdornerSettings.
    protected boolean xmlTokenLabelEmit;
    protected String xmlTokenLabelAttribute;
    protected int xmlTokenLabelSpacing;
    protected boolean xmlTokenLabelPrependWorkName;
    // When true the word ordinal is written to the "ord" attribute.
    protected boolean outputWordOrdinal;
    // Language code (primary subtag of xml:lang/lang) -> language name passed to PartOfSpeechTags.getForeignWordTag.
    protected static Map<String, String> languageTags;
    // Number of gap elements given generated IDs since the last word element.
    protected int gapCount;
    // Settings this filter was configured from.
    protected MorphAdornerSettings morphAdornerSettings;
    // When true, lem/pos/reg/spe/tok attributes are removed from emitted words.
    protected boolean tokenizingOnly;
    // Initialised to false by the constructor; not read anywhere in this class.
    protected boolean zzzzljTagSeen;

    /**
     * Creates an ID fixer filter configured from the given settings.
     *
     * @param xMLReader            parent reader passed to the superclass
     * @param partOfSpeechTags     part of speech tag set (see {@link #setPosTags})
     * @param str                  file name from which the ID prefix is derived (see {@link #setIDFormat})
     * @param i                    largest incoming word ID; sizes the zero padding of generated IDs
     * @param sortedArrayList      list that receives an entry for every emitted word
     * @param map                  incoming word ID to number of parts, for split words
     * @param i2                   total number of words expected to be emitted
     * @param i3                   page count handed to {@link #setIDFormat}
     * @param morphAdornerSettings settings supplying all output options; must not be null
     * @param z                    true when only tokenizing, which suppresses lem/pos/reg/spe/tok output
     */
    public IDFixerFilter(XMLReader xMLReader, PartOfSpeechTags partOfSpeechTags, String str, int i, SortedArrayList<SentenceAndWordNumber> sortedArrayList, Map<Integer, Integer> map, int i2, int i3, MorphAdornerSettings morphAdornerSettings, boolean z) {
        super(xMLReader);

        // Initial state. Numeric counters, flags and object references that start at
        // 0 / false / null rely on Java's default field values.
        this.lastID = "";
        this.lastIDString = "";
        this.id = "";
        this.nFromPB = "";
        this.facsFromPB = "";
        this.prevFacsFromPB = "";
        this.foreignStack = new QueueStack<>();
        this.jumpStack = new QueueStack<>();
        this.divStack = new QueueStack<>();
        this.pseudoPageContainerDivTypes = SetFactory.createNewSet();
        // Fallbacks in case an overriding setIDFormat does not set them.
        this.idSpacing = 10;
        this.idType = MorphAdornerSettings.XMLIDType.READING_CONTEXT_ORDER;

        // Options copied from the settings.
        this.morphAdornerSettings = morphAdornerSettings;
        this.idAttrName = morphAdornerSettings.xgOptions.getIdArgumentName();
        this.outputNonredundantAttributesOnly = morphAdornerSettings.outputNonredundantAttributesOnly;
        this.outputNonredundantTokenAttribute = morphAdornerSettings.outputNonredundantTokenAttribute;
        this.outputNonredundantPartAttribute = morphAdornerSettings.outputNonredundantPartAttribute;
        this.outputNonredundantEosAttribute = morphAdornerSettings.outputNonredundantEosAttribute;
        this.outputSentenceBoundaryMilestones = morphAdornerSettings.outputSentenceBoundaryMilestones;
        this.usePCToMarkEndOfSentence = morphAdornerSettings.usePCToMarkEndOfSentence;
        this.outputWordOrdinal = morphAdornerSettings.outputWordOrdinal;
        this.xmlTokenLabelEmit = morphAdornerSettings.xmlTokenLabelEmit;
        this.xmlTokenLabelAttribute = morphAdornerSettings.xmlTokenLabelAttribute;
        this.xmlTokenLabelSpacing = morphAdornerSettings.xmlTokenLabelSpacing;
        this.xmlTokenLabelPrependWorkName = morphAdornerSettings.xmlTokenLabelPrependWorkName;
        this.outputPseudoPageBoundaryMilestones = morphAdornerSettings.outputPseudoPageBoundaryMilestones;
        this.pseudoPageSize = morphAdornerSettings.pseudoPageSize;
        // Div types are compared lower-cased (startElement lower-cases the div type it pushes).
        for (String str2 : StringUtils.makeTokenArray(morphAdornerSettings.pseudoPageContainerDivTypes)) {
            this.pseudoPageContainerDivTypes.add(str2.toLowerCase());
        }
        this.outputWhitespace = morphAdornerSettings.outputWhitespaceElements;

        // Per-document inputs.
        this.sortedWords = sortedArrayList;
        setSplitWords(map);
        setPosTags(partOfSpeechTags);
        setIDFormat(str, i, i3);
        this.totalWordsToEmit = i2;
        this.tokenizingOnly = z;
    }

    /**
     * Builds the token label for the current word position.
     *
     * <p>The label is {@code [baseFileName-][facs-]<column letter>-<word number>}, where the
     * base file name is included only when {@code xmlTokenLabelPrependWorkName} is set and the
     * name is non-empty, the facs part only when the last page break supplied one, the column
     * letter is {@code 'a' + pageColumn}, and the word number is
     * {@code wordNumberWithinPage * xmlTokenLabelSpacing} formatted with {@code WORD_FORMATTER}.
     *
     * @return the token label
     */
    protected String generateTokenLabel() {
        String facsPart = StringUtils.safeString(this.facsFromPB);
        if (facsPart.length() > 0) {
            facsPart = facsPart + "-";
        }
        String workPart = "";
        if (this.xmlTokenLabelPrependWorkName && this.baseFileName.length() > 0) {
            workPart = this.baseFileName + "-";
        }
        // 97 is 'a': column 0 -> "a-", column 1 -> "b-", ...
        String columnPart = ((char) (97 + this.pageColumn)) + "-";
        return workPart + facsPart + columnPart + WORD_FORMATTER.format(this.wordNumberWithinPage * this.xmlTokenLabelSpacing);
    }

    /**
     * Handles the start of an element.
     *
     * <p>Word ({@code w}) and punctuation ({@code pc}) elements are not forwarded here: their
     * attributes are rewritten (new ID, ordinal, part flag, defaults for spelling/lemma/pos/reg,
     * removal of attributes disabled in the settings) and the element is held in
     * {@code pendingWordElement} until its text has been collected. Every other element first
     * flushes any pending word, then is forwarded to the parent handler unless its name starts
     * with {@code "zzzz"}.
     *
     * @param str        namespace URI
     * @param str2       local name
     * @param str3       qualified name; all element tests in this method use this value
     * @param attributes incoming attributes (copied before modification)
     * @throws SAXException if the parent handler fails
     */
    @Override // org.xml.sax.helpers.XMLFilterImpl, org.xml.sax.ContentHandler
    public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        String str4;
        String str5;
        // Work on a mutable copy; "attributes" itself is only read.
        AttributesImpl attributesImpl = new AttributesImpl(attributes);
        // z == true means "forward this element at the end of the method"; cleared for w/pc.
        boolean z = true;
        String value = attributes.getValue(WordAttributeNames.p);
        // One entry is pushed for every element; endElement pops one per element.
        this.foreignStack.push(getForeignLanguageTag(str3, attributes));
        // Prefix the path attribute with a backslash and the base file name.
        if (value != null && value.length() > 0) {
            setAttributeValue(attributesImpl, WordAttributeNames.p, "\\" + this.baseFileName + value);
        }
        // A non-empty TEIform attribute is dropped from the output.
        String value2 = attributes.getValue("TEIform");
        if (value2 != null && value2.length() > 0) {
            removeAttribute(attributesImpl, "TEIform");
        }
        // Page break: advance the page counter, restart word numbering, remember n and facs.
        if (str3.equalsIgnoreCase("pb")) {
            this.pageNumber++;
            this.wordNumberWithinPage = 0;
            this.nFromPB = attributes.getValue("n");
            if (this.nFromPB == null) {
                this.nFromPB = attributes.getValue("N");
            }
            this.facsFromPB = attributes.getValue("facs");
            if (this.facsFromPB == null) {
                this.facsFromPB = attributes.getValue("ref");
            }
            if (this.facsFromPB == null) {
                this.facsFromPB = attributes.getValue("REF");
            }
            this.facsFromPB = StringUtils.safeString(this.facsFromPB);
            // Consecutive page breaks with the same facs value advance the column; a new value resets it.
            if (this.facsFromPB.length() > 0) {
                if (this.facsFromPB.equals(this.prevFacsFromPB)) {
                    this.pageColumn++;
                } else {
                    this.pageColumn = 0;
                }
            }
            this.prevFacsFromPB = this.facsFromPB;
        }
        if (str3.equalsIgnoreCase("w") || str3.equalsIgnoreCase("pc")) {
            this.gapCount = 0;
            z = false;
            this.id = attributes.getValue(this.idAttrName);
            String value3 = attributes.getValue(WordAttributeNames.tok);
            String value4 = attributes.getValue(WordAttributeNames.spe);
            String value5 = attributes.getValue(WordAttributeNames.pos);
            String value6 = attributes.getValue(WordAttributeNames.eos);
            String value7 = attributes.getValue(WordAttributeNames.lem);
            String value8 = attributes.getValue(WordAttributeNames.reg);
            // NOTE(review): the result of this lookup is discarded.
            attributes.getValue(WordAttributeNames.part);
            String value9 = attributes.getValue(WordAttributeNames.rend);
            String value10 = attributes.getValue(WordAttributeNames.type);
            String value11 = attributes.getValue(WordAttributeNames.unit);
            // Replace the fake soft hyphen marker in the token with "-".
            String replaceAll = StringUtils.replaceAll(value3, CharUtils.CHAR_FAKE_SOFT_HYPHEN_STRING, "-");
            // If the token carries the sup-text marker, strip it from token, spelling and lemma.
            if (replaceAll.indexOf(CharUtils.CHAR_SUP_TEXT_MARKER_STRING) >= 0) {
                replaceAll = StringUtils.replaceAll(replaceAll, CharUtils.CHAR_SUP_TEXT_MARKER_STRING, "");
                if (value4 != null) {
                    value4 = StringUtils.replaceAll(value4, CharUtils.CHAR_SUP_TEXT_MARKER_STRING, "");
                }
                if (value7 != null) {
                    value7 = StringUtils.replaceAll(value7, CharUtils.CHAR_SUP_TEXT_MARKER_STRING, "");
                }
            }
            // NOTE(review): assumes the incoming ID is a non-null decimal integer; otherwise this throws NumberFormatException.
            int parseInt = Integer.parseInt(this.id);
            // z2: this element has a different incoming ID from the previous w/pc element.
            boolean z2 = !this.id.equals(this.lastID);
            // A new ID means the previously held word is complete: write it out now.
            if (this.pendingWordElement != null && z2) {
                emitWordElement(this.pendingWordElement.getURI(), this.pendingWordElement.getLocalName(), this.pendingWordElement.getQName(), this.pendingWordElement.getAttributes(), this.pendingWordElement.getText(), true, false);
                this.pendingWordElement = null;
            }
            // Part flag: "N" for an unsplit word; for a split word "I" while the remaining count
            // still equals the total, "F" when at most one part remains, otherwise "M".
            if (this.splitWords.containsKey(Integer.valueOf(parseInt))) {
                int intValue = this.splitWordsCopy.get(Integer.valueOf(parseInt)).intValue();
                if (intValue == this.splitWords.get(Integer.valueOf(parseInt)).intValue()) {
                    str4 = "I";
                    this.wordNumberWithinPage++;
                } else {
                    str4 = intValue <= 1 ? "F" : "M";
                }
                this.splitWordsCopy.put(Integer.valueOf(parseInt), Integer.valueOf(intValue - 1));
            } else {
                str4 = "N";
                this.wordNumberWithinPage++;
            }
            // NOTE(review): the next two statements have no effect (empty branch, discarded result).
            if (StringUtils.safeString(this.nFromPB).length() == 0) {
            }
            StringUtils.safeString(this.facsFromPB);
            // Build the new ID: base file name, "-", then a part that depends on the ID scheme.
            String str6 = this.baseFileName + "-";
            switch (this.idType) {
                case READING_CONTEXT_ORDER:
                    str6 = str6 + ID_FORMATTER.format(parseInt * this.idSpacing);
                    break;
                case WORD_WITHIN_PAGE_BLOCK:
                    str6 = str6 + PAGE_FORMATTER.format(this.pageNumber) + "-" + WORD_FORMATTER.format(this.wordNumberWithinPage * this.idSpacing);
                    break;
                case USE_PAGE_BLOCK_ATTRIBUTES:
                    str6 = str6 + generateTokenLabel();
                    break;
            }
            // Split-word parts get a ".k" suffix, k being the number of parts consumed so far (1-based).
            if (!str4.equals("N")) {
                str6 = str6 + "." + (this.splitWords.get(Integer.valueOf(parseInt)).intValue() - this.splitWordsCopy.get(Integer.valueOf(parseInt)).intValue());
            }
            setAttributeValue(attributesImpl, this.idAttrName, str6);
            if (this.xmlTokenLabelEmit) {
                setAttributeValue(attributesImpl, this.xmlTokenLabelAttribute, generateTokenLabel());
            }
            // All parts of a split word share one ordinal.
            if (z2) {
                this.wordOrdinal++;
            }
            if (this.outputWordOrdinal) {
                setAttributeValue(attributesImpl, WordAttributeNames.ord, this.wordOrdinal + "");
            }
            this.lastID = this.id;
            this.lastIDString = str6;
            // Inside a foreign-language context other than "fw-en": the part of speech becomes the
            // foreign word tag and the lemma becomes the spelling, except for numbers, symbols and punctuation.
            if (!this.foreignStack.isEmpty() && this.foreignStack.peek().length() > 0 && !this.foreignStack.peek().equals("fw-en") && !this.posTags.isNumberTag(value5) && !this.posTags.isSymbolTag(value5) && !this.posTags.isPunctuationTag(value5)) {
                value5 = this.foreignStack.peek();
                value7 = value4;
            }
            // Inside a div of type "errata": the part of speech becomes "zz" and the lemma becomes
            // the spelling, with the same exceptions.
            if (!this.divStack.isEmpty() && this.divStack.peek().length() > 0 && this.divStack.peek().equals("errata") && !this.posTags.isNumberTag(value5) && !this.posTags.isSymbolTag(value5) && !this.posTags.isPunctuationTag(value5)) {
                value5 = "zz";
                value7 = value4;
            }
            // Defaults for missing attributes: spelling <- token; pos, lemma, reg <- spelling; eos <- "0".
            if (value4 == null) {
                value4 = replaceAll;
            }
            if (value5 == null) {
                value5 = value4;
            }
            if (value7 == null) {
                value7 = value4;
            }
            if (value6 == null) {
                value6 = "0";
            }
            if (value8 == null) {
                value8 = value4;
            }
            setAttributeValue(attributesImpl, WordAttributeNames.eos, value6);
            // Each of lem/pos/reg/spe/tok is removed when tokenizing only or when its output
            // setting is off; otherwise it is (re)written with the value computed above.
            if (this.tokenizingOnly || !this.morphAdornerSettings.outputLemma) {
                removeAttribute(attributesImpl, WordAttributeNames.lem);
            } else {
                setAttributeValue(attributesImpl, WordAttributeNames.lem, value7);
            }
            if (this.tokenizingOnly || !this.morphAdornerSettings.outputPartOfSpeech) {
                removeAttribute(attributesImpl, WordAttributeNames.pos);
            } else {
                setAttributeValue(attributesImpl, WordAttributeNames.pos, value5);
            }
            if (this.tokenizingOnly || !this.morphAdornerSettings.outputStandardSpelling) {
                removeAttribute(attributesImpl, WordAttributeNames.reg);
            } else {
                setAttributeValue(attributesImpl, WordAttributeNames.reg, value8);
            }
            if (this.tokenizingOnly || !this.morphAdornerSettings.outputSpelling) {
                removeAttribute(attributesImpl, WordAttributeNames.spe);
            } else {
                setAttributeValue(attributesImpl, WordAttributeNames.spe, value4);
            }
            if (this.tokenizingOnly || !this.morphAdornerSettings.outputOriginalToken) {
                removeAttribute(attributesImpl, WordAttributeNames.tok);
            } else {
                setAttributeValue(attributesImpl, WordAttributeNames.tok, replaceAll);
            }
            setAttributeValue(attributesImpl, WordAttributeNames.part, str4);
            if (value9 != null) {
                setAttributeValue(attributesImpl, WordAttributeNames.rend, value9);
            }
            if (value10 != null) {
                setAttributeValue(attributesImpl, WordAttributeNames.type, value10);
            }
            if (value11 != null) {
                setAttributeValue(attributesImpl, WordAttributeNames.unit, value11);
            }
            // Non-redundant mode: drop attributes whose value equals the default derived above.
            if (this.outputNonredundantAttributesOnly) {
                if (value6.equals("0")) {
                    removeAttribute(attributesImpl, WordAttributeNames.eos);
                }
                if (value4.equals(replaceAll)) {
                    removeAttribute(attributesImpl, WordAttributeNames.spe);
                }
                if (value7.equals(value4)) {
                    removeAttribute(attributesImpl, WordAttributeNames.lem);
                }
                if (value5.equals(value4)) {
                    removeAttribute(attributesImpl, WordAttributeNames.pos);
                }
                if (value8.equals(value4)) {
                    removeAttribute(attributesImpl, WordAttributeNames.reg);
                }
                if (str4.equals("N")) {
                    removeAttribute(attributesImpl, WordAttributeNames.part);
                }
            }
            // Only the first part of a word takes part in blank placement and sentence tracking.
            if (z2) {
                if (this.outputWhitespace) {
                    if (this.sentenceMelder.shouldOutputBlank(value4, this.isFirstWord)) {
                        this.sentenceMelder.outputBlank();
                    }
                    this.sentenceMelder.processWord(value4);
                }
                // A word flagged eos="1" makes the following word the first of a sentence.
                this.isFirstWord = value6.equals("1");
            }
            // Hold the element; characters() appends its text and a later event emits it.
            this.pendingWordElement = new PendingElement(str, str2, str3, attributesImpl);
        } else {
            // Any non-word element first flushes the held word.
            if (this.pendingWordElement != null) {
                emitWordElement(this.pendingWordElement.getURI(), this.pendingWordElement.getLocalName(), this.pendingWordElement.getQName(), this.pendingWordElement.getAttributes(), this.pendingWordElement.getText(), true, false);
                this.pendingWordElement = null;
                this.gapCount = 0;
            }
            if (str3.equalsIgnoreCase("gap")) {
                // A gap without an ID gets one derived from the last word ID (or from a zero
                // word ID when no word has been seen yet), suffixed "-gap" + running gap count.
                String value12 = attributes.getValue(this.idAttrName);
                if (value12 == null || value12.length() == 0) {
                    if (this.lastIDString.length() == 0) {
                        String str7 = this.baseFileName + "-";
                        switch (this.idType) {
                            case READING_CONTEXT_ORDER:
                                str7 = str7 + ID_FORMATTER.format(0L);
                                break;
                            case WORD_WITHIN_PAGE_BLOCK:
                                str7 = str7 + PAGE_FORMATTER.format(this.pageNumber) + "-" + WORD_FORMATTER.format(0L);
                                break;
                            case USE_PAGE_BLOCK_ATTRIBUTES:
                                str7 = str7 + generateTokenLabel();
                                break;
                        }
                        str5 = str7 + "-gap" + this.gapCount;
                    } else {
                        str5 = this.lastIDString + "-gap" + this.gapCount;
                    }
                    this.gapCount++;
                    // NOTE(review): the ID is written as "xml:id" here, whereas the lookup above uses idAttrName.
                    setAttributeValue(attributesImpl, "xml:id", str5);
                    // A labelled gap consumes a word number within the page.
                    if (this.xmlTokenLabelEmit) {
                        this.wordNumberWithinPage++;
                        setAttributeValue(attributesImpl, this.xmlTokenLabelAttribute, generateTokenLabel());
                    }
                }
            } else if (str3.equalsIgnoreCase("div")) {
                // Track the lower-cased div type; untyped divs are recorded as "*div".
                String value13 = attributes.getValue("type");
                if (value13 == null) {
                    value13 = attributes.getValue("TYPE");
                }
                if (value13 == null || value13.length() == 0) {
                    value13 = "*div";
                }
                this.divStack.push(value13.toLowerCase());
            } else if (!str3.equalsIgnoreCase("foreign") && !this.morphAdornerSettings.xgOptions.isSoftTag(str3)) {
                // Neither "foreign" nor a soft tag: save the sentence state for jump tags, then
                // start a fresh sentence context.
                if (this.morphAdornerSettings.xgOptions.isJumpTag(str3)) {
                    this.jumpStack.push(new XMLWriterState(this.isFirstWord, this.sentenceMelder));
                }
                this.sentenceMelder.reset();
                this.isFirstWord = true;
            }
        }
        // Remember the namespace URI of the first element seen and pass it to the sentence melder.
        if (this.elementURI == null) {
            this.elementURI = str;
            if (this.outputWhitespace) {
                this.sentenceMelder.setURI(this.elementURI);
            }
        }
        // Forward non-word elements. Names starting with "zzzz" are never forwarded:
        // "zzzzlj" produces nothing, every other "zzzz" name produces a blank.
        if (z) {
            if (!str3.startsWith("zzzz")) {
                super.startElement(str, str2, str3, attributesImpl);
            } else if (str3.equals("zzzzbl")) {
                this.sentenceMelder.outputBlank();
            } else {
                if (str3.equals("zzzzlj")) {
                    return;
                }
                this.sentenceMelder.outputBlank();
            }
        }
    }

    /**
     * Receives character data. While a word element is being held, the text is appended to it
     * so it can be written together with the element; otherwise the text is passed straight on
     * to the parent handler.
     *
     * @param cArr character buffer
     * @param i    start offset in the buffer
     * @param i2   number of characters
     * @throws SAXException if the parent handler fails
     */
    @Override // org.xml.sax.helpers.XMLFilterImpl, org.xml.sax.ContentHandler
    public void characters(char[] cArr, int i, int i2) throws SAXException {
        if (this.pendingWordElement == null) {
            super.characters(cArr, i, i2);
            return;
        }
        this.pendingWordElement.appendText(cArr, i, i2);
    }

    /**
     * Writes a held word element (start tag, text, end tag) to the parent handler, together with
     * any pseudo-page milestones and end-of-sentence {@code pc} element that belong around it,
     * and records the word in {@code sortedWords}.
     *
     * @param str            namespace URI of the word element
     * @param str2           local name of the word element
     * @param str3           qualified name of the word element
     * @param attributesImpl attributes of the word element; modified in place
     * @param str4           text content of the word element
     * @param z              when true, a blank may be written after the word if it ends a sentence
     * @param z2             when true, the word is forced to be end-of-sentence (eos="1")
     * @throws SAXException if the parent handler fails
     */
    public void emitWordElement(String str, String str2, String str3, AttributesImpl attributesImpl, String str4, boolean z, boolean z2) throws SAXException {
        String value;
        String value2 = attributesImpl.getValue(WordAttributeNames.p);
        String value3 = attributesImpl.getValue(WordAttributeNames.part);
        // z3: unsplit word or initial part. NOTE(review): computed but never read in this method.
        boolean z3 = value3 == null || value3.equals("N") || value3.equals("I");
        // z4: unsplit word or final part.
        boolean z4 = value3 == null || value3.equals("N") || value3.equals("F");
        // Open a pseudo-page before this word if none is open and no words have been counted yet.
        if (z4 && this.outputPseudoPageBoundaryMilestones && this.pseudoPageWordCount == 0 && !this.pseudoPageStarted) {
            // Milestone path: the word's path with its last segment replaced by "milestone[n]".
            if (value2 != null && value2.length() > 0) {
                int lastIndexOf = value2.lastIndexOf("\\");
                if (lastIndexOf > 0) {
                    value2 = value2.substring(0, lastIndexOf);
                }
                value2 = value2 + "\\milestone[" + (this.pseudoPageCount + 1) + "]";
            }
            emitPseudoPageElement(createPseudoPageElement(str, false, true, value2));
        }
        this.pseudoPageWordCount++;
        if (z2) {
            setAttributeValue(attributesImpl, WordAttributeNames.eos, "1");
        }
        // Capture the end-of-sentence state before the attribute is possibly removed below.
        String value4 = attributesImpl.getValue(WordAttributeNames.eos);
        boolean z5 = value4 != null && value4.equals("1");
        if (!this.morphAdornerSettings.outputEOSFlag) {
            removeAttribute(attributesImpl, WordAttributeNames.eos);
        }
        // Non-redundant output: drop tok when it equals the text, part when "N", eos when "0".
        if ((this.outputNonredundantAttributesOnly || this.outputNonredundantTokenAttribute) && (value = attributesImpl.getValue(WordAttributeNames.tok)) != null && value.equals(str4)) {
            removeAttribute(attributesImpl, WordAttributeNames.tok);
        }
        if ((this.outputNonredundantAttributesOnly || this.outputNonredundantPartAttribute) && value3 != null && value3.equals("N")) {
            removeAttribute(attributesImpl, WordAttributeNames.part);
        }
        if ((this.outputNonredundantAttributesOnly || this.outputNonredundantEosAttribute) && value4 != null && value4.equals("0")) {
            removeAttribute(attributesImpl, WordAttributeNames.eos);
        }
        // When sentence ends are marked with pc elements, the eos attribute is dropped and a
        // sentence-ending pc element is tagged unit="sentence" instead.
        if (z5 && this.usePCToMarkEndOfSentence) {
            removeAttribute(attributesImpl, WordAttributeNames.eos);
            if (str3.equalsIgnoreCase("pc")) {
                setAttributeValue(attributesImpl, "unit", "sentence");
            }
        }
        // Sentence number and word number attributes are never written out.
        removeAttribute(attributesImpl, WordAttributeNames.sn);
        removeAttribute(attributesImpl, WordAttributeNames.wn);
        super.startElement(str, str2, str3, attributesImpl);
        // Written text has fake soft hyphens turned into "-" and sup-text markers removed.
        String replaceAll = StringUtils.replaceAll(StringUtils.replaceAll(str4, CharUtils.CHAR_FAKE_SOFT_HYPHEN_STRING, "-"), CharUtils.CHAR_SUP_TEXT_MARKER_STRING, "");
        super.characters(replaceAll.toCharArray(), 0, replaceAll.length());
        super.endElement(str, str2, str3);
        // Record the word; a missing ordinal attribute is recorded as 0.
        String value5 = attributesImpl.getValue(this.idAttrName);
        String value6 = attributesImpl.getValue(WordAttributeNames.ord);
        if (value6 == null) {
            value6 = "0";
        }
        this.sortedWords.add(new SentenceAndWordNumber(value5, Integer.parseInt(value6), value3, z5));
        // A sentence-ending word that is not itself a pc element is followed by an empty
        // pc element with unit="sentence" and the word's ID plus "-eos".
        if (z5 && this.usePCToMarkEndOfSentence && !str3.equalsIgnoreCase("pc")) {
            AttributesImpl attributesImpl2 = new AttributesImpl();
            setAttributeValue(attributesImpl2, "unit", "sentence");
            setAttributeValue(attributesImpl2, "xml:id", value5 + "-eos");
            this.wordNumberWithinPage++;
            if (this.xmlTokenLabelEmit) {
                setAttributeValue(attributesImpl2, this.xmlTokenLabelAttribute, generateTokenLabel());
            }
            super.startElement(str, "pc", "pc", attributesImpl2);
            super.endElement(str, "pc", "pc");
        }
        this.emittedWordCount++;
        if (this.outputWhitespace && z && this.isFirstWord && z4) {
            this.sentenceMelder.outputBlank();
        }
        // Close the pseudo-page when it is full or when the last expected word has been written.
        if (z4 && this.outputPseudoPageBoundaryMilestones) {
            if (this.pseudoPageWordCount >= this.pseudoPageSize || this.emittedWordCount >= this.totalWordsToEmit) {
                // value2 may already hold a milestone path from the start milestone above; its
                // last segment is replaced again here.
                if (value2 != null && value2.length() > 0) {
                    int lastIndexOf2 = value2.lastIndexOf("\\");
                    if (lastIndexOf2 > 0) {
                        value2 = value2.substring(0, lastIndexOf2);
                    }
                    value2 = value2 + "\\milestone[" + (this.pseudoPageCount + 1) + "]";
                }
                emitPseudoPageElement(createPseudoPageElement(str, false, false, value2));
            }
        }
    }

    /**
     * Handles the end of an element.
     *
     * <p>Pops the per-element foreign-language entry and, for {@code div}, the div type pushed by
     * {@code startElement}. The two pops are independent: {@code startElement} pushes a
     * foreign-language entry for every element, so chaining the div pop behind an
     * {@code else} would make it unreachable and leave stale div types (for example "errata")
     * on the stack after their div has closed.
     *
     * <p>A held word is written out when any element other than {@code w}/{@code pc} ends.
     * End tags of {@code w}/{@code pc} and of names starting with {@code "zzzz"} are not
     * forwarded. When a div whose type is a pseudo-page container closes and more words are
     * still expected, the current pseudo-page is closed and a new one is opened.
     *
     * @param str  namespace URI
     * @param str2 local name
     * @param str3 qualified name; all element tests in this method use this value
     * @throws SAXException if the parent handler fails
     */
    @Override // org.xml.sax.helpers.XMLFilterImpl, org.xml.sax.ContentHandler
    public void endElement(String str, String str2, String str3) throws SAXException {
        if (!this.foreignStack.isEmpty()) {
            this.foreignStack.pop();
        }
        boolean closedDiv = false;
        String closedDivType = "";
        if (str3.equalsIgnoreCase("div") && !this.divStack.isEmpty()) {
            closedDivType = this.divStack.pop();
            closedDiv = true;
        }

        boolean isJumpTag = this.morphAdornerSettings.xgOptions.isJumpTag(str3);
        boolean isSoftTag = this.morphAdornerSettings.xgOptions.isSoftTag(str3);
        boolean isHardTag = !isJumpTag && !isSoftTag;
        boolean isWordTag = str3.equalsIgnoreCase("w") || str3.equalsIgnoreCase("pc");

        if (this.pendingWordElement != null && !isWordTag) {
            // The closing element can force the held word to end its sentence.
            boolean forceEndOfSentence = (isHardTag && this.morphAdornerSettings.closeSentenceAtEndOfHardTag)
                    || (isJumpTag && this.morphAdornerSettings.closeSentenceAtEndOfJumpTag)
                    || str3.equalsIgnoreCase("sp")
                    || str3.equalsIgnoreCase("speaker");
            // A trailing blank is only allowed when the closing element is a soft tag.
            emitWordElement(this.pendingWordElement.getURI(), this.pendingWordElement.getLocalName(), this.pendingWordElement.getQName(), this.pendingWordElement.getAttributes(), this.pendingWordElement.getText(), isSoftTag, forceEndOfSentence);
            this.pendingWordElement = null;
        }

        if (!isWordTag && !str3.startsWith("zzzz")) {
            super.endElement(str, str2, str3);
        }

        if (isJumpTag) {
            // Restore the sentence state saved when the jump tag was opened.
            if (!this.jumpStack.isEmpty()) {
                XMLWriterState saved = this.jumpStack.pop();
                this.isFirstWord = saved.getIsFirstWord();
                this.sentenceMelder.setState(saved.getSentenceMelderState());
            }
        } else if (!isSoftTag) {
            this.sentenceMelder.reset();
            this.isFirstWord = true;
        }

        // End of a pseudo-page container div: close the current pseudo-page and open the next,
        // provided there are still words to come. No path attribute is available here.
        // NOTE(review): the "end" milestone is written even if no pseudo-page is currently open.
        if (this.outputPseudoPageBoundaryMilestones && closedDiv && this.pseudoPageContainerDivTypes.contains(closedDivType)) {
            if (this.emittedWordCount < this.totalWordsToEmit) {
                emitPseudoPageElement(createPseudoPageElement(str, false, false, null));
                emitPseudoPageElement(createPseudoPageElement(str, false, true, null));
            }
        }
    }

    /**
     * Builds a pseudo-page {@code milestone} element and updates the pseudo-page bookkeeping.
     *
     * <p>A start milestone increments the pseudo-page count and marks a page as open; an end
     * milestone marks it as closed. In both cases the per-page word count is reset to zero.
     *
     * @param str  namespace URI for the milestone element
     * @param z    not used by this method
     * @param z2   true for a "start" milestone, false for an "end" milestone
     * @param str2 value for the path attribute, or null/empty to omit it
     * @return the milestone element, carrying unit="pseudopage", n and position attributes
     */
    public PendingElement createPseudoPageElement(String str, boolean z, boolean z2, String str2) {
        this.pseudoPageStarted = z2;
        if (z2) {
            this.pseudoPageCount++;
        }
        this.pseudoPageWordCount = 0;

        String position;
        if (z2) {
            position = "start";
        } else {
            position = "end";
        }

        AttributesImpl milestoneAttributes = new AttributesImpl();
        setAttributeValue(milestoneAttributes, "unit", "pseudopage");
        setAttributeValue(milestoneAttributes, "n", String.valueOf(this.pseudoPageCount));
        setAttributeValue(milestoneAttributes, "position", position);
        boolean hasPath = str2 != null && str2.length() > 0;
        if (hasPath) {
            setAttributeValue(milestoneAttributes, WordAttributeNames.p, str2);
        }
        return new PendingElement(str, "milestone", "milestone", milestoneAttributes);
    }

    /**
     * Writes a pseudo-page milestone as an empty element to the parent handler.
     * Does nothing for a null element. Emission is best effort: any exception raised while
     * writing is ignored.
     *
     * @param pendingElement the milestone element to write, or null
     */
    public void emitPseudoPageElement(PendingElement pendingElement) {
        if (pendingElement == null) {
            return;
        }
        try {
            super.startElement(pendingElement.getURI(), pendingElement.getLocalName(), pendingElement.getQName(), pendingElement.getAttributes());
            super.endElement(pendingElement.getURI(), pendingElement.getLocalName(), pendingElement.getQName());
        } catch (Exception ignored) {
            // Deliberately ignored: a failed milestone must not abort the document.
        }
    }

    /**
     * Sets the part of speech tag set used by this filter.
     *
     * @param partOfSpeechTags the tag set
     */
    public void setPosTags(PartOfSpeechTags partOfSpeechTags) {
        posTags = partOfSpeechTags;
    }

    /**
     * Sets the split word map and creates the working copy whose counts are decremented as the
     * parts of each split word are processed.
     *
     * @param map incoming word ID to number of parts; null is treated as "no split words"
     */
    protected void setSplitWords(Map<Integer, Integer> map) {
        Map<Integer, Integer> partCounts = map;
        if (partCounts == null) {
            // Keep both maps non-null so lookups during parsing cannot fail.
            partCounts = MapFactory.createNewMap();
        }
        this.splitWords = partCounts;
        this.splitWordsCopy = MapFactory.createNewMap();
        this.splitWordsCopy.putAll(partCounts);
    }

    /**
     * Derives the ID prefix from a file name and sizes the zero padding of the ID formatters.
     *
     * <p>The prefix is the file name without path or extension, with every "." replaced by "_".
     * The ID scheme and spacing are read from the settings.
     *
     * <p>NOTE: the formatters configured here are static, so the padding chosen by the most
     * recent call applies to every instance of this class.
     *
     * @param str file name the IDs are based on
     * @param i   largest incoming word ID
     * @param i2  number of pages; when not positive, the word formatter uses the same width as
     *            the ID formatter and the page formatter uses a single digit
     */
    protected void setIDFormat(String str, int i, int i2) {
        String name = FileNameUtils.stripPathName(str);
        name = FileNameUtils.changeFileExtension(name, "");
        this.baseFileName = StringUtils.replaceAll(name, ".", "_");
        this.idType = this.morphAdornerSettings.xmlIDType;
        this.idSpacing = this.morphAdornerSettings.xmlIDSpacing;

        // Products are formed in long so a large ID count times the spacing cannot wrap around.
        int idDigits = minimumDigitsFor((long) i * this.idSpacing);
        ID_FORMATTER.setMinimumIntegerDigits(idDigits);

        PAGE_FORMATTER.setMinimumIntegerDigits(minimumDigitsFor(i2));

        // With pages, words are padded for up to 999 words per page.
        int wordDigits = i2 > 0 ? minimumDigitsFor(999L * this.idSpacing) : idDigits;
        WORD_FORMATTER.setMinimumIntegerDigits(wordDigits);
    }

    /** Returns the number of decimal digits needed for a value, treating values below 1 as 1. */
    private static int minimumDigitsFor(long value) {
        int clamped = (int) Math.max(1L, Math.min(value, (long) Integer.MAX_VALUE));
        return ((int) ArithUtils.log10(clamped)) + 1;
    }

    /**
     * Sets the XML writer and creates a new sentence melder that writes to it.
     *
     * @param xMLWriter the writer
     */
    public void setWriter(XMLWriter xMLWriter) {
        writer = xMLWriter;
        sentenceMelder = new XMLSentenceMelder(xMLWriter);
    }

    /**
     * Replaces the sentence melder used for blank placement.
     *
     * @param xMLSentenceMelder the sentence melder
     */
    public void setSentenceMelder(XMLSentenceMelder xMLSentenceMelder) {
        sentenceMelder = xMLSentenceMelder;
    }

    /**
     * Returns the sentence melder currently in use.
     *
     * @return the sentence melder; null if none has been set
     */
    public XMLSentenceMelder getSentenceMelder() {
        return sentenceMelder;
    }

    /**
     * Determines the foreign-language part of speech tag that applies inside an element.
     *
     * <p>The language is read from the {@code xml:lang}, {@code lang} or {@code LANG} attribute,
     * in that order. Only the part before the first "-" is used, and it is matched
     * case-insensitively against the known language codes; unknown codes map to "other".
     * Without a language attribute the element inherits the tag of the enclosing element; if
     * there is none, a {@code foreign} element gets the "unknown" foreign word tag and any other
     * element gets the empty string.
     *
     * @param str        qualified name of the element
     * @param attributes attributes of the element
     * @return the foreign word tag, or the empty string when no foreign language applies
     */
    public String getForeignLanguageTag(String str, Attributes attributes) {
        String language = attributes.getValue("xml:lang");
        if (language == null) {
            language = attributes.getValue("lang");
        }
        if (language == null) {
            language = attributes.getValue("LANG");
        }

        if (language == null) {
            if (!this.foreignStack.isEmpty()) {
                return this.foreignStack.peek();
            }
            return str.equalsIgnoreCase("foreign") ? this.posTags.getForeignWordTag("unknown") : "";
        }

        int dash = language.indexOf("-");
        if (dash >= 0) {
            language = language.substring(0, dash);
        }
        // Language codes are stored in lower case; compare case-insensitively.
        String languageName = languageTags.get(language.toLowerCase(java.util.Locale.ROOT));
        return this.posTags.getForeignWordTag(languageName != null ? languageName : "other");
    }

    /**
     * Renders attributes as {@code name=value} pairs, each followed by a single space.
     *
     * @param attributes the attributes to render; may be null
     * @return the rendered text, or the empty string for null or empty attributes
     */
    protected static String getDisplayableAttributes(Attributes attributes) {
        if (attributes == null) {
            return "";
        }
        StringBuilder text = new StringBuilder();
        for (int index = 0; index < attributes.getLength(); index++) {
            text.append(attributes.getLocalName(index))
                .append("=")
                .append(attributes.getValue(index))
                .append(" ");
        }
        return text.toString();
    }

    // Default formatter widths (setIDFormat adjusts them per document) and the table that maps
    // language codes found in xml:lang/lang attributes to language names.
    static {
        PAGE_FORMATTER.setMinimumIntegerDigits(4);
        WORD_FORMATTER.setMinimumIntegerDigits(3);
        ID_FORMATTER.setMinimumIntegerDigits(8);
        // IDs must not contain locale-specific grouping separators.
        PAGE_FORMATTER.setGroupingUsed(false);
        WORD_FORMATTER.setGroupingUsed(false);
        ID_FORMATTER.setGroupingUsed(false);
        languageTags = new TreeMap<>();
        languageTags.put("ang", "old-english");
        languageTags.put("ara", "arabic");
        languageTags.put("ar", "arabic");
        languageTags.put("cat", "catalan");
        languageTags.put("ca", "catalan");
        languageTags.put("eng", "english");
        languageTags.put("en", "english");
        languageTags.put("enm", "middle-english");
        languageTags.put("deu", "german");
        languageTags.put("de", "german");
        languageTags.put("dut", "dutch");
        languageTags.put("nld", "dutch");
        // "nl" is the two-letter code for Dutch (it was previously mapped to "german").
        languageTags.put("nl", "dutch");
        languageTags.put("fra", "french");
        languageTags.put("fre", "french");
        languageTags.put("fr", "french");
        languageTags.put("frm", "middle-french");
        languageTags.put("fro", "old-french");
        languageTags.put("grc", "greek");
        languageTags.put("gre", "greek");
        languageTags.put("ell", "greek");
        languageTags.put("el", "greek");
        languageTags.put("heb", "hebrew");
        languageTags.put("he", "hebrew");
        languageTags.put("ita", "italian");
        languageTags.put("ital", "italian");
        languageTags.put("it", "italian");
        languageTags.put("lat", "latin");
        languageTags.put("la", "latin");
        languageTags.put("may", "malay");
        languageTags.put("msa", "malay");
        languageTags.put("ms", "malay");
        languageTags.put("mlg", "malagasy");
        languageTags.put("mg", "malagasy");
        languageTags.put("per", "persian");
        languageTags.put("fas", "persian");
        languageTags.put("fa", "persian");
        languageTags.put("por", "portuguese");
        languageTags.put("pt", "portuguese");
        languageTags.put("sai", "south-american-indian");
        languageTags.put("san", "sanskrit");
        languageTags.put("sa", "sanskrit");
        languageTags.put("sco", "scots");
        languageTags.put("spa", "spanish");
        languageTags.put("es", "spanish");
        languageTags.put("wel", "welsh");
        languageTags.put("cym", "welsh");
        languageTags.put("cy", "welsh");
    }
}
