package org.xmlcml.ami2.plugins.word;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.xmlcml.ami2.plugins.AMIArgProcessor;
import org.xmlcml.ami2.plugins.AMISearcher;
import org.xmlcml.cmine.args.ArgIterator;
import org.xmlcml.cmine.args.ArgumentOption;
import org.xmlcml.cmine.files.CTree;
import org.xmlcml.cmine.files.ContentProcessor;
import org.xmlcml.cmine.files.ResultsElement;
import org.xmlcml.cmine.files.ResultsElementList;
import org.xmlcml.cmine.lookup.DefaultStringDictionary;
import org.xmlcml.euclid.IntRange;
import org.xmlcml.xml.XMLUtil;

/* loaded from: input_file:org/xmlcml/ami2/plugins/word/WordArgProcessor.class */
/**
 * Argument processor for the {@code org.xmlcml.ami2.plugins.word} package.
 *
 * <p>The {@code parseXxx} methods read tokens for one command-line option from an
 * {@link ArgIterator} and store them in fields of this object; the {@code runXxx},
 * {@code outputXxx} and {@code finalSummary} methods act on the stored state.
 * Members such as {@code wordCollectionFactory}, {@code searcherList},
 * {@code cTreeList}, {@code currentCTree} and {@code summaryFileName} are inherited
 * and not declared in this class.
 */
public class WordArgProcessor extends AMIArgProcessor {
    public static final Logger LOG = Logger.getLogger(WordArgProcessor.class);

    // Names of the analysis methods accepted by parseWords().
    public static final String WORD_LENGTHS = "wordLengths";
    public static final String WORD_FREQUENCIES = "wordFrequencies";
    public static final String WORD_SEARCH = "wordSearch";
    public static final List<String> ANALYSIS_METHODS;

    // Names of the word types accepted by parseWordTypes().
    public static final String ABBREVIATION = "abbreviation";
    public static final String ACRONYM = "acronym";
    public static final String CAPITALIZED = "capitalized";
    public static final List<String> WORD_TYPES;

    // Names of the case-handling modes validated by checkWordCaseList().
    public static final String PRESERVE = "preserve";
    public static final String IGNORE = "ignore";
    public static final List<String> CASE_TYPES;

    public static final String FREQUENCIES = "frequencies";

    // Summary method names and the file names their results are written to.
    private static final String TFIDF = "tfidf";
    private static final String TFIDF_XML = "tfidf.xml";
    private static final String TFIDF_HTML = "tfidf.html";
    private static final String AGGREGATE_FREQUENCY = "aggregate";
    private static final String AGGREGATE_XML = "aggregate.xml";
    private static final String AGGREGATE_HTML = "aggregate.html";
    private static final String BOOLEAN_FREQUENCY = "booleanFrequency";
    private static final String BOOLEAN_FREQUENCY_XML = "booleanFrequency.xml";
    private static final String BOOLEAN_FREQUENCY_HTML = "booleanFrequency.html";
    private static final String TFIDF_FREQUENCY = "tfidfFrequency";
    private static final String TFIDF_FREQUENCY_XML = "tfidfFrequency.xml";
    private static final String TFIDF_FREQUENCY_HTML = "tfidfFrequency.html";

    static final double MIN_FONT = 10.0d;
    static final double MAX_FONT = 30.0d;

    /** Stopword sets; created lazily by {@link #ensureStopwordSetList()}. */
    private List<WordSetWrapper> stopwordSetList;
    /** Analysis methods selected by {@link #parseWords}. */
    private List<String> chosenMethods;
    /** Range set by {@link #parseWordLengths}; {@code null} until then. */
    private IntRange wordLengthRange;
    /** Word types selected by {@link #parseWordTypes}; created lazily. */
    private List<String> chosenWordTypes;
    protected List<String> words;
    /** Set by {@link #parseStem}; {@code null} until then. */
    private Boolean stemming;
    /** Case modes set by {@link #parseCase}. */
    private List<String> wordCaseList;
    /** Summary method names set by {@link #parseSummary}; {@code null} until then. */
    private List<String> summaryMethods;
    WordResultsElementList frequenciesElementList;
    WordResultsElement aggregatedFrequenciesElement;
    /** Range set by {@link #parseMinCount}. */
    private IntRange wordCount;
    private WordResultsElement booleanFrequencyElement;
    /** Search results keyed by searcher title; created lazily. */
    private Map<String, ResultsElement> resultsByDictionary;

    /** Creates a processor with empty method and case lists. */
    public WordArgProcessor() {
        this.chosenMethods = new ArrayList<String>();
        this.wordCaseList = new ArrayList<String>();
    }

    /**
     * Creates a processor and immediately parses the given argument string.
     *
     * @param str the arguments, handed to the inherited {@code parseArgs}
     */
    public WordArgProcessor(String str) {
        this();
        parseArgs(str);
    }

    /**
     * Creates a processor and immediately parses the given argument array.
     *
     * @param strArr the arguments, handed to the inherited {@code parseArgs}
     */
    public WordArgProcessor(String[] strArr) {
        this();
        parseArgs(strArr);
    }

    /**
     * Reads the analysis methods for this run. With no tokens the list of known
     * methods is printed to {@code System.err} and the current choice is kept.
     *
     * @param argumentOption the option being parsed
     * @param argIterator    source of the option's tokens
     */
    public void parseWords(ArgumentOption argumentOption, ArgIterator argIterator) {
        List<String> tokens = argIterator.createTokenListUpToNextNonDigitMinus(argumentOption);
        if (tokens.isEmpty()) {
            helpMethods();
            return;
        }
        this.chosenMethods = getChosenList(ANALYSIS_METHODS, tokens);
    }

    /**
     * Reads the case-handling modes. With no tokens the list defaults to
     * {@link #PRESERVE}. The result is then filtered by {@link #checkWordCaseList()}.
     *
     * @param argumentOption the option being parsed
     * @param argIterator    source of the option's tokens
     */
    public void parseCase(ArgumentOption argumentOption, ArgIterator argIterator) {
        List<String> tokens = argIterator.createTokenListUpToNextNonDigitMinus(argumentOption);
        // Copy the tokens: checkWordCaseList() removes entries by index, which would
        // fail on a fixed-size list and would otherwise alter the iterator's own list.
        this.wordCaseList = new ArrayList<String>(tokens);
        if (this.wordCaseList.isEmpty()) {
            this.wordCaseList.add(PRESERVE);
        }
        checkWordCaseList();
    }

    /**
     * Reads the stemming flag.
     *
     * @param argumentOption the option being parsed
     * @param argIterator    source of the option's value
     */
    public void parseStem(ArgumentOption argumentOption, ArgIterator argIterator) {
        this.stemming = argIterator.getBoolean(argumentOption);
    }

    /**
     * Reads stopword set names and adds a stopword set for each of them.
     *
     * @param argumentOption the option being parsed
     * @param argIterator    source of the option's tokens
     */
    public void parseStopwords(ArgumentOption argumentOption, ArgIterator argIterator) {
        addStopwords(argIterator.createTokenListUpToNextNonDigitMinus(argumentOption));
    }

    /**
     * Reads the accepted range of word lengths.
     *
     * @param argumentOption the option being parsed
     * @param argIterator    source of the option's value
     * @throws RuntimeException if either end of the range is below 1
     */
    public void parseWordLengths(ArgumentOption argumentOption, ArgIterator argIterator) {
        this.wordLengthRange = argIterator.getIntRange(argumentOption);
        if (this.wordLengthRange.getMin() < 1 || this.wordLengthRange.getMax() < 1) {
            throw new RuntimeException("bad word lengths: " + this.wordLengthRange);
        }
    }

    /**
     * Reads the word types for this run. With no tokens the list of known types is
     * printed to {@code System.err} and the current choice is kept.
     *
     * @param argumentOption the option being parsed
     * @param argIterator    source of the option's tokens
     */
    public void parseWordTypes(ArgumentOption argumentOption, ArgIterator argIterator) {
        List<String> tokens = argIterator.createTokenListUpToNextNonDigitMinus(argumentOption);
        if (tokens.isEmpty()) {
            helpWordTypes();
            return;
        }
        this.chosenWordTypes = getChosenList(WORD_TYPES, tokens);
    }

    /**
     * Reads an integer range and stores it as the word count.
     *
     * @param argumentOption the option being parsed
     * @param argIterator    source of the option's value
     */
    public void parseMinCount(ArgumentOption argumentOption, ArgIterator argIterator) {
        this.wordCount = argIterator.getIntRange(argumentOption);
    }

    /**
     * Ensures the inherited word collection factory exists and asks it to extract words.
     *
     * @param argumentOption the option that triggered this run (not used here)
     */
    public void runExtractWords(ArgumentOption argumentOption) {
        ensureWordCollectionFactory();
        this.wordCollectionFactory.extractWords();
    }

    /**
     * Reads dictionary names, creates the dictionaries and adds one
     * {@link WordSearcher} per entry of the dictionary list to the searcher list.
     *
     * @param argumentOption the option being parsed
     * @param argIterator    source of the option's tokens
     */
    public void parseSearch(ArgumentOption argumentOption, ArgIterator argIterator) {
        ensureSearcherList();
        createAndAddDictionaries(argIterator.createTokenListUpToNextNonDigitMinus(argumentOption));
        Iterator<DefaultStringDictionary> dictionaries = getDictionaryList().iterator();
        while (dictionaries.hasNext()) {
            this.searcherList.add(new WordSearcher(this, dictionaries.next()));
        }
    }

    /**
     * Writes every element of the content processor's results list: each element is
     * output through the content processor and then also written as HTML into the
     * location that call returns, under the name {@code CTree.RESULTS_HTML}.
     *
     * @param argumentOption the option that triggered this output
     */
    public void outputWords(ArgumentOption argumentOption) {
        ContentProcessor contentProcessor = getOrCreateContentProcessor();
        ResultsElementList resultsElementList = contentProcessor.getOrCreateResultsElementList();
        for (int i = 0; i < resultsElementList.size(); i++) {
            WordResultsElement wordResults = (WordResultsElement) resultsElementList.get(i);
            File htmlFile = new File(
                    contentProcessor.createResultsDirectoryAndOutputResultsElement(
                            argumentOption, resultsElementList.get(i)),
                    CTree.RESULTS_HTML);
            wordResults.writeResultsElementAsHTML(htmlFile, this);
        }
    }

    /**
     * Reads the summary method names. With no tokens an error is logged and any
     * previously stored list is kept.
     *
     * @param argumentOption the option being parsed
     * @param argIterator    source of the option's tokens
     */
    public void parseSummary(ArgumentOption argumentOption, ArgIterator argIterator) {
        List<String> tokens = argIterator.createTokenListUpToNextNonDigitMinus(argumentOption);
        if (tokens.isEmpty()) {
            LOG.error("parseSummary needs a list of actions");
            return;
        }
        this.summaryMethods = tokens;
    }

    /**
     * Aggregates the {@link #FREQUENCIES} results over all trees and runs each stored
     * summary method on them. Does nothing beyond the aggregation when no summary
     * methods were supplied.
     *
     * @param argumentOption the option that triggered this summary (not used here)
     */
    public void finalSummary(ArgumentOption argumentOption) {
        WordResultsElementList aggregated = aggregateOverCMDirList(getPlugin(), FREQUENCIES);
        ensureWordCollectionFactory();
        if (this.summaryMethods == null) {
            // parseSummary() leaves the field unset when it receives no tokens.
            LOG.warn("finalSummary: no summary methods given");
            return;
        }
        for (String method : this.summaryMethods) {
            runSummaryMethod(aggregated, this.wordCollectionFactory, method);
        }
    }

    /**
     * Runs one summary method and writes its result as XML and HTML below the
     * inherited {@code summaryFileName}. Nothing is done when {@code summaryFileName}
     * is {@code null}; an unrecognised method name is logged and skipped.
     *
     * @param resultsList per-tree results to summarise
     * @param factory     factory that computes the summary element
     * @param method      one of "aggregate", "booleanFrequency" or "tfidfFrequency"
     */
    private void runSummaryMethod(WordResultsElementList resultsList, WordCollectionFactory factory, String method) {
        if (this.summaryFileName == null) {
            return;
        }
        if (AGGREGATE_FREQUENCY.equals(method)) {
            this.aggregatedFrequenciesElement = factory.createAggregatedFrequenciesElement(resultsList);
            writeResultsElement(new File(this.summaryFileName, AGGREGATE_XML), this.aggregatedFrequenciesElement);
            this.aggregatedFrequenciesElement.writeResultsElementAsHTML(
                    new File(this.summaryFileName, AGGREGATE_HTML), this);
        } else if (BOOLEAN_FREQUENCY.equals(method)) {
            this.booleanFrequencyElement = factory.createBooleanFrequencies(this, resultsList);
            writeResultsElement(new File(this.summaryFileName, BOOLEAN_FREQUENCY_XML), this.booleanFrequencyElement);
            this.booleanFrequencyElement.writeResultsElementAsHTML(
                    new File(this.summaryFileName, BOOLEAN_FREQUENCY_HTML), this);
        } else if (TFIDF_FREQUENCY.equals(method)) {
            // The TFIDF summary is written to tfidf.xml / tfidf.html, not tfidfFrequency.*.
            WordResultsElement tfidfElement = factory.createTFIDFFrequencies(this, resultsList);
            writeResultsElement(new File(this.summaryFileName, TFIDF_XML), tfidfElement);
            tfidfElement.writeResultsElementAsHTML(new File(this.summaryFileName, TFIDF_HTML), this);
        } else {
            LOG.warn("Unknown summary method, skipped: " + method);
        }
    }

    /**
     * Runs every searcher in the searcher list and stores its result under the
     * searcher's title. Every searcher is cast to {@link WordSearcher}.
     *
     * @param argumentOption the option that triggered this search (not used here)
     */
    public void runSearch(ArgumentOption argumentOption) {
        ensureResultsByDictionary();
        ensureSearcherList();
        Iterator<AMISearcher> searchers = this.searcherList.iterator();
        while (searchers.hasNext()) {
            WordSearcher wordSearcher = (WordSearcher) searchers.next();
            String title = wordSearcher.getTitle();
            ResultsElement found = wordSearcher.searchWordList();
            found.setTitle(title);
            this.resultsByDictionary.put(title, found);
        }
    }

    /**
     * Outputs the stored search results under the option's name.
     *
     * @param argumentOption the option whose name is used for the output
     */
    public void outputSearch(ArgumentOption argumentOption) {
        outputResultsElements(argumentOption.getName());
    }

    /**
     * Replaces the current tree's results list with the stored search results and
     * asks the content processor to write them.
     *
     * @param str name passed to {@code createResultsDirectoriesAndOutputResultsElement}
     */
    private void outputResultsElements(String str) {
        // The map only exists after runSearch(); make sure iterating it cannot fail.
        ensureResultsByDictionary();
        ContentProcessor contentProcessor = this.currentCTree.getOrCreateContentProcessor();
        contentProcessor.clearResultsElementList();
        for (Map.Entry<String, ResultsElement> entry : this.resultsByDictionary.entrySet()) {
            ResultsElement resultsElement = entry.getValue();
            resultsElement.setTitle(entry.getKey());
            contentProcessor.addResultsElement(resultsElement);
        }
        contentProcessor.createResultsDirectoriesAndOutputResultsElement(str);
    }

    /**
     * Writes a results element to a file via {@code XMLUtil.debug}, creating parent
     * directories first. The output stream is always closed.
     *
     * @param file           destination file
     * @param resultsElement element to write
     * @throws RuntimeException wrapping the {@link IOException} if writing fails
     */
    private static void writeResultsElement(File file, ResultsElement resultsElement) {
        try {
            File parent = file.getParentFile();
            if (parent != null) {
                parent.mkdirs();
            }
            FileOutputStream out = new FileOutputStream(file);
            try {
                XMLUtil.debug(resultsElement, out, 1);
            } finally {
                // Release the file handle whether or not the write succeeded.
                out.close();
            }
        } catch (IOException e) {
            throw new RuntimeException("Cannot write file " + file, e);
        }
    }

    /** Creates the title-to-results map on first use. */
    private void ensureResultsByDictionary() {
        if (this.resultsByDictionary == null) {
            this.resultsByDictionary = new HashMap<String, ResultsElement>();
        }
    }

    /**
     * Adds a stopword set for each name in the list.
     *
     * @param list stopword set names
     */
    private void addStopwords(List<String> list) {
        ensureStopwordSetList();
        for (String name : list) {
            addStopwords(name);
        }
    }

    /**
     * Collects one {@link WordResultsElement} per tree in the inherited
     * {@code cTreeList}. Trees whose {@code getResultsElement} returns {@code null}
     * are logged and skipped.
     *
     * @param pluginName first argument handed to {@code CTree.getResultsElement}
     * @param methodName second argument handed to {@code CTree.getResultsElement}
     * @return the collected elements; never {@code null}
     */
    public WordResultsElementList aggregateOverCMDirList(String pluginName, String methodName) {
        WordResultsElementList collected = new WordResultsElementList();
        Iterator<CTree> trees = this.cTreeList.iterator();
        while (trees.hasNext()) {
            CTree tree = trees.next();
            if (tree.getResultsElement(pluginName, methodName) == null) {
                LOG.error("Null results element, skipped " + tree.getDirectory());
            } else {
                collected.add(new WordResultsElement(tree.getResultsElement(pluginName, methodName)));
            }
        }
        return collected;
    }

    /** Prints the known analysis methods to {@code System.err}. */
    private void helpMethods() {
        System.err.println("ANALYSIS METHODS");
        for (String method : ANALYSIS_METHODS) {
            System.err.println("  " + method);
        }
    }

    /** Prints the known word types to {@code System.err}. */
    private void helpWordTypes() {
        System.err.println("WORD TYPES");
        for (String wordType : WORD_TYPES) {
            System.err.println("  " + wordType);
        }
    }

    /**
     * Filters {@link #wordCaseList} in place. A list holding only {@link #PRESERVE}
     * is accepted unchanged. Otherwise entries are visited from last to first and an
     * entry is removed (and logged) when the list still contains {@link #PRESERVE}
     * at that moment or when the entry is not in {@link #CASE_TYPES}.
     *
     * <p>NOTE(review): because the PRESERVE test looks at the list as it is while
     * being edited, the outcome depends on where PRESERVE sits in the list
     * ({@code [ignore, preserve]} ends as {@code [ignore]}, {@code [preserve, ignore]}
     * ends empty). Confirm whether that is intended.
     */
    private void checkWordCaseList() {
        boolean onlyPreserve = this.wordCaseList.size() == 1 && PRESERVE.equals(this.wordCaseList.get(0));
        if (onlyPreserve) {
            return;
        }
        for (int index = this.wordCaseList.size() - 1; index >= 0; index--) {
            String caseType = this.wordCaseList.get(index);
            if (this.wordCaseList.contains(PRESERVE) || !CASE_TYPES.contains(caseType)) {
                LOG.error("Removed forbidden/unknown word: " + caseType);
                this.wordCaseList.remove(index);
            }
        }
    }

    /**
     * Creates a stopword set from the given name and stores it unless creation
     * returned {@code null}.
     *
     * @param str argument handed to {@code WordSetWrapper.createStopwordSet}
     */
    private void addStopwords(String str) {
        ensureStopwordSetList();
        WordSetWrapper stopwordSet = WordSetWrapper.createStopwordSet(str);
        if (stopwordSet != null) {
            this.stopwordSetList.add(stopwordSet);
        }
    }

    /** Creates the stopword set list on first use. */
    private void ensureStopwordSetList() {
        if (this.stopwordSetList == null) {
            this.stopwordSetList = new ArrayList<WordSetWrapper>();
        }
    }

    /** @return the word length range, or {@code null} if none was parsed */
    public IntRange getWordLengthRange() {
        return this.wordLengthRange;
    }

    /** @return the stopword sets; never {@code null} */
    public List<WordSetWrapper> getStopwordSetList() {
        ensureStopwordSetList();
        return this.stopwordSetList;
    }

    /** @return the chosen analysis methods */
    public List<String> getChosenMethods() {
        return this.chosenMethods;
    }

    /**
     * @return the stemming flag; {@code false} when it was never set, instead of
     *         failing while unboxing a {@code null} value
     */
    public boolean getStemming() {
        return this.stemming != null && this.stemming.booleanValue();
    }

    /** @return the case-handling modes */
    public List<String> getWordCaseList() {
        return this.wordCaseList;
    }

    /** @return the chosen word types; never {@code null} */
    public List<String> getChosenWordTypes() {
        ensureChosenWordTypes();
        return this.chosenWordTypes;
    }

    /** Creates the chosen word type list on first use. */
    private void ensureChosenWordTypes() {
        if (this.chosenWordTypes == null) {
            this.chosenWordTypes = new ArrayList<String>();
        }
    }

    static {
        LOG.setLevel(Level.DEBUG);
        ANALYSIS_METHODS = Arrays.asList(WORD_FREQUENCIES, WORD_LENGTHS, WORD_SEARCH);
        WORD_TYPES = Arrays.asList(ABBREVIATION, ACRONYM, CAPITALIZED);
        // NOTE(review): ABBREVIATION is listed among the case types; confirm this is intended.
        CASE_TYPES = Arrays.asList(IGNORE, ABBREVIATION, PRESERVE);
    }
}
