package edu.northwestern.at.morphadorner.corpuslinguistics.ngram;

import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.StringUtils;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:edu/northwestern/at/morphadorner/corpuslinguistics/ngram/NGramExtractor.class */
/**
 * Extracts word n-grams from word sequences and tallies how often each one occurs.
 *
 * <p>An n-gram is stored as a single string key made of its words joined by a tab
 * character; {@link #splitNGramIntoWords(String)} turns such a key back into words.
 * Counts are kept in the map returned by {@code MapFactory.createNewSortedMap()}.
 */
public class NGramExtractor {
    /** Separator placed between the words of an n-gram key. */
    private static final String WORD_SEPARATOR = "\t";

    /** Number of consecutive words that make up one n-gram. */
    int nGramSize;

    /** Window size supplied at construction; stored but not consulted by this class. */
    int windowSize;

    /** Maps each tab-joined n-gram to the number of times it has been counted. */
    protected Map<String, Integer> nGramCounts = MapFactory.createNewSortedMap();

    /**
     * Cached sum of all counts. It is refreshed by {@link #getNumberOfNGrams()} and
     * {@link #mergeNGramExtractor(NGramExtractor)}; {@code addWords} does not update it.
     */
    protected int numberOfNGrams = 0;

    /**
     * Creates an extractor.
     *
     * @param i  number of words per n-gram
     * @param i2 window size (only stored)
     */
    public NGramExtractor(int i, int i2) {
        this.nGramSize = i;
        this.windowSize = i2;
    }

    /**
     * Counts the n-grams found in an array of words.
     *
     * @param strArr the words, in order
     */
    public void addWords(String[] strArr) {
        addWords(Arrays.asList(strArr));
    }

    /**
     * Counts every run of {@code nGramSize} consecutive words in the list.
     * Lists shorter than {@code nGramSize} contribute nothing.
     *
     * @param list the words, in order
     */
    public void addWords(List<String> list) {
        if (this.nGramSize < 1) {
            // A non-positive size has no meaningful n-grams; without this guard the
            // loops below would record empty-string keys.
            return;
        }
        int lastStart = list.size() - this.nGramSize;
        for (int start = 0; start <= lastStart; start++) {
            StringBuilder key = new StringBuilder();
            for (int offset = 0; offset < this.nGramSize; offset++) {
                if (key.length() > 0) {
                    key.append(WORD_SEPARATOR);
                }
                key.append(list.get(start + offset));
            }
            addToCount(key.toString(), 1);
        }
    }

    /**
     * Merges the counts of another extractor into this one. Each n-gram's count in
     * the other extractor is added to this extractor's count for the same n-gram,
     * and the cached total is then recomputed over this extractor's whole map.
     *
     * @param nGramExtractor the extractor whose counts are added to this one
     */
    public void mergeNGramExtractor(NGramExtractor nGramExtractor) {
        for (Map.Entry<String, Integer> entry : nGramExtractor.getNGramMap().entrySet()) {
            addToCount(entry.getKey(), entry.getValue().intValue());
        }
        this.numberOfNGrams = sumOfCounts();
    }

    /**
     * Returns how many times an n-gram has been counted.
     *
     * @param str the tab-joined n-gram
     * @return its count, or 0 if it has not been seen
     */
    public int getNGramCount(String str) {
        Integer count = this.nGramCounts.get(str);
        return (count == null) ? 0 : count.intValue();
    }

    /**
     * Returns the distinct n-grams as tab-joined strings.
     *
     * @return the keys of the count map, in that map's iteration order
     */
    public String[] getNGrams() {
        return this.nGramCounts.keySet().toArray(new String[0]);
    }

    /**
     * Returns the live internal count map (not a copy), so changes made through it
     * affect this extractor.
     *
     * @return map from tab-joined n-gram to count
     */
    public Map<String, Integer> getNGramMap() {
        return this.nGramCounts;
    }

    /**
     * Recomputes and returns the sum of all n-gram counts.
     *
     * @return total number of n-gram occurrences counted
     */
    public int getNumberOfNGrams() {
        this.numberOfNGrams = sumOfCounts();
        return this.numberOfNGrams;
    }

    /**
     * Returns the number of distinct n-grams.
     *
     * @return number of keys in the count map
     */
    public int getNumberOfUniqueNGrams() {
        return this.nGramCounts.size();
    }

    /**
     * Splits a tab-joined n-gram into its words by delegating to
     * {@code StringUtils.makeTokenArray} with a tab delimiter.
     *
     * @param str the tab-joined n-gram
     * @return the tokens produced by {@code StringUtils.makeTokenArray}
     */
    public static String[] splitNGramIntoWords(String str) {
        return StringUtils.makeTokenArray(str, WORD_SEPARATOR);
    }

    /** Adds {@code increment} to the stored count of {@code nGram}, starting from zero if absent. */
    private void addToCount(String nGram, int increment) {
        Integer current = this.nGramCounts.get(nGram);
        int updated = (current == null) ? increment : current.intValue() + increment;
        this.nGramCounts.put(nGram, Integer.valueOf(updated));
    }

    /** Sums the counts of every n-gram currently in the map. */
    private int sumOfCounts() {
        int total = 0;
        for (Integer count : this.nGramCounts.values()) {
            total += count.intValue();
        }
        return total;
    }
}
