package edu.northwestern.at.morphadorner.corpuslinguistics.multiwordunits;

import edu.northwestern.at.morphadorner.corpuslinguistics.ngram.NGramExtractor;
import edu.northwestern.at.morphadorner.corpuslinguistics.statistics.BigramLogLikelihood;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.math.ArithUtils;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

/* loaded from: input_file:edu/northwestern/at/morphadorner/corpuslinguistics/multiwordunits/MultiwordUnitData.class */
/**
 * Holds one multiword unit (a tab-separated n-gram) together with its counts
 * and the association measures computed from them.
 *
 * <p>The measures are computed once, in the constructor, from the unigram
 * counts in the supplied word-count map and from the supplied n-gram
 * extractors, where {@code extractors[k]} is consulted for n-grams made of
 * {@code k + 1} words.
 */
public class MultiwordUnitData {
    /** Separator placed between the words of an n-gram key. */
    private static final String WORD_SEPARATOR = "\t";

    /** The multiword unit text: its words joined by tab characters. */
    protected String mwu;

    /** Count of the multiword unit as reported by the matching extractor. */
    protected int mwuCount;

    /** Number of words in the multiword unit. */
    protected int mwuLength;

    /** The individual words of the multiword unit. */
    protected String[] words;

    /** Unigram count of each entry of {@link #words}, in the same order. */
    protected int[] wordCounts;

    /** Dice-style measure: {@code 2 * freq(unit) / (avx + avy)}. */
    protected double dice;

    /** Value returned by {@code BigramLogLikelihood.calculateLogLikelihood}. */
    protected double logLikelihood;

    /** Phi-squared style measure; 0.0 when its denominator is zero. */
    protected double phiSquared;

    /** {@code prob(unit)^2 / avp}, clamped to the range [0, 1]. */
    protected double scp;

    /** {@code log2(prob(unit) / avp)}. */
    protected double si;

    /** Never assigned by this class; keeps its default value of 0.0. */
    protected double sigLogLikelihood;

    /** N-gram extractors; index {@code k} serves n-grams of {@code k + 1} words. */
    protected NGramExtractor[] extractors;

    /** Suffix ("\t" + unit) marking an n-gram that extends the unit on the left. */
    protected String leftSuccessorPattern;

    /** Prefix (unit + "\t") marking an n-gram that extends the unit on the right. */
    protected String rightSuccessorPattern;

    /** Total word count used as the denominator of unigram probabilities. */
    protected int totalWordCount;

    /** Maps a word to its count. */
    protected Map<String, Integer> wordCountMap;

    /**
     * Creates the data for one multiword unit and computes its association
     * measures.
     *
     * @param str the multiword unit text (words separated by tabs)
     * @param map word to count map used for unigram counts
     * @param i total word count
     * @param nGramExtractorArr extractors, where index {@code k} handles
     *     n-grams of {@code k + 1} words
     */
    public MultiwordUnitData(String str, Map<String, Integer> map, int i, NGramExtractor[] nGramExtractorArr) {
        this.mwu = str;
        this.wordCountMap = map;
        this.totalWordCount = i;
        this.extractors = nGramExtractorArr;
        this.leftSuccessorPattern = WORD_SEPARATOR + str;
        this.rightSuccessorPattern = str + WORD_SEPARATOR;
        this.words = NGramExtractor.splitNGramIntoWords(str);
        this.mwuLength = this.words.length;
        this.mwuCount = nGramExtractorArr[this.words.length - 1].getNGramCount(str);

        // Fill the per-word counts so that getWordCounts() no longer returns null.
        this.wordCounts = new int[this.words.length];
        for (int k = 0; k < this.words.length; k++) {
            this.wordCounts[k] = getWordCount(this.words[k]);
        }

        calculateAssociationMeasures();
    }

    /**
     * Returns the multiword unit text.
     *
     * @return the tab-separated unit text
     */
    public String getMWUText() {
        return this.mwu;
    }

    /**
     * Returns the count of the multiword unit.
     *
     * @return the unit count
     */
    public int getMWUTextCount() {
        return this.mwuCount;
    }

    /**
     * Returns the number of words in the multiword unit.
     *
     * @return the word count of the unit
     */
    public int getMWUTextLength() {
        return this.mwuLength;
    }

    /**
     * Returns the words of the multiword unit.
     *
     * @return the internal word array (not a copy)
     */
    public String[] getWords() {
        return this.words;
    }

    /**
     * Returns the unigram count of each word of the multiword unit.
     *
     * @return the internal count array (not a copy), parallel to {@link #getWords()}
     */
    public int[] getWordCounts() {
        return this.wordCounts;
    }

    /**
     * Returns the unit without its last word.
     *
     * @return all words but the last, joined by tabs; empty for a one-word unit
     */
    public String leftAntecedent() {
        return joinWords(this.words, 0, this.words.length - 2);
    }

    /**
     * Returns the unit without its first word.
     *
     * @return all words but the first, joined by tabs; empty for a one-word unit
     */
    public String rightAntecedent() {
        return joinWords(this.words, 1, this.words.length - 1);
    }

    /**
     * Finds the n-grams one word longer than this unit that start or end
     * with it.
     *
     * @return matching n-grams in the order the extractor lists them
     */
    public String[] successors() {
        List<String> matches = ListFactory.createNewList();
        for (String candidate : this.extractors[this.words.length].getNGrams()) {
            if (candidate.startsWith(this.rightSuccessorPattern)
                    || candidate.endsWith(this.leftSuccessorPattern)) {
                matches.add(candidate);
            }
        }
        return matches.toArray(new String[0]);
    }

    /**
     * Finds the n-grams one word longer than this unit that end with it.
     *
     * @return matching n-grams in the iteration order of the extractor's map
     */
    public String[] leftSuccessors() {
        List<String> matches = ListFactory.createNewList();
        for (String candidate : this.extractors[this.words.length].getNGramMap().keySet()) {
            if (candidate.endsWith(this.leftSuccessorPattern)) {
                matches.add(candidate);
            }
        }
        return matches.toArray(new String[0]);
    }

    /**
     * Finds the n-grams one word longer than this unit that start with it.
     *
     * <p>When the extractor's map is sorted, only the tail starting at the
     * prefix is visited and the scan stops at the first key that no longer
     * carries the prefix. Otherwise every key is checked.
     *
     * @return matching n-grams
     */
    public String[] rightSuccessors() {
        Map<String, ?> ngramMap = this.extractors[this.words.length].getNGramMap();
        List<String> matches = ListFactory.createNewList();
        if (ngramMap instanceof SortedMap) {
            SortedMap<String, ?> sorted = (SortedMap<String, ?>) ngramMap;
            // Keys sharing the prefix are contiguous at the head of the tail view.
            for (String candidate : sorted.tailMap(this.rightSuccessorPattern).keySet()) {
                if (!candidate.startsWith(this.rightSuccessorPattern)) {
                    break;
                }
                matches.add(candidate);
            }
        } else {
            for (String candidate : ngramMap.keySet()) {
                if (candidate.startsWith(this.rightSuccessorPattern)) {
                    matches.add(candidate);
                }
            }
        }
        return matches.toArray(new String[0]);
    }

    /**
     * Averages the counts of the unit's proper prefixes: the first word, the
     * first two words, and so on up to all words but the last.
     *
     * @return the sum of the prefix counts divided by {@code words.length - 1}
     */
    public double getAvx() {
        int size = this.words.length;
        double total = getWordCount(this.words[0]);
        for (int last = 1; last <= size - 2; last++) {
            // A prefix ending at index 'last' has last + 1 words.
            total += this.extractors[last].getNGramCount(joinWords(this.words, 0, last));
        }
        return total / (size - 1);
    }

    /**
     * Averages the counts of the unit's proper suffixes: the last word, the
     * last two words, and so on up to all words but the first.
     *
     * @return the sum of the suffix counts divided by {@code words.length - 1}
     */
    public double getAvy() {
        int size = this.words.length;
        double total = getWordCount(this.words[size - 1]);
        for (int first = 1; first < size - 1; first++) {
            // A suffix starting at index 'first' has size - first words.
            total += this.extractors[size - first - 1].getNGramCount(joinWords(this.words, first, size - 1));
        }
        return total / (size - 1);
    }

    /**
     * Averages, over every split point of the unit, the product of the
     * probabilities of the left and right parts.
     *
     * @return the mean product of part probabilities
     */
    protected double getAvp() {
        int length = this.words.length;
        double sum = 0.0d;
        for (int split = 0; split < length - 1; split++) {
            sum += prob(this.words, 0, split) * prob(this.words, split + 1, length - 1);
        }
        return sum / (length - 1);
    }

    /**
     * Averages, over every split point of the unit, the product of the
     * counts of the left and right parts.
     *
     * @return the mean product of part counts
     */
    protected double getAvp2() {
        int length = this.words.length;
        double sum = 0.0d;
        for (int split = 0; split < length - 1; split++) {
            sum += freq(this.words, 0, split) * freq(this.words, split + 1, length - 1);
        }
        return sum / (length - 1);
    }

    /**
     * Computes and stores the log-likelihood, Dice, SCP, SI and phi-squared
     * values of the unit.
     */
    public void calculateAssociationMeasures() {
        double avx = getAvx();
        double avy = getAvy();
        double avp = getAvp();
        double avp2 = getAvp2();
        int lastIndex = this.words.length - 1;

        this.logLikelihood = BigramLogLikelihood.calculateLogLikelihood(avx, avy, this.mwuCount, this.totalWordCount);
        this.dice = (2.0d * freq(this.words, 0, lastIndex)) / (avx + avy);

        double unitProb = prob(this.words, 0, lastIndex);
        this.scp = Math.max(Math.min((unitProb * unitProb) / avp, 1.0d), 0.0d);
        this.si = ArithUtils.log2(unitProb / avp);

        // phiSquared stays 0.0 when its denominator vanishes.
        this.phiSquared = 0.0d;
        double count = this.mwuCount;
        double total = this.totalWordCount;
        double difference = (count * total) - avp2;
        double denominator = avp2 * (total - avx) * (total - avy);
        if (denominator != 0.0d) {
            this.phiSquared = (difference * difference) / denominator;
        }
    }

    /**
     * Estimates the probability of the n-gram formed by a range of words.
     *
     * <p>A single word is looked up in the word-count map and divided by the
     * total word count. A longer n-gram is looked up in the extractor for
     * its length and divided by that extractor's number of n-grams. The
     * division is carried out in floating point.
     *
     * @param strArr the words
     * @param i index of the first word of the range (inclusive)
     * @param i2 index of the last word of the range (inclusive)
     * @return the relative frequency, or 0.0 when the denominator is zero
     */
    public double prob(String[] strArr, int i, int i2) {
        String key = joinWords(strArr, i, i2);
        int size = i2 - i + 1;
        double numerator;
        double denominator;
        if (size == 1) {
            numerator = getWordCount(key);
            denominator = this.totalWordCount;
        } else {
            NGramExtractor extractor = this.extractors[size - 1];
            numerator = extractor.getNGramCount(key);
            denominator = extractor.getNumberOfNGrams();
        }
        // Dividing as doubles keeps the fraction; integer division would truncate it.
        return denominator == 0.0d ? 0.0d : numerator / denominator;
    }

    /**
     * Returns the count of the n-gram formed by a range of words.
     *
     * <p>A single word is looked up in the word-count map. A longer n-gram is
     * looked up in the extractor for its length.
     *
     * @param strArr the words
     * @param i index of the first word of the range (inclusive)
     * @param i2 index of the last word of the range (inclusive)
     * @return the count of the n-gram
     */
    public double freq(String[] strArr, int i, int i2) {
        String key = joinWords(strArr, i, i2);
        int size = i2 - i + 1;
        if (size == 1) {
            return getWordCount(key);
        }
        return this.extractors[size - 1].getNGramCount(key);
    }

    /**
     * Returns the Dice value.
     *
     * @return the stored Dice value
     */
    public double getDice() {
        return this.dice;
    }

    /**
     * Returns the log-likelihood value.
     *
     * @return the stored log-likelihood
     */
    public double getLogLikelihood() {
        return this.logLikelihood;
    }

    /**
     * Returns the phi-squared value.
     *
     * @return the stored phi-squared value
     */
    public double getPhiSquared() {
        return this.phiSquared;
    }

    /**
     * Returns the SCP value.
     *
     * @return the stored SCP value, clamped to [0, 1] when it was computed
     */
    public double getSCP() {
        return this.scp;
    }

    /**
     * Returns the SI value.
     *
     * @return the stored SI value
     */
    public double getSI() {
        return this.si;
    }

    /**
     * Returns the significance of the log-likelihood.
     *
     * @return the stored value; this class never assigns it, so it is 0.0
     *     unless a subclass sets the field
     */
    public double getSigLogLikelihood() {
        return this.sigLogLikelihood;
    }

    /**
     * Looks up the count of a word.
     *
     * @param str the word
     * @return its count from the word-count map, or 0 when it has none
     */
    public int getWordCount(String str) {
        Integer count = this.wordCountMap.get(str);
        return count == null ? 0 : count.intValue();
    }

    /**
     * Returns the unit text with tabs replaced by blanks.
     *
     * @return the displayable unit text
     */
    @Override
    public String toString() {
        return this.mwu.replace('\t', ' ');
    }

    /**
     * Joins {@code tokens[first..last]} (both inclusive) with tabs.
     *
     * @return the joined n-gram key; empty when the range is empty
     */
    private static String joinWords(String[] tokens, int first, int last) {
        StringBuilder joined = new StringBuilder(tokens.length * 10);
        for (int k = first; k <= last; k++) {
            if (k > first) {
                joined.append(WORD_SEPARATOR);
            }
            joined.append(tokens[k]);
        }
        return joined.toString();
    }
}
