package edu.northwestern.at.morphadorner.tools.punktabbreviationdetector;

import edu.northwestern.at.morphadorner.corpuslinguistics.statistics.BigramLogLikelihood;
import edu.northwestern.at.utils.StringUtils;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/northwestern/at/morphadorner/tools/punktabbreviationdetector/PunktTokenCounter.class */
/**
 * Counts tokens fed to it one at a time and collects candidate abbreviations
 * (counted strings longer than one character that end with a period and do
 * not start with a digit).
 *
 * <p>Tokens are consumed through {@link #count(PunktToken)}; word/number
 * tokens joined by periods are glued into a single counted string by a small
 * three-state machine. Call {@link #finish()} after the last token so that a
 * pending candidate is counted too.
 *
 * <p>Instances are mutable and perform no synchronization.
 */
public class PunktTokenCounter {
    /** State: no candidate is being assembled. */
    protected static final int START = 0;
    /** State: the buffer ends with a word or number token. */
    protected static final int CANDIDATE_1 = 1;
    /** State: the buffer ends with a period. */
    protected static final int CANDIDATE_2 = 2;

    /** Current state of the token state machine (one of the constants above). */
    protected int state;
    /** Buffer holding the candidate currently being assembled. */
    protected StringBuilder b;
    /** Number of times each counted string has been seen. */
    protected Map<String, Integer> c;
    /** Counted strings that qualify as abbreviation candidates. */
    protected Set<String> candidates;
    /** Total number of counted strings (sum of all values in {@link #c}). */
    protected int n;
    /** Minimum score a candidate needs to be reported as an abbreviation. */
    protected double abbreviationThreshold;
    /** When true, the penalty factor is left out of the abbreviation score. */
    protected boolean ignoreAbbreviationPenalty;

    /**
     * Creates a counter.
     *
     * @param d minimum score for a candidate to be accepted as an abbreviation
     * @param z true to skip the penalty factor when scoring candidates
     */
    public PunktTokenCounter(double d, boolean z) {
        this.abbreviationThreshold = d;
        this.ignoreAbbreviationPenalty = z;
        this.state = START;
        this.b = new StringBuilder();
        this.c = new HashMap<String, Integer>();
        this.candidates = new HashSet<String>();
        this.n = 0;
    }

    /** Creates a counter with a threshold of 0.3 and the penalty enabled. */
    PunktTokenCounter() {
        this(0.3d, false);
    }

    /**
     * Feeds the next token into the counter. A {@code null} token is ignored.
     *
     * @param punktToken the token to process
     */
    public void count(PunktToken punktToken) {
        if (punktToken == null) {
            return;
        }
        switch (this.state) {
            case START:
                if (isPeriod(punktToken)) {
                    // A period with no candidate in front of it is counted on its own.
                    inc(".");
                } else if (isWordOrNumber(punktToken)) {
                    this.b.append(punktToken.getTokenText());
                    this.state = CANDIDATE_1;
                }
                return;
            case CANDIDATE_1:
                if (isPeriod(punktToken)) {
                    this.b.append(".");
                    this.state = CANDIDATE_2;
                } else {
                    // Anything else ends the candidate; the ending token itself is not counted.
                    flushCandidate();
                }
                return;
            case CANDIDATE_2:
                if (punktToken.getTokenType() == PunktTokenType.WHITESPACE) {
                    // "word. " : count the string with its period, and the period separately.
                    flushCandidate();
                    inc(".");
                } else if (isWordOrNumber(punktToken)) {
                    // "a.b" : keep extending the same candidate.
                    this.b.append(punktToken.getTokenText());
                    this.state = CANDIDATE_1;
                } else {
                    flushCandidate();
                }
                return;
            default:
                return;
        }
    }

    /**
     * Counts whatever is still pending in the buffer and returns the counter
     * to its start state. If the pending string ends with a period, the period
     * is counted separately as well.
     */
    protected void finish() {
        if (this.b.length() > 0) {
            String pending = this.b.toString();
            inc(pending);
            if (pending.endsWith(".")) {
                inc(".");
            }
            this.b = new StringBuilder();
            // Reset the state too; otherwise a later count() call would run in a
            // candidate state against an empty buffer.
            this.state = START;
        }
    }

    /**
     * Tells whether a token is a single period.
     *
     * @param punktToken the token to test; must not be null
     * @return true if the token has type NONWORD and its text is "."
     */
    protected boolean isPeriod(PunktToken punktToken) {
        return punktToken.getTokenType() == PunktTokenType.NONWORD && punktToken.getTokenText().equals(".");
    }

    /**
     * Increments the count of a string and the total count, and records the
     * string as an abbreviation candidate when it is longer than one
     * character, ends with a period and does not start with a digit.
     *
     * @param str the string to count
     */
    protected void inc(String str) {
        Integer previous = this.c.get(str);
        this.c.put(str, Integer.valueOf(previous == null ? 1 : previous.intValue() + 1));
        this.n++;
        if (str.length() > 1 && str.endsWith(".") && !Character.isDigit(str.charAt(0))) {
            this.candidates.add(str);
        }
    }

    /**
     * Returns how often a string has been counted.
     *
     * @param str the string to look up
     * @return its count, or 0 if it has never been counted
     */
    public int getCount(String str) {
        Integer count = this.c.get(str);
        return count == null ? 0 : count.intValue();
    }

    /**
     * Returns the total number of counted strings.
     *
     * @return the total count
     */
    public int getN() {
        return this.n;
    }

    /**
     * Returns the abbreviation candidates collected so far.
     *
     * @return the internal candidate set (not a copy)
     */
    public Set<String> getCandidates() {
        return this.candidates;
    }

    /**
     * Returns the candidates accepted by {@link #isAnAbbreviation(String)}.
     *
     * @return a new set holding the accepted candidates
     */
    public Set<String> getAbbreviations() {
        Set<String> abbreviations = new HashSet<String>();
        for (String candidate : this.candidates) {
            if (isAnAbbreviation(candidate)) {
                abbreviations.add(candidate);
            }
        }
        return abbreviations;
    }

    /**
     * Scores a candidate and compares the score with the threshold.
     *
     * <p>The score is the product of the bigram log-likelihood of the
     * candidate and the period, the number of periods plus one,
     * {@code 1 / e^length} and, unless disabled, the penalty
     * {@code 1 / length^(occurrences without the final period)}, where
     * {@code length} is the candidate's length without its periods.
     *
     * @param str the candidate to test
     * @return true if {@code str} has at least two characters, ends with a
     *         period and scores at least {@link #abbreviationThreshold}
     */
    protected boolean isAnAbbreviation(String str) {
        if (str.length() < 2 || !str.endsWith(".")) {
            return false;
        }
        int countWithPeriod = getCount(str);
        int countWithoutPeriod = getCount(str.substring(0, str.length() - 1));
        double logLikelihood = BigramLogLikelihood.calculateLogLikelihood(
                countWithPeriod + countWithoutPeriod, getCount("."), countWithPeriod, getN());

        int periods = StringUtils.countChar(str, '.');
        int periodFactor = periods + 1;
        int lengthWithoutPeriods = str.length() - periods;
        double lengthFactor = 1.0d / Math.exp(lengthWithoutPeriods);

        double penalty = 1.0d;
        if (!this.ignoreAbbreviationPenalty) {
            // Exponent = total occurrences minus occurrences with the final period,
            // i.e. how often the candidate was seen without its final period.
            penalty = 1.0d / Math.pow(lengthWithoutPeriods, countWithoutPeriod);
        }
        return logLikelihood * periodFactor * lengthFactor * penalty >= this.abbreviationThreshold;
    }

    /** Tells whether a token has type WORD or NUMBER. */
    private boolean isWordOrNumber(PunktToken punktToken) {
        return punktToken.getTokenType() == PunktTokenType.WORD
                || punktToken.getTokenType() == PunktTokenType.NUMBER;
    }

    /** Counts the buffered candidate, then clears the buffer and returns to START. */
    private void flushCandidate() {
        inc(this.b.toString());
        this.b = new StringBuilder();
        this.state = START;
    }
}
