package uk.ac.cam.ch.wwmm.oscarrecogniser.tokenanalysis;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.apache.xpath.XPath;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscarrecogniser/tokenanalysis/NGram.class */
/**
 * Scores words against a table of 4-gram values.
 *
 * <p>A word is first normalised by {@link #parseWord(String)} so that it only contains
 * characters of a fixed 52-symbol alphabet, then padded by {@link #addStartAndEnd(String)}.
 * Every window of four consecutive symbols selects one {@code short} from a flat table of
 * {@code 52^4} entries; {@link #testWord(String)} returns the sum of the selected entries
 * divided by 500.
 *
 * <p>The table is read from and written to a gzip-compressed stream of big-endian shorts
 * (the {@link DataInputStream}/{@link DataOutputStream} encoding).
 */
public class NGram {
    /** Classpath location of the bundled model used by {@link #getInstance()}. */
    private static final String MODEL_FILE = "uk/ac/cam/ch/wwmm/oscarrecogniser/tokenanalysis/ngram-model.dat.gz";

    /** The symbols a normalised word may contain; a symbol's position is its table index. */
    private static final String ALPHABET = "$^S0%<>&'()*+,-./:;=?@[]abcdefghijklmnopqrstuvwxyz|~";

    /** Number of symbols in {@link #ALPHABET}. */
    private static final int LEN = ALPHABET.length();

    /** Stride of the fourth (fastest varying) symbol's predecessor in the flat table. */
    private static final int STEP2 = LEN;

    /** Stride of the second symbol of a 4-gram in the flat table. */
    private static final int STEP1 = LEN * LEN;

    /** Stride of the first symbol of a 4-gram in the flat table. */
    private static final int STEP0 = LEN * LEN * LEN;

    /** Number of entries in a complete table, and the number of shorts in a model stream. */
    private static final int TABLE_SIZE = LEN * LEN * LEN * LEN;

    /** Any run of ASCII digits; each run is folded into a single {@code 0}. */
    private static final Pattern DIGIT_RUN = Pattern.compile("[0-9]+");

    /** Any single character that is not part of the alphabet; replaced by {@code S}. */
    private static final Pattern UNKNOWN_CHAR = Pattern.compile("[^$^S0%<>&'()*+,./:;=?@|~a-z\\[\\]-]");

    /** Lazily created singleton, guarded by the class lock taken in {@link #getInstance()}. */
    private static NGram instance;

    /**
     * Classifier used by the suffix scoring methods. Nothing in this class ever assigns it,
     * so it is always {@code null} here.
     */
    private SuffixClassifier suffixClassifier;

    /** Flat 4-gram table; shared (not copied) with whoever supplied or requested it. */
    private final short[] data;

    /**
     * Returns the shared model loaded from the bundled classpath resource, loading it on
     * first use.
     *
     * <p>If the resource ends before a full table has been read, the entries read so far are
     * kept and the remaining ones stay {@code 0}. Any other I/O failure is reported and, unlike
     * before, does not leave an empty model cached: the next call tries to load again.
     *
     * @return the shared instance
     * @throws RuntimeException if the model cannot be found or read
     * @deprecated marked deprecated in the original source; the intended replacement is not
     *     visible from this file
     */
    @Deprecated
    public static synchronized NGram getInstance() {
        if (instance == null) {
            NGram model = new NGram();
            try {
                model.loadData(MODEL_FILE);
            } catch (EOFException ignored) {
                // Existing behaviour kept on purpose: a short model stream is tolerated and
                // the partially filled table is used as is.
            } catch (IOException e) {
                throw new RuntimeException("Error loading data", e);
            }
            // Publish only after loading so that a failed load is never cached.
            instance = model;
        }
        return instance;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates a new model and fills it from a classpath resource.
     *
     * @param str classpath name of a gzip-compressed model, resolved through the system class loader
     * @return the freshly loaded model
     * @throws FileNotFoundException if the resource does not exist
     * @throws IOException if the resource cannot be read or holds fewer entries than a full table
     */
    public static NGram loadModel(String str) throws IOException {
        NGram nGram = new NGram();
        nGram.loadData(str);
        return nGram;
    }

    /** Creates a model with a full-size table of zeros. */
    private NGram() {
        this(new short[TABLE_SIZE]);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates a model backed directly by the given table.
     *
     * <p>The array is not copied and its length is not checked; scoring or saving with a table
     * shorter than {@code 52^4} entries can fail with {@link ArrayIndexOutOfBoundsException}.
     *
     * @param sArr the table to use
     */
    public NGram(short[] sArr) {
        this.suffixClassifier = null;
        this.data = sArr;
    }

    /**
     * Fills the table from a gzip-compressed classpath resource and closes the resource.
     *
     * @param str classpath name, resolved through the system class loader
     * @throws FileNotFoundException if the resource does not exist
     * @throws EOFException if the resource ends before a full table was read; entries read up
     *     to that point remain stored
     * @throws IOException on any other read failure
     */
    private void loadData(String str) throws IOException {
        InputStream resource = ClassLoader.getSystemResourceAsStream(str);
        if (resource == null) {
            throw new FileNotFoundException("File not found: " + str);
        }
        // The raw stream is listed as its own resource so it is closed even when the gzip
        // header is invalid and the wrapping streams are never constructed.
        try (InputStream raw = resource;
                DataInputStream in = new DataInputStream(new BufferedInputStream(new GZIPInputStream(raw)))) {
            for (int i = 0; i < TABLE_SIZE; i++) {
                this.data[i] = in.readShort();
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Writes the first {@code 52^4} table entries to the stream as gzip-compressed shorts and
     * closes the stream.
     *
     * <p>Write failures are now propagated to the caller instead of being printed and
     * swallowed, so a caller can no longer mistake a truncated file for a successful save.
     *
     * @param outputStream destination; always closed by this method once wrapping succeeded
     * @throws IOException if writing or closing fails
     */
    public void saveData(OutputStream outputStream) throws IOException {
        try (DataOutputStream out =
                new DataOutputStream(new BufferedOutputStream(new GZIPOutputStream(outputStream)))) {
            for (int i = 0; i < TABLE_SIZE; i++) {
                out.writeShort(this.data[i]);
            }
        }
    }

    /**
     * Scores a word with the 4-gram table.
     *
     * @param str the raw word
     * @return {@code 0} if the normalised word has at most one symbol; otherwise the sum of the
     *     table entries of every 4-gram of the padded word, divided by 500
     */
    public double testWord(String str) {
        String symbols = parseWord(str);
        if (symbols.length() <= 1) {
            return 0.0d;
        }
        String padded = addStartAndEnd(symbols);
        int end = padded.length();
        // parseWord only emits alphabet characters and the padding uses '^' and '$', which
        // are alphabet members too, so indexOf never yields -1 here.
        int first = ALPHABET.indexOf(padded.charAt(0));
        int second = ALPHABET.indexOf(padded.charAt(1));
        int third = ALPHABET.indexOf(padded.charAt(2));
        double total = 0.0d;
        for (int i = 3; i < end; i++) {
            int fourth = ALPHABET.indexOf(padded.charAt(i));
            total += this.data[(first * STEP0) + (second * STEP1) + (third * STEP2) + fourth];
            // Slide the window one symbol to the right.
            first = second;
            second = third;
            third = fourth;
        }
        return total / 500.0d;
    }

    /**
     * Returns the suffix classifier's score for the normalised word.
     *
     * @param str the raw word
     * @return the value of {@code scoreWord} for the normalised word
     * @throws NullPointerException if no suffix classifier is set, which is always the case for
     *     instances created by the code in this class
     */
    public double testWordSuffixProb(String str) {
        return requireSuffixClassifier().scoreWord(parseWord(str));
    }

    /**
     * Returns {@code ln(p) - ln(1 - p)} where {@code p} is the suffix classifier's score for
     * the normalised word.
     *
     * @param str the raw word
     * @return the transformed score
     * @throws NullPointerException if no suffix classifier is set, which is always the case for
     *     instances created by the code in this class
     */
    public double testWordSuffix(String str) {
        double score = requireSuffixClassifier().scoreWord(parseWord(str));
        return Math.log(score) - Math.log(1.0d - score);
    }

    /** Returns the suffix classifier or fails with a descriptive {@link NullPointerException}. */
    private SuffixClassifier requireSuffixClassifier() {
        return Objects.requireNonNull(this.suffixClassifier, "No suffix classifier is set on this NGram");
    }

    /**
     * Normalises a word so that it only contains alphabet symbols.
     *
     * <p>The word is lower-cased independently of the default locale, double quotes become
     * single quotes, curly braces become square brackets, en and em dashes (U+2013, U+2014)
     * become hyphens, every run of ASCII digits becomes a single {@code 0}, and every remaining
     * character outside the alphabet becomes {@code S}.
     *
     * @param str the raw word; must not be {@code null}
     * @return the normalised word
     */
    public static String parseWord(String str) {
        // Locale.ROOT keeps 'I' -> 'i' even when the default locale is Turkish or Azeri.
        String normalised = str.toLowerCase(Locale.ROOT)
                .replace('"', '\'')
                .replace('{', '[')
                .replace('}', ']')
                .replace('\u2013', '-')
                .replace('\u2014', '-');
        String digitsFolded = DIGIT_RUN.matcher(normalised).replaceAll("0");
        return UNKNOWN_CHAR.matcher(digitsFolded).replaceAll("S");
    }

    /**
     * Pads a normalised word with three {@code ^} start markers and one {@code $} end marker.
     *
     * @param str the normalised word
     * @return the padded word
     */
    public static String addStartAndEnd(String str) {
        return "^^^" + str + "$";
    }

    /**
     * Returns the backing table itself, not a copy.
     *
     * @return the table
     */
    short[] getData() {
        return this.data;
    }

    /**
     * Tells whether another model holds exactly the same table.
     *
     * <p>Despite its name this is an equality check, not an ordering.
     *
     * @param nGram the model to compare with; must not be {@code null}
     * @return {@code true} if both tables have the same length and the same entries
     */
    public boolean compareTo(NGram nGram) {
        return Arrays.equals(this.data, nGram.getData());
    }
}
