package org.bitbucket.efsmtool.inference.clustering;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import weka.clusterers.EM;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
import weka.core.tokenizers.NGramTokenizer;
import weka.core.xml.XMLInstances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

/* loaded from: input_file:org/bitbucket/efsmtool/inference/clustering/StringClusterer.class */
/**
 * Turns a map of identified strings into a Weka word-vector dataset and runs
 * EM clustering over it.
 *
 * <p>All of the work happens in the constructor: the input is converted to a
 * two-attribute dataset ({@code id}, {@code content}), passed through a
 * {@link StringToWordVector} filter configured with an {@link NGramTokenizer},
 * written to {@code test.arff} in the working directory, and finally handed to
 * an {@link EM} clusterer.
 */
public class StringClusterer {
    /**
     * Filtered (word-vector) form of the input strings. Stays {@code null} if
     * filtering fails, because the constructor only logs the exception.
     */
    protected Instances clusterableInstances;

    /**
     * Builds the clusterable dataset from {@code map} and clusters it.
     *
     * <p>Any exception raised while filtering, saving or clustering is printed
     * to standard error and otherwise ignored, so construction itself does not
     * fail on those errors.
     *
     * @param map strings to cluster, keyed by an integer identifier; the key is
     *            stored in the {@code id} attribute and the value in the
     *            {@code content} attribute
     */
    public StringClusterer(Map<Integer, String> map) {
        ArrayList<Attribute> attributes = new ArrayList<Attribute>(2);
        attributes.add(new Attribute("id"));
        // A null value list is how the Weka Attribute constructor is asked for
        // a string attribute; values are registered below via addStringValue.
        attributes.add(new Attribute("content", (ArrayList<String>) null));
        Instances instances = new Instances(XMLInstances.TAG_INSTANCES, attributes, 0);

        // Iterate entries so that both the identifier and the text are at hand;
        // the previous code dropped the key and referenced an undefined variable.
        for (Map.Entry<Integer, String> entry : map.entrySet()) {
            DenseInstance row = new DenseInstance(2);
            row.setValue(instances.attribute(0), entry.getKey().intValue());
            row.setValue(instances.attribute(1), instances.attribute(1).addStringValue(entry.getValue()));
            instances.add((Instance) row);
        }
        // No class attribute: the data is used for unsupervised clustering.
        instances.setClassIndex(-1);

        NGramTokenizer nGramTokenizer = new NGramTokenizer();
        StringToWordVector stringToWordVector = new StringToWordVector();
        try {
            // Configure the filter before declaring the input format, which is
            // the order the Weka filter documentation asks for.
            stringToWordVector.setTokenizer(nGramTokenizer);
            stringToWordVector.setInputFormat(instances);
            this.clusterableInstances = Filter.useFilter(instances, stringToWordVector);

            // Dump the filtered dataset to disk (fixed file name, relative to
            // the current working directory).
            ArffSaver arffSaver = new ArffSaver();
            arffSaver.setInstances(this.clusterableInstances);
            arffSaver.setFile(new File("test.arff"));
            arffSaver.writeBatch();

            cluster();
        } catch (Exception e) {
            // Best-effort: report the failure but leave the object constructed.
            e.printStackTrace();
        }
    }

    /**
     * Builds an {@link EM} clusterer over {@link #clusterableInstances} using
     * the options {@code -I 100}. The clusterer is local to this method and is
     * not retained once it has been built.
     *
     * @throws Exception if setting the options or building the clusterer fails
     */
    private void cluster() throws Exception {
        EM em = new EM();
        em.setOptions(new String[]{"-I", "100"});
        em.buildClusterer(this.clusterableInstances);
    }
}
