package org.xmlcml.ami2.tokens;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.xmlcml.ami2.dictionary.TermPhrase;

/* loaded from: input_file:org/xmlcml/ami2/tokens/LuceneUtils.class */
/**
 * Static helpers that turn text into Lucene token streams, token lists,
 * shingle (word n-gram) lists and Porter-stemmed term lists.
 *
 * <p>Methods whose name ends in "Quietly" convert the checked
 * {@link IOException} raised by Lucene into an unchecked {@link RuntimeException}.
 */
public class LuceneUtils {
    private static final Logger LOG = Logger.getLogger(LuceneUtils.class);

    /**
     * Creates a token stream over {@code str} and resets it so that it is ready
     * for {@code incrementToken()}.
     *
     * <p>The caller owns the returned stream and must not reset it again.
     *
     * @param analyzer analyzer that produces the stream
     * @param str text to analyze
     * @return a stream that has already been reset
     * @throws RuntimeException if the stream cannot be created or reset
     */
    public static TokenStream createTokenStreamQuietly(Analyzer analyzer, String str) {
        try {
            TokenStream tokenStream = analyzer.tokenStream((String) null, new StringReader(str));
            resetTokenStreamQuietly(tokenStream);
            return tokenStream;
        } catch (IOException e) {
            throw new RuntimeException("cannot create tokenStream", e);
        }
    }

    /**
     * Same as {@link #createTokenStreamQuietly(Analyzer, String)} using a new
     * {@link WhitespaceAnalyzer}.
     *
     * @param str text to analyze
     * @return a stream that has already been reset
     * @throws RuntimeException if the stream cannot be created or reset
     */
    public static TokenStream createWhitespaceTokenStreamQuietly(String str) {
        return createTokenStreamQuietly(new WhitespaceAnalyzer(), str);
    }

    /**
     * Tokenizes {@code str} with a new {@link WhitespaceAnalyzer}.
     *
     * @param str text to tokenize
     * @return the tokens in stream order
     */
    public static List<String> whitespaceTokenize(String str) {
        return tokenize(str, new WhitespaceAnalyzer());
    }

    /**
     * Tokenizes {@code str} with the given analyzer. The underlying token
     * stream is ended and closed before returning, also when reading fails.
     *
     * @param str text to tokenize
     * @param analyzer analyzer that produces the tokens
     * @return the tokens in stream order
     * @throws RuntimeException wrapping any {@link IOException} from the stream
     */
    public static List<String> tokenize(String str, Analyzer analyzer) {
        TokenStream stream = createTokenStreamQuietly(analyzer, str);
        try {
            return collectTermsAndClose(stream);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Produces the shingles of {@code str}; unigrams are not emitted.
     *
     * @param str text to analyze
     * @param i first size argument passed to the {@link ShingleFilter} constructor
     *          (its minimum shingle size)
     * @param i2 second size argument passed to the {@link ShingleFilter} constructor
     *           (its maximum shingle size)
     * @param analyzer analyzer that produces the underlying tokens
     * @return the shingles in stream order
     * @throws IOException if reading, ending or closing the stream fails
     */
    public static List<String> createShingleStream(String str, int i, int i2, Analyzer analyzer) throws IOException {
        TokenStream base = createTokenStreamQuietly(analyzer, str);
        ShingleFilter shingles;
        try {
            shingles = new ShingleFilter(base, i, i2);
            shingles.setOutputUnigrams(false);
        } catch (RuntimeException e) {
            // The filter never took ownership of the base stream, so release it here.
            closeQuietly(base);
            throw e;
        }
        // The base stream was already reset by createTokenStreamQuietly, so the filter is
        // deliberately not reset again here. Closing the filter is expected to close the
        // wrapped base stream as well (TokenFilter chains close() to its input).
        return collectTermsAndClose(shingles);
    }

    /**
     * Same as {@link #createShingleStream(String, int, int, Analyzer)} using a
     * new {@link WhitespaceAnalyzer}.
     *
     * @param str text to analyze
     * @param i minimum shingle size
     * @param i2 maximum shingle size
     * @return the shingles in stream order
     * @throws IOException if reading, ending or closing the stream fails
     */
    public static List<String> createWhitespaceShingleStream(String str, int i, int i2) throws IOException {
        return createShingleStream(str, i, i2, new WhitespaceAnalyzer());
    }

    /**
     * Resets {@code tokenStream}. If the reset fails the stream is closed
     * (best effort) and the failure is rethrown unchecked.
     *
     * @param tokenStream stream to reset
     * @throws RuntimeException wrapping the {@link IOException} from {@code reset()}
     */
    public static void resetTokenStreamQuietly(TokenStream tokenStream) {
        try {
            tokenStream.reset();
        } catch (IOException e) {
            closeQuietly(tokenStream);
            throw new RuntimeException("Cannot reset stream", e);
        }
    }

    /**
     * Consumes {@code tokenStream} and builds a {@link TermPhrase} from its terms.
     *
     * @param tokenStream stream that has already been reset; it is closed by this call
     * @return the phrase created by {@code TermPhrase.createTermPhrase}
     */
    public static TermPhrase createPhraseFromTokenStream(TokenStream tokenStream) {
        return TermPhrase.createTermPhrase(createListFromTokenStream(tokenStream));
    }

    /**
     * Consumes {@code tokenStream} and returns its terms. The stream is ended
     * and closed before returning, also when reading fails.
     *
     * @param tokenStream stream that has already been reset
     * @return the terms in stream order
     * @throws RuntimeException wrapping any {@link IOException} from the stream
     */
    public static List<String> createListFromTokenStream(TokenStream tokenStream) {
        try {
            return collectTermsAndClose(tokenStream);
        } catch (IOException e) {
            throw new RuntimeException("token stream failed", e);
        }
    }

    /**
     * Stems every element of {@code list} with a single {@code PorterStemmer}.
     *
     * @param list words to stem
     * @return a new list holding the stem of each word, in the same order
     */
    public static List<String> applyPorterStemming(List<String> list) {
        PorterStemmer porterStemmer = new PorterStemmer();
        List<String> stems = new ArrayList<String>(list.size());
        for (String word : list) {
            stems.add(porterStemmer.stem(word));
        }
        return stems;
    }

    /**
     * Splits {@code str} on runs of whitespace, stems each piece and wraps the
     * result in a {@link TermPhrase}.
     *
     * <p>Note: because of how {@link String#split(String)} works, leading
     * whitespace in {@code str} yields an empty first piece.
     *
     * @param str whitespace-separated words
     * @return the phrase created from the stemmed words
     */
    public static TermPhrase applyPorterStemming(String str) {
        return TermPhrase.createTermPhrase(applyPorterStemming(Arrays.asList(str.split("\\s+"))));
    }

    /**
     * Tokenizes {@code str} with a new {@link WhitespaceAnalyzer} via
     * {@link #createListFromTokenStream(TokenStream)}.
     *
     * @param str text to tokenize
     * @return the tokens in stream order
     */
    public static List<String> createWhitespaceList(String str) {
        return createListFromTokenStream(createTokenStreamQuietly(new WhitespaceAnalyzer(), str));
    }

    /**
     * Reads every remaining term from an already-reset stream, then ends and
     * closes it. The stream is closed even if reading fails.
     */
    private static List<String> collectTermsAndClose(TokenStream stream) throws IOException {
        List<String> terms = new ArrayList<String>();
        try {
            CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
            while (stream.incrementToken()) {
                terms.add(term.toString());
            }
            stream.end();
        } finally {
            stream.close();
        }
        return terms;
    }

    /** Closes a stream on an error path where a more relevant exception is already being reported. */
    private static void closeQuietly(TokenStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort cleanup only; the caller rethrows the original failure.
        }
    }

    static {
        // Sets this class's logger to DEBUG programmatically when the class is loaded.
        LOG.setLevel(Level.DEBUG);
    }
}
