package com.chine.invertedindex.analysis;

import java.util.Iterator;
import java.util.regex.Pattern;

/* loaded from: input_file:com/chine/invertedindex/analysis/HTMLChineseTokenizer.class */
/**
 * A {@link ChineseTokenizer} that removes HTML/XML-style tags from its input
 * before delegating to the parent tokenizer.
 *
 * <p>Tag removal is purely regex based: anything from a {@code <} up to the
 * next {@code >} is deleted. Character entities (e.g. {@code &amp;}) are not
 * decoded, and a {@code >} inside an attribute value ends the match early.
 */
public class HTMLChineseTokenizer extends ChineseTokenizer {
    /**
     * Matches one opening or closing tag: {@code <}, an optional {@code /},
     * any run of characters other than {@code >}, then {@code >}.
     *
     * <p>The flag was the bare literal 64 in the original, which is
     * {@link Pattern#UNICODE_CASE}. It is kept for exact equivalence even
     * though it has no effect unless CASE_INSENSITIVE is also set.
     */
    private static final Pattern HTML_TAG_PATTERN =
            Pattern.compile("<\\/?[^>]*>", Pattern.UNICODE_CASE);

    /**
     * Strips every tag from the current input, stores the stripped text back
     * into {@code _input}, and then runs the parent's tokenization on it.
     *
     * <p>If no input has been set ({@code _input} is {@code null}) the
     * stripping step is skipped instead of failing inside the regex matcher;
     * the parent's {@code tokenize()} is still invoked and decides how a
     * missing input is handled.
     */
    @Override
    public void tokenize() {
        if (this._input != null) {
            this._input = HTML_TAG_PATTERN.matcher(this._input).replaceAll("");
        }
        super.tokenize();
    }

    /**
     * Small manual demo: tokenizes a few HTML snippets and prints the tokens
     * of each snippet on one line, every token followed by {@code /}.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        String[] samples = {
            "<span>abc英语</span>",
            "<P><A>Abc英语</A></P>",
            "<div class=\"grpState\">共有<span class=\"red\">2251</span>位成员 \u3000 人气指数 <span class=\"red\">-</span> \u3000 最新排名 <span class=\"red\">-</span></div>"
        };
        HTMLChineseTokenizer tokenizer = new HTMLChineseTokenizer();
        for (String sample : samples) {
            // The same tokenizer instance is reused, so reset it before each sample.
            tokenizer.clear();
            tokenizer.set(sample);
            tokenizer.tokenize();
            Iterator<String> tokens = tokenizer.iterator();
            while (tokens.hasNext()) {
                System.out.print(tokens.next() + "/");
            }
            System.out.println();
        }
    }
}
