package uk.ac.cam.ch.wwmm.chemicaltagger;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import uk.ac.cam.ch.wwmm.oscar.document.Token;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/chemicaltagger/RegexTagger.class */
/**
 * A {@link Tagger} that assigns a tag to each token by testing the token's
 * surface text against an ordered list of regular-expression {@link Rule}s.
 *
 * <p>Rules are read from a text resource in which each non-empty line that
 * does not start with {@code #} has two or more fields separated by
 * {@code ---}. The first field is compared against the ignored tags and,
 * if not ignored, the first two fields are handed to the {@link Rule}
 * constructor. Rules are tried in file order and the first one whose pattern
 * matches at the start of the token wins.
 */
public class RegexTagger implements Tagger {
    /** Classpath location of the rule file used when no custom path is given. */
    private static final String DEFAULT_TAG_FILE_PATH = "/uk/ac/cam/ch/wwmm/chemicaltagger/regexTagger/regexTags.txt";

    /** Tag emitted for a token that no rule matches (or that could not be processed). */
    private static final String NO_MATCH_TAG = "nil";

    private static final Logger LOG = Logger.getLogger(RegexTagger.class);

    private List<Rule> rules;
    private String tagFilePath = DEFAULT_TAG_FILE_PATH;
    private List<String> ignoredTags = new ArrayList<String>();

    /**
     * Creates a tagger that loads the default rule file and ignores no tags.
     */
    public RegexTagger() {
        initializeRules();
    }

    /**
     * Creates a tagger that loads the default rule file, skipping every rule
     * whose first field appears in {@code list}.
     *
     * @param list tags to ignore; {@code null} is treated as an empty list
     */
    public RegexTagger(List<String> list) {
        // A null list used to blow up inside initializeRules(); treat it as "ignore nothing".
        if (list != null) {
            this.ignoredTags = list;
        }
        initializeRules();
    }

    /**
     * Creates a tagger that loads its rules from a custom rule file.
     *
     * @param str resource path of the rule file to load
     */
    public RegexTagger(String str) {
        this.tagFilePath = str;
        // Load the rules here, as the other constructors do; otherwise
        // the rule list stays null and runTagger cannot match anything.
        initializeRules();
    }

    /**
     * @return the rule list currently used by this tagger
     */
    public List<Rule> getRules() {
        return this.rules;
    }

    /**
     * Replaces the rule list used by this tagger.
     *
     * @param list the new rules, tried in list order
     */
    public void setRules(List<Rule> list) {
        this.rules = list;
    }

    /**
     * (Re)loads the rule list from the configured rule file.
     *
     * <p>The previous rule list is discarded first. Lines starting with
     * {@code #} and empty lines are skipped, as are lines with fewer than two
     * {@code ---}-separated fields and lines whose first field is an ignored
     * tag. An {@link IOException} while reading is logged and leaves whatever
     * rules were read so far in place.
     */
    protected void initializeRules() {
        this.rules = new ArrayList<Rule>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(Utils.getInputStream(getClass(), this.tagFilePath), Charset.forName("UTF-8")));
            if (!reader.ready()) {
                throw new IOException("Regex tag file is not ready for reading: " + this.tagFilePath);
            }
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("#") || StringUtils.isEmpty(line)) {
                    continue;
                }
                String[] fields = line.split("---");
                if (fields.length > 1 && !this.ignoredTags.contains(fields[0])) {
                    this.rules.add(new Rule(fields[0], fields[1]));
                }
            }
        } catch (IOException e) {
            LOG.error("Could not load regex tagging rules from " + this.tagFilePath, e);
        } finally {
            // Close on every exit path so a read failure does not leak the stream.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    LOG.debug("Could not close regex tag file " + this.tagFilePath, e);
                }
            }
        }
    }

    /**
     * Tags each token with the name of the first rule whose pattern matches
     * at the beginning of the token's surface text.
     *
     * <p>The returned list always has exactly one entry per input token, in
     * input order. A token that matches no rule, or whose processing throws,
     * is tagged {@code "nil"}.
     *
     * @param list the tokens to tag
     * @param str  not used by this tagger
     * @return one tag per token
     */
    @Override // uk.ac.cam.ch.wwmm.chemicaltagger.Tagger
    public List<String> runTagger(List<Token> list, String str) {
        List<String> tags = new ArrayList<String>(list.size());
        for (Token token : list) {
            String tag = NO_MATCH_TAG;
            try {
                String surface = token.getSurface();
                for (Rule rule : this.rules) {
                    // lookingAt() anchors at the start only; the rule need not consume the whole token.
                    if (rule.getPattern().matcher(surface).lookingAt()) {
                        tag = rule.getName();
                        break;
                    }
                }
            } catch (Exception e) {
                LOG.debug("Null pointer right there" + list, e);
            }
            // Always emit a tag so the output stays aligned with the input tokens.
            tags.add(tag);
        }
        return tags;
    }

    /**
     * @return the tags whose rules are skipped when the rule file is loaded
     */
    @Override // uk.ac.cam.ch.wwmm.chemicaltagger.Tagger
    public List<String> getIgnoredTags() {
        return this.ignoredTags;
    }
}
