package uk.ac.cam.ch.wwmm.oscartokeniser;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Elements;
import nu.xom.Node;
import nu.xom.Nodes;
import nu.xom.ParsingException;
import nu.xom.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.cam.ch.wwmm.oscar.exceptions.DataFormatException;
import uk.ac.cam.ch.wwmm.oscar.exceptions.OscarInitialisationException;
import uk.ac.cam.ch.wwmm.oscar.tools.ResourceGetter;
import uk.ac.cam.ch.wwmm.oscar.types.NamedEntityType;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscartokeniser/TokenClassifier.class */
/**
 * Classifies tokens against a set of named, token-level regular expressions.
 *
 * <p>The default instance is built from the XML resource
 * {@code tokenLevelRegularExpressions.xml}. In that document each {@code tlr}
 * element names a regular expression, gives it a {@link NamedEntityType} and
 * points (via {@code idref}) at a {@code def} element from which the regex
 * text is assembled.
 *
 * <p>Instances hold an unmodifiable copy of the map they are constructed with.
 */
public class TokenClassifier {
    private static final String REGEX_FILENAME = "tokenLevelRegularExpressions.xml";
    private static final Logger LOG = LoggerFactory.getLogger(TokenClassifier.class);
    private static final ResourceGetter rg = new ResourceGetter(TokenClassifier.class.getClassLoader(), "/uk/ac/cam/ch/wwmm/oscartokeniser/");
    /** Lazily created shared instance; only accessed from methods synchronized on this class. */
    private static TokenClassifier defaultInstance;
    /** Unmodifiable map from regex name to its {@link TokenClass}. */
    private final Map<String, TokenClass> tokenLevelRegexs;

    /**
     * Creates a classifier backed by a private copy of the given map.
     *
     * @param map token classes keyed by name; later changes to this map do not
     *            affect the classifier
     */
    public TokenClassifier(Map<String, TokenClass> map) {
        // Copy into a LinkedHashMap so that the iteration order of the supplied
        // map (readXML produces an ordered map) is preserved rather than lost.
        this.tokenLevelRegexs = Collections.unmodifiableMap(new LinkedHashMap<String, TokenClass>(map));
    }

    /**
     * Discards the cached default instance and rebuilds it from the XML resource.
     *
     * @throws OscarInitialisationException if the resource cannot be loaded
     */
    @Deprecated
    public static synchronized void reinitialise() {
        // Synchronized so the reset cannot interleave with getDefaultInstance().
        defaultInstance = null;
        getDefaultInstance();
    }

    /**
     * Returns the shared classifier, loading it from the XML resource on first use.
     *
     * @return the default classifier, never {@code null}
     * @throws OscarInitialisationException if the resource cannot be read,
     *         parsed or interpreted
     */
    public static synchronized TokenClassifier getDefaultInstance() {
        if (defaultInstance == null) {
            try {
                defaultInstance = new TokenClassifier(readXML(rg.getXMLDocument(REGEX_FILENAME)));
            } catch (DataFormatException e) {
                throw new OscarInitialisationException("failed to load TokenClassifier", e);
            } catch (IOException e) {
                throw new OscarInitialisationException("failed to load TokenClassifier", e);
            } catch (ParsingException e) {
                throw new OscarInitialisationException("failed to load TokenClassifier", e);
            }
        }
        return defaultInstance;
    }

    /**
     * Builds token classes from the {@code tlr} children of the root element's
     * first {@code tlrs} child.
     *
     * <p>When several {@code tlr} elements share a name, the first one wins and
     * a warning is logged for each later duplicate.
     *
     * @param document the parsed definitions document
     * @return token classes keyed by name, in document order
     * @throws DataFormatException if the {@code tlrs} element is missing or a
     *         referenced definition cannot be resolved
     */
    public static Map<String, TokenClass> readXML(Document document) throws DataFormatException {
        LOG.debug("Initialising tlrs... ");
        Element tlrsElement = document.getRootElement().getFirstChildElement("tlrs");
        if (tlrsElement == null) {
            // Report malformed input as a data error instead of a NullPointerException.
            throw new DataFormatException("missing tlrs element");
        }
        // Cache of already-expanded definitions, shared across all tlr elements.
        Map<String, String> resolvedDefs = new HashMap<String, String>();
        Map<String, TokenClass> tokenClasses = new LinkedHashMap<String, TokenClass>();
        Elements tlrElements = tlrsElement.getChildElements("tlr");
        for (int i = 0; i < tlrElements.size(); i++) {
            Element tlr = tlrElements.get(i);
            NamedEntityType type = NamedEntityType.valueOf(tlr.getAttributeValue("type"));
            String regex = getDefText(tlr.getAttributeValue("idref"), resolvedDefs, document);
            TokenClass tokenClass = new TokenClass(type, regex, tlr.getAttributeValue("name"));
            String name = tokenClass.getName();
            if (tokenClasses.containsKey(name)) {
                LOG.warn("Duplicate TokenLevelRegex defined: " + name);
            } else {
                tokenClasses.put(name, tokenClass);
            }
        }
        LOG.debug("tlrs initialised");
        return tokenClasses;
    }

    /**
     * Concatenates the text children of an element, expanding each
     * {@code insert} child element into the definition its {@code idref} names.
     * Child elements with any other name are ignored.
     *
     * @param element  the element whose content is flattened
     * @param map      cache of already-expanded definitions, keyed by id
     * @param document the document in which definitions are looked up
     * @return the flattened text
     * @throws DataFormatException if a referenced definition cannot be resolved
     */
    private static String getNodeText(Element element, Map<String, String> map, Document document) throws DataFormatException {
        StringBuilder text = new StringBuilder();
        for (int i = 0; i < element.getChildCount(); i++) {
            Node child = element.getChild(i);
            if (child instanceof Text) {
                text.append(child.getValue());
            } else if (child instanceof Element) {
                Element childElement = (Element) child;
                if ("insert".equals(childElement.getLocalName())) {
                    text.append(getDefText(childElement.getAttributeValue("idref"), map, document));
                }
            }
        }
        return text.toString();
    }

    /**
     * Resolves the {@code def} element with the given id to its regex text.
     *
     * <p>A definition of type {@code const} expands to its flattened content; a
     * definition of type {@code list} expands to its {@code item} children
     * joined with {@code |} and wrapped in parentheses. Any other type value
     * expands to the empty string. Results are stored in {@code map}.
     *
     * @param str      the id of the definition to resolve
     * @param map      cache of already-expanded definitions, keyed by id
     * @param document the document in which definitions are looked up
     * @return the expanded text of the definition
     * @throws DataFormatException if there is not exactly one definition with
     *         this id, or the definition has no {@code type} attribute
     */
    private static String getDefText(String str, Map<String, String> map, Document document) throws DataFormatException {
        if (map.containsKey(str)) {
            return map.get(str);
        }
        Nodes matches = document.query("//def[@id=\"" + str + "\"]");
        if (matches.size() == 0) {
            throw new DataFormatException("no definition found for " + str);
        }
        if (matches.size() > 1) {
            throw new DataFormatException("too many definitions for " + str);
        }
        Element def = (Element) matches.get(0);
        String type = def.getAttributeValue("type");
        if (type == null) {
            throw new DataFormatException("definition " + str + " has no type attribute");
        }
        StringBuilder text = new StringBuilder();
        if ("const".equals(type)) {
            text.append(getNodeText(def, map, document));
        } else if ("list".equals(type)) {
            text.append("(");
            Elements items = def.getChildElements("item");
            for (int i = 0; i < items.size(); i++) {
                if (i > 0) {
                    text.append("|");
                }
                text.append(getNodeText(items.get(i), map, document));
            }
            text.append(")");
        }
        String resolved = text.toString();
        map.put(str, resolved);
        return resolved;
    }

    /**
     * Returns the unmodifiable map of token classes held by this classifier.
     *
     * @return token classes keyed by name
     */
    Map<String, TokenClass> getTokenLevelRegexes() {
        return this.tokenLevelRegexs;
    }

    /**
     * Collects the types of every token class for which
     * {@link TokenClass#isMatch} accepts the given token.
     *
     * <p>The result is an immutable empty set when nothing matches, an
     * immutable singleton when exactly one class matches, and a {@link HashSet}
     * otherwise.
     *
     * @param str the token to classify
     * @return the set of matching types, never {@code null}
     */
    public Set<NamedEntityType> classifyToken(String str) {
        Set<NamedEntityType> types = Collections.emptySet();
        // True once the immutable singleton has been replaced by a HashSet.
        boolean growable = false;
        for (TokenClass tokenClass : this.tokenLevelRegexs.values()) {
            if (!tokenClass.isMatch(str)) {
                continue;
            }
            NamedEntityType type = tokenClass.getType();
            if (types.isEmpty()) {
                types = Collections.singleton(type);
            } else {
                if (!growable) {
                    types = new HashSet<NamedEntityType>(types);
                    growable = true;
                }
                types.add(type);
            }
        }
        return types;
    }

    /**
     * Tests a token against one specific named token-level regex.
     *
     * @param str  the token to test
     * @param str2 the name of the token-level regex to apply
     * @return the result of {@link TokenClass#isMatch} for that regex
     * @throws IllegalArgumentException if no regex with that name is known
     */
    public boolean isTokenLevelRegexMatch(String str, String str2) {
        TokenClass tokenClass = this.tokenLevelRegexs.get(str2);
        if (tokenClass == null) {
            throw new IllegalArgumentException("unknown token-level regex: " + str2);
        }
        return tokenClass.isMatch(str);
    }
}
