package uk.ac.cam.ch.wwmm.oscarrecogniser.extractedtrainingdata;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.ParsingException;
import org.xmlcml.cml.base.CMLConstants;
import uk.ac.cam.ch.wwmm.oscar.exceptions.OscarInitialisationException;
import uk.ac.cam.ch.wwmm.oscar.tools.ResourceGetter;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscarrecogniser/extractedtrainingdata/ExtractedTrainingData.class */
/**
 * Immutable container for ten named sets of strings that are read from, and
 * can be written back to, an {@code etd} XML element.
 *
 * <p>Each set corresponds to one child element of {@code etd} (for example
 * {@code chemicalWords} or {@code rnMid}) whose text content holds one entry
 * per line. All sets exposed by the getters are unmodifiable.
 */
public class ExtractedTrainingData {
    /** Lazily created instance returned by {@link #getDefaultInstance()}. */
    private static ExtractedTrainingData defaultInstance;

    private final Set<String> chemicalWords;
    private final Set<String> nonChemicalWords;
    private final Set<String> chemicalNonWords;
    private final Set<String> nonChemicalNonWords;
    private final Set<String> afterHyphen;
    private final Set<String> notForPrefix;
    private final Set<String> pnStops;
    private final Set<String> polysemous;
    private final Set<String> rnEnd;
    private final Set<String> rnMid;

    /**
     * Creates an instance in which every set is empty.
     */
    public ExtractedTrainingData() {
        this.chemicalWords = Collections.emptySet();
        this.nonChemicalWords = Collections.emptySet();
        this.afterHyphen = Collections.emptySet();
        this.chemicalNonWords = Collections.emptySet();
        this.nonChemicalNonWords = Collections.emptySet();
        this.pnStops = Collections.emptySet();
        this.notForPrefix = Collections.emptySet();
        this.polysemous = Collections.emptySet();
        this.rnEnd = Collections.emptySet();
        this.rnMid = Collections.emptySet();
    }

    /**
     * Creates an instance from an XML element, normally the {@code etd}
     * element produced by {@link #toXML()}.
     *
     * @param element parent of the ten section elements; must not be null
     * @throws OscarInitialisationException if {@code element} is null, if a
     *         section element is missing, or if reading a section fails
     */
    public ExtractedTrainingData(Element element) {
        if (element == null) {
            // Previously this surfaced as a bare NullPointerException on the first lookup.
            throw new OscarInitialisationException("failed to load ExtractedTrainingData",
                    new IllegalArgumentException("etd element must not be null"));
        }
        try {
            this.chemicalWords = readSection(element, "chemicalWords");
            this.nonChemicalWords = readSection(element, "nonChemicalWords");
            this.chemicalNonWords = readSection(element, "chemicalNonWords");
            this.nonChemicalNonWords = readSection(element, "nonChemicalNonWords");
            this.afterHyphen = readSection(element, "afterHyphen");
            this.notForPrefix = readSection(element, "notForPrefix");
            this.pnStops = readSection(element, "pnStops");
            this.polysemous = readSection(element, "polysemous");
            this.rnEnd = readSection(element, "rnEnd");
            this.rnMid = readSection(element, "rnMid");
        } catch (IOException e) {
            throw new OscarInitialisationException("failed to load ExtractedTrainingData", e);
        }
    }

    /**
     * Returns the shared instance loaded from the {@code chempapers} model,
     * loading it on first use.
     *
     * <p>The lazy initialisation is not synchronized, so concurrent first
     * calls may each perform the load.
     *
     * @return the cached default instance
     * @throws OscarInitialisationException if the model cannot be loaded
     */
    public static ExtractedTrainingData getDefaultInstance() {
        if (defaultInstance == null) {
            defaultInstance = loadExtractedTrainingData("chempapers");
        }
        return defaultInstance;
    }

    /**
     * Loads the training data stored in the named model resource.
     *
     * @param str model name, without the XML file suffix
     * @return a new instance populated from the model's {@code etd} element
     * @throws OscarInitialisationException if the model cannot be read or
     *         parsed, or if it yields no {@code etd} element
     */
    public static ExtractedTrainingData loadExtractedTrainingData(String str) {
        Element etd = loadEtdElement(str);
        if (etd == null) {
            // loadEtdElement signals "nothing found" with null; report it with the model name
            // instead of letting the constructor fail without context.
            throw new OscarInitialisationException("failed to load ExtractedTrainingData for model: " + str,
                    new IllegalStateException(
                            "no XML document was returned for the model, or its root element has no etd child"));
        }
        return new ExtractedTrainingData(etd);
    }

    /**
     * Fetches the {@code etd} element of the named model resource.
     *
     * @param str model name, without the XML file suffix
     * @return the first {@code etd} child of the document root, or
     *         {@code null} if no document was returned or the root has no
     *         such child
     * @throws OscarInitialisationException if reading or parsing the
     *         resource fails
     */
    static Element loadEtdElement(String str) {
        try {
            ResourceGetter getter = new ResourceGetter(
                    ExtractedTrainingData.class.getClassLoader(),
                    "/uk/ac/cam/ch/wwmm/oscarrecogniser/models/");
            Document document = getter.getXMLDocument(str + CMLConstants.XML_SUFF);
            if (document == null) {
                return null;
            }
            return document.getRootElement().getFirstChildElement("etd");
        } catch (IOException e) {
            throw new OscarInitialisationException("failed to load ExtractedTrainingData for model: " + str, e);
        } catch (ParsingException e) {
            throw new OscarInitialisationException("failed to load ExtractedTrainingData for model: " + str, e);
        }
    }

    /**
     * Builds an element whose text content is the given strings, each
     * followed by a newline.
     *
     * @param collection strings to write, in iteration order
     * @param str local name of the element to create
     * @return the newly created element
     */
    private static Element stringsToElement(Collection<String> collection, String str) {
        StringBuilder text = new StringBuilder();
        for (String entry : collection) {
            text.append(entry).append("\n");
        }
        Element element = new Element(str);
        element.appendChild(text.toString());
        return element;
    }

    /**
     * Serialises this instance as an {@code etd} element with one child
     * element per set.
     *
     * @return a new {@code etd} element
     */
    public Element toXML() {
        Element element = new Element("etd");
        element.appendChild(stringsToElement(this.chemicalWords, "chemicalWords"));
        element.appendChild(stringsToElement(this.nonChemicalWords, "nonChemicalWords"));
        element.appendChild(stringsToElement(this.chemicalNonWords, "chemicalNonWords"));
        element.appendChild(stringsToElement(this.nonChemicalNonWords, "nonChemicalNonWords"));
        element.appendChild(stringsToElement(this.afterHyphen, "afterHyphen"));
        element.appendChild(stringsToElement(this.notForPrefix, "notForPrefix"));
        element.appendChild(stringsToElement(this.pnStops, "pnStops"));
        element.appendChild(stringsToElement(this.polysemous, "polysemous"));
        element.appendChild(stringsToElement(this.rnEnd, "rnEnd"));
        element.appendChild(stringsToElement(this.rnMid, "rnMid"));
        return element;
    }

    /**
     * Reads the named section beneath {@code parent}.
     *
     * @param parent element expected to contain the section
     * @param name local name of the section element
     * @return the unmodifiable set of trimmed lines in that section
     * @throws OscarInitialisationException if the section element is absent
     * @throws IOException if reading the section text fails
     */
    private static Set<String> readSection(Element parent, String name) throws IOException {
        Element section = parent.getFirstChildElement(name);
        if (section == null) {
            // Name the missing section rather than failing with an anonymous NullPointerException.
            throw new OscarInitialisationException("failed to load ExtractedTrainingData",
                    new IllegalArgumentException("missing child element: " + name));
        }
        return readStringsFromElement(section);
    }

    /**
     * Splits an element's text value into lines and collects the trimmed
     * lines into a set. Blank lines are kept as the empty string.
     *
     * @param element element whose text value is read; must not be null
     * @return an unmodifiable set of the trimmed lines
     * @throws IOException if reading from the underlying reader fails
     */
    private static Set<String> readStringsFromElement(Element element) throws IOException {
        Set<String> lines = new HashSet<String>();
        // The reader wraps an in-memory string only, so it is not closed here.
        BufferedReader reader = new BufferedReader(new StringReader(element.getValue()));
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            lines.add(line.trim());
        }
        return Collections.unmodifiableSet(lines);
    }

    /** @return the unmodifiable {@code notForPrefix} set */
    public Set<String> getNotForPrefix() {
        return this.notForPrefix;
    }

    /** @return the unmodifiable {@code nonChemicalWords} set */
    public Set<String> getNonChemicalWords() {
        return this.nonChemicalWords;
    }

    /** @return the unmodifiable {@code chemicalWords} set */
    public Set<String> getChemicalWords() {
        return this.chemicalWords;
    }

    /** @return the unmodifiable {@code chemicalNonWords} set */
    public Set<String> getChemicalNonWords() {
        return this.chemicalNonWords;
    }

    /** @return the unmodifiable {@code nonChemicalNonWords} set */
    public Set<String> getNonChemicalNonWords() {
        return this.nonChemicalNonWords;
    }

    /** @return the unmodifiable {@code afterHyphen} set */
    public Set<String> getAfterHyphen() {
        return this.afterHyphen;
    }

    /** @return the unmodifiable {@code pnStops} set */
    public Set<String> getPnStops() {
        return this.pnStops;
    }

    /** @return the unmodifiable {@code polysemous} set */
    public Set<String> getPolysemous() {
        return this.polysemous;
    }

    /** @return the unmodifiable {@code rnEnd} set */
    public Set<String> getRnEnd() {
        return this.rnEnd;
    }

    /** @return the unmodifiable {@code rnMid} set */
    public Set<String> getRnMid() {
        return this.rnMid;
    }
}
