package uk.ac.cam.ch.wwmm.chemicaltagger;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nu.xom.Document;
import nu.xom.Serializer;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;
import org.xmlcml.euclid.EuclidConstants;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/chemicaltagger/Utils.class */
/**
 * Static helper methods used by the chemical tagger: name sanitising, HTML
 * clean-up, reading text from files and classpath resources, and convenience
 * entry points for running the tagger.
 *
 * <p>This class only has static members and cannot be instantiated.
 */
public class Utils {
    /** Matches integer exponent markup such as {@code 10<sup>-3</sup>}. */
    private static final Pattern EXPONENT_XML_PATTERN = Pattern.compile("(-?\\d+)<sup>(-?\\d+)</sup>");

    private Utils() {
    }

    /**
     * Converts an arbitrary string into a name made only of the characters
     * {@code A-Z a-z 0-9 _ . -} that starts with a letter or an underscore.
     *
     * @param str the raw name; may be {@code null}
     * @return {@code "emptyName"} when {@code str} is {@code null} or blank;
     *         otherwise the trimmed input, prefixed with {@code '_'} if its first
     *         character is neither a letter nor an underscore, with every
     *         disallowed character replaced by {@link EuclidConstants#S_UNDER}
     */
    public static String makeNCName(String str) {
        if (str == null) {
            return "emptyName";
        }
        String name = str.trim();
        if (name.isEmpty()) {
            return "emptyName";
        }
        char first = name.charAt(0);
        if (!Character.isLetter(first) && first != '_') {
            name = '_' + name;
        }
        return name.replaceAll("[^A-Za-z0-9_.-]", EuclidConstants.S_UNDER);
    }

    /**
     * Serialises an XML document to a file as UTF-8.
     *
     * <p>An {@link IOException} is not propagated: its stack trace is printed
     * and the method returns normally.
     *
     * @param document the document to write
     * @param str      path of the file to create or overwrite
     */
    public static void writeXMLToFile(Document document, String str) {
        // try-with-resources guarantees the file handle is released even when
        // serialisation fails part-way through.
        try (FileOutputStream out = new FileOutputStream(str)) {
            Serializer serializer = new Serializer(out, "UTF-8");
            serializer.write(document);
            serializer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reduces an HTML fragment to plain text.
     *
     * <p>HTML entities are unescaped, {@code base<sup>exp</sup>} markup is
     * rewritten in caret notation, and the result is passed through jsoup with
     * an empty {@link Whitelist} before its text content is extracted.
     *
     * @param str the HTML fragment
     * @return the text content of the cleaned fragment
     */
    public static String cleanHTMLText(String str) {
        String unescaped = StringEscapeUtils.unescapeHtml(str);
        String withCarets = convertExponentials(unescaped);
        return Jsoup.parse(Jsoup.clean(withCarets, new Whitelist())).text();
    }

    /**
     * Rewrites every {@code base<sup>exp</sup>} occurrence as
     * {@code base + S_CARET + exp}.
     *
     * @param str the text to convert
     * @return the converted text; equal to the input when nothing matches
     */
    private static String convertExponentials(String str) {
        // Group references make each match use its own base and exponent;
        // the separator is quoted so it is always inserted literally.
        String replacement = "$1" + Matcher.quoteReplacement(EuclidConstants.S_CARET) + "$2";
        return EXPONENT_XML_PATTERN.matcher(str).replaceAll(replacement);
    }

    /**
     * Reads a whole system-classloader resource as UTF-8 text.
     *
     * @param str name of the resource
     * @return the resource content with leading and trailing whitespace removed
     * @throws RuntimeException if the resource does not exist or cannot be read
     */
    public static String readSentence(String str) {
        try (InputStream stream = ClassLoader.getSystemResourceAsStream(str)) {
            if (stream == null) {
                throw new RuntimeException("Cannot read sentence: " + str);
            }
            return IOUtils.toString(stream, "UTF-8").trim();
        } catch (IOException e) {
            throw new RuntimeException("Cannot read sentence: " + str, e);
        }
    }

    /**
     * Reads the first line of a UTF-8 text file.
     *
     * @param str path of the file
     * @return the first line with leading and trailing whitespace removed, or an
     *         empty string when the file contains no lines
     * @throws IOException declared for backward compatibility; I/O failures are
     *                     reported as {@link RuntimeException}
     * @throws RuntimeException if the file cannot be opened or read
     */
    public static String getPathAsInputStream(String str) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(str)), "UTF-8"))) {
            String firstLine = reader.readLine();
            // readLine() yields null at end of stream, i.e. for an empty file.
            return firstLine == null ? "" : firstLine.trim();
        } catch (IOException e) {
            throw new RuntimeException("Cannot read sentence: " + str, e);
        }
    }

    /**
     * Opens a resource relative to the given class.
     *
     * @param cls the class whose {@code getResourceAsStream} is used
     * @param str the resource name
     * @return an open stream; the caller is responsible for closing it
     * @throws IOException if the resource cannot be found
     */
    public static InputStream getInputStream(Class<?> cls, String str) throws IOException {
        InputStream resourceAsStream = cls.getResourceAsStream(str);
        if (resourceAsStream == null) {
            throw new IOException("File not found: " + str + " (using context " + cls.getName() + ")");
        }
        return resourceAsStream;
    }

    /**
     * Tests whether a string contains at least one digit, as defined by
     * {@link Character#isDigit(char)}.
     *
     * @param str the string to inspect; must not be {@code null}
     * @return {@code true} if any character is a digit
     */
    public static boolean containsNumber(String str) {
        for (int i = 0; i < str.length(); i++) {
            if (Character.isDigit(str.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tags and parses a piece of text; equivalent to
     * {@code runChemicalTagger(str, false)}.
     *
     * @param str the text to process
     * @return the XML document produced by the sentence parser
     */
    public static Document runChemicalTagger(String str) {
        return runChemicalTagger(str, false);
    }

    /**
     * Runs the default {@code ChemistryPOSTagger} over the text, parses the
     * resulting tags and returns the XML document built by the parser.
     *
     * @param str the text to process
     * @param z   not read by this implementation; kept so existing callers
     *            continue to compile
     * @return the XML document produced by the sentence parser
     */
    public static Document runChemicalTagger(String str, boolean z) {
        ChemistrySentenceParser chemistrySentenceParser = new ChemistrySentenceParser(ChemistryPOSTagger.getDefaultInstance().runTaggers(str));
        chemistrySentenceParser.parseTags();
        return chemistrySentenceParser.makeXMLDocument();
    }

    /**
     * Loads quoted token names from the {@code ChemicalChunker.tokens}
     * classpath resource.
     *
     * <p>Only lines starting with {@link EuclidConstants#S_APOS} are considered;
     * for each, the text between that leading delimiter and the next one is
     * collected. Lines with nothing after the leading delimiter are skipped.
     *
     * @param cls class whose class loader is used to locate the resource
     * @return the set of token names found
     * @throws RuntimeException if the resource is missing or cannot be read
     */
    public static HashSet<String> loadsTagsFromFile(Class<?> cls) {
        HashSet<String> tags = new HashSet<>();
        try (InputStream in = cls.getClassLoader().getResourceAsStream("ChemicalChunker.tokens")) {
            if (in == null) {
                throw new RuntimeException("Could not load tokens file: ChemicalChunker.tokens not found");
            }
            for (String line : IOUtils.readLines(in)) {
                if (line.startsWith(EuclidConstants.S_APOS)) {
                    String[] parts = line.split(EuclidConstants.S_APOS);
                    // split() drops trailing empty strings, so a line holding
                    // only delimiters yields fewer than two parts.
                    if (parts.length > 1) {
                        tags.add(parts[1]);
                    }
                }
            }
            return tags;
        } catch (IOException e) {
            throw new RuntimeException("Could not load tokens file", e);
        }
    }
}
