package org.xmlcml.html.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nu.xom.Element;
import nu.xom.Nodes;
import nu.xom.XPathContext;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.custommonkey.xmlunit.XMLConstants;
import org.xmlcml.euclid.EuclidConstants;
import org.xmlcml.html.HtmlElement;
import org.xmlcml.xml.XMLUtil;

/* loaded from: input_file:org/xmlcml/html/util/HtmlUtil.class */
/**
 * Static helpers for querying HTML element trees, reading HTML from files or
 * URLs, and cleaning up raw HTML text (entities, attributes, elements, DOCTYPE,
 * problem characters) before it is parsed.
 */
public class HtmlUtil {
    /** Minimum number of characters between '&amp;' and ';' for a span to be treated as an entity. */
    private static final int MIN_ESCAPE = 2;
    /** Maximum number of characters between '&amp;' and ';' for a span to be treated as an entity. */
    private static final int MAX_ESCAPE = 6;
    private static final Logger LOG = Logger.getLogger(HtmlUtil.class);
    /** XPath context binding the prefix held in {@code WikipediaTokenizer.HEADING} to the XHTML namespace. */
    public static XPathContext XHTML_XPATH = new XPathContext(WikipediaTokenizer.HEADING, "http://www.w3.org/1999/xhtml");
    /** Matches leading whitespace followed by a lower-case, double-quoted, non-empty attribute (captured in group 1). */
    public static Pattern ATTRIBUTE = Pattern.compile("\\s+([a-z]+\\s*=\\s*\\\"[^\\\"]+\\\")");
    /**
     * Pairs of {problem character, replacement}: U+00A0 (no-break space) becomes a plain
     * space, and the typographic quotes U+201C..U+201F become a straight double quote.
     */
    static final char[][] badGood = {new char[]{160, ' '}, new char[]{8220, '\"'}, new char[]{8221, '\"'}, new char[]{8222, '\"'}, new char[]{8223, '\"'}};

    /**
     * Runs an XPath query (using {@link #XHTML_XPATH}) and returns the hits as HtmlElements.
     *
     * @param htmlElement context element for the query
     * @param str XPath expression
     * @return the matched elements, in the order returned by the query
     * @throws RuntimeException if any matched element is not an {@code HtmlElement}
     */
    public static List<HtmlElement> getQueryHtmlElements(HtmlElement htmlElement, String str) {
        List<Element> queryElements = XMLUtil.getQueryElements(htmlElement, str, XHTML_XPATH);
        List<HtmlElement> htmlElements = new ArrayList<HtmlElement>(queryElements.size());
        for (Element element : queryElements) {
            if (!(element instanceof HtmlElement)) {
                throw new RuntimeException("Element was not HtmlElement: " + element.toXML());
            }
            htmlElements.add((HtmlElement) element);
        }
        return htmlElements;
    }

    /**
     * Runs an XPath query (using {@link #XHTML_XPATH}) and returns the string value of every hit.
     *
     * @param element context element for the query
     * @param str XPath expression
     * @return the value of each matched node, in query order
     */
    public static List<String> getQueryHtmlStrings(Element element, String str) {
        Nodes query = element.query(str, XHTML_XPATH);
        List<String> values = new ArrayList<String>(query.size());
        for (int i = 0; i < query.size(); i++) {
            values.add(query.get(i).getValue());
        }
        return values;
    }

    /**
     * Adds a default-namespace declaration for XHTML to every element that the
     * query {@code //*[namespace-uri()='']} returns for the given context element.
     */
    private static void addHTMLNamespace(Element element) {
        Nodes query = element.query("//*[namespace-uri()='']");
        for (int i = 0; i < query.size(); i++) {
            ((Element) query.get(i)).addNamespaceDeclaration("", "http://www.w3.org/1999/xhtml");
        }
    }

    /**
     * Reads a file and builds an HtmlElement from it via {@code HTMLTidy}.
     *
     * @param file the file to read
     * @return the element created by {@code HTMLTidy.createHtmlElement}
     * @throws Exception if the file cannot be opened or the content cannot be processed
     */
    public static HtmlElement readAndCreateElement(File file) throws Exception {
        LOG.debug("opening file " + file);
        FileInputStream inputStream = new FileInputStream(file);
        try {
            return new HTMLTidy().createHtmlElement(inputStream);
        } finally {
            // Always release the file handle, whether or not parsing succeeded.
            inputStream.close();
        }
    }

    /**
     * Reads a URL and builds an HtmlElement from it via {@code HtmlUnitWrapper}.
     *
     * @param url the location to read
     * @return the element created by {@code HtmlUnitWrapper.readAndCreateElement}
     * @throws Exception if reading or building fails
     */
    public static HtmlElement readAndCreateElement(URL url) throws Exception {
        LOG.debug("opening URL Stream");
        HtmlElement htmlElement = new HtmlUnitWrapper().readAndCreateElement(url);
        LOG.debug("built document");
        return htmlElement;
    }

    /**
     * Replaces character entities in a string.
     * <p>
     * A candidate entity is an '&amp;' followed by {@value #MIN_ESCAPE} to {@value #MAX_ESCAPE}
     * characters and a ';'. Bodies starting with '#' are parsed as decimal (or, after
     * 'x'/'X', hexadecimal) character references; all other bodies are looked up in
     * {@code map}. Anything that is not recognised - unknown names, unparsable or
     * negative numbers - is left in the output exactly as it appeared in the input.
     *
     * @param str the text to unescape
     * @param map entity name (without '&amp;' and ';') to replacement text
     * @return the unescaped text; the same {@code str} instance if nothing was replaced
     */
    public static final String unescapeHtml3(String str, Map<String, CharSequence> map) {
        StringWriter writer = null;
        int length = str.length();
        int i = 1;          // index of the character just after a candidate '&'
        int copiedTo = 0;   // start of the input text not yet copied to the writer
        while (true) {
            // Look for the next '&'.
            while (i < length && str.charAt(i - 1) != '&') {
                i++;
            }
            if (i >= length) {
                break;
            }

            // Look for the terminating ';' within the allowed entity length.
            int semicolon = i;
            while (semicolon < length && semicolon < i + MAX_ESCAPE + 1 && str.charAt(semicolon) != ';') {
                semicolon++;
            }
            if (semicolon == length || semicolon < i + MIN_ESCAPE || semicolon == i + MAX_ESCAPE + 1) {
                i++;
                continue;
            }

            if (str.charAt(i) == '#') {
                // Numeric character reference.
                int digitsStart = i + 1;
                int radix = 10;
                char first = str.charAt(digitsStart);
                if (first == 'x' || first == 'X') {
                    digitsStart++;
                    radix = 16;
                }
                int codePoint;
                try {
                    codePoint = Integer.parseInt(str.substring(digitsStart, semicolon), radix);
                } catch (NumberFormatException e) {
                    codePoint = -1;
                }
                if (codePoint < 0) {
                    // Not a usable reference: keep the text verbatim and resume scanning
                    // after this '&' (it must NOT be skipped past, or it would be lost).
                    i++;
                    continue;
                }
                if (writer == null) {
                    writer = new StringWriter(length);
                }
                writer.append(str.substring(copiedTo, i - 1));
                writeEntityIncludingSurrogates(writer, codePoint);
            } else {
                // Named entity.
                String name = str.substring(i, semicolon);
                CharSequence replacement = map.get(name);
                if (replacement == null) {
                    // Unknown name: keep the text verbatim and resume after this '&'.
                    i++;
                    continue;
                }
                LOG.trace("changed " + name + " to " + replacement);
                if (writer == null) {
                    writer = new StringWriter(length);
                }
                writer.append(str.substring(copiedTo, i - 1));
                writer.append(replacement);
            }

            // Skip past the entity that was just replaced.
            copiedTo = semicolon + 1;
            i = copiedTo;
        }
        if (writer == null) {
            return str;
        }
        writer.append(str.substring(copiedTo, length));
        return writer.toString();
    }

    /**
     * Writes a code point to the writer, as a single char when it fits in 16 bits
     * and otherwise as the chars produced by {@link Character#toChars(int)}.
     */
    private static void writeEntityIncludingSurrogates(StringWriter stringWriter, int i) {
        if (i <= 0xFFFF) {
            stringWriter.write(i);
            return;
        }
        char[] chars = Character.toChars(i);
        stringWriter.write(chars, 0, chars.length);
    }

    /**
     * Removes every {@code name="value"} occurrence of an attribute from raw text.
     * Only double-quoted, non-empty values are matched; whitespace before the
     * attribute is left in place.
     *
     * @param str the text to clean
     * @param str2 attribute name; it is inserted into a regular expression unquoted
     * @return the text with all matching attributes removed
     */
    public static String stripAttributeFromText(String str, String str2) {
        Matcher matcher = Pattern.compile("(" + str2 + "\\s*=\\s*\\\"[^\\\"]+\\\")").matcher(str);
        StringBuilder sb = new StringBuilder();
        int cursor = 0;
        while (matcher.find(cursor)) {
            sb.append(str, cursor, matcher.start());
            cursor = matcher.end();
        }
        sb.append(str, cursor, str.length());
        return sb.toString();
    }

    /**
     * Removes every {@code <name ...> ... </name>} span (tags and content) from raw text.
     * Tags whose name merely starts with {@code str2} (e.g. {@code <body>} when
     * stripping {@code b}) are left alone.
     *
     * @param str the text to clean
     * @param str2 element name
     * @return the text with all such elements removed
     * @throws RuntimeException if an opening tag has no matching {@code </name>} after it
     */
    public static String stripElementFromTextString(String str, String str2) {
        String openPrefix = "<" + str2;
        String closeTag = XMLConstants.OPEN_END_NODE + str2 + ">";
        StringBuilder sb = new StringBuilder(str);
        int from = 0;
        while (true) {
            int open = sb.indexOf(openPrefix, from);
            if (open == -1) {
                return sb.toString();
            }
            int afterName = open + openPrefix.length();
            if (afterName < sb.length() && isTagNameChar(sb.charAt(afterName))) {
                // A longer tag name that only shares the prefix: not ours, keep looking.
                from = afterName;
                continue;
            }
            // The closing tag is searched for after the end of the opening tag. If the
            // opening tag is never terminated there cannot be a closing tag after it.
            int openTagEnd = sb.indexOf(">", open + 2);
            int close = (openTagEnd == -1) ? -1 : sb.indexOf(closeTag, openTagEnd);
            if (close == -1) {
                throw new RuntimeException("no trailing </" + str2 + ">");
            }
            sb.delete(open, close + closeTag.length());
            from = open;
        }
    }

    /** Returns true for characters that can continue an element name (letters, digits, '-', '_', ':', '.'). */
    private static boolean isTagNameChar(char c) {
        return Character.isLetterOrDigit(c) || c == '-' || c == '_' || c == ':' || c == '.';
    }

    /**
     * Removes the first {@code <!DOCTYPE ...>} declaration, together with any
     * whitespace that follows it. Text without a (terminated) DOCTYPE is returned unchanged.
     *
     * @param str raw HTML text
     * @return the text without the DOCTYPE; {@code ""} (with an error logged) if
     *         {@code str} is null or blank
     */
    public static String stripDOCTYPE(String str) {
        if (str == null || str.trim().length() == 0) {
            LOG.error("Empty Html Document");
            return "";
        }
        StringBuilder sb = new StringBuilder(str);
        int doctypeStart = sb.indexOf("<!DOCTYPE", skipWhitespace(sb, 0));
        if (doctypeStart < 0) {
            return str;
        }
        int doctypeEnd = sb.indexOf(">", doctypeStart);
        if (doctypeEnd < 0) {
            // Unterminated declaration: there is no well-defined span to remove.
            return str;
        }
        sb.delete(doctypeStart, skipWhitespace(sb, doctypeEnd + 1));
        return sb.toString();
    }

    /**
     * Replaces each character listed in {@link #badGood} by its plain counterpart.
     *
     * @param str the text to clean
     * @return the text with problem characters substituted
     */
    public static final String replaceProblemCharacters(String str) {
        StringBuilder sb = new StringBuilder(str);
        for (int i = 0; i < sb.length(); i++) {
            char current = sb.charAt(i);
            for (char[] pair : badGood) {
                if (current == pair[0]) {
                    LOG.trace("substituted " + pair[0] + " with" + pair[1]);
                    sb.setCharAt(i, pair[1]);
                    // The bad characters are distinct, so no other pair can match.
                    break;
                }
            }
        }
        return sb.toString();
    }

    /**
     * Returns the index of the first non-whitespace character at or after {@code i},
     * or {@code sb.length()} if only whitespace (or nothing) remains.
     */
    private static int skipWhitespace(StringBuilder sb, int i) {
        while (i < sb.length() && Character.isWhitespace(sb.charAt(i))) {
            i++;
        }
        return i;
    }

    /**
     * Repairs doubled quotes ({@code "“} and {@code ”"}) by replacing each pair with
     * {@code EuclidConstants.S_QUOT}. If that changed the length of the text, it also
     * adds an {@code xmlns:g} declaration to every {@code <html} and reduces
     * {@code <button ...>} start tags (those not ending in {@code />}) to {@code <button>}.
     *
     * @param str raw HTML text
     * @return the repaired text
     */
    public static String removeBMCHorror(String str) {
        String repaired = str.replaceAll("\"“", EuclidConstants.S_QUOT).replaceAll("”\"", EuclidConstants.S_QUOT);
        boolean quotesWereRepaired = repaired.length() != str.length();
        if (quotesWereRepaired) {
            repaired = repaired.replaceAll("<html", "<html xmlns:g=\"http://g.foo/\"").replaceAll("<button [^>]*[^/]>", "<button>");
        }
        return repaired;
    }
}
