package org.xmlcml.html.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import nu.xom.Document;
import nu.xom.Element;
import org.apache.commons.io.IOUtils;
import org.w3c.tidy.Tidy;
import org.xmlcml.cml.base.CMLUtil;
import org.xmlcml.euclid.EuclidConstants;
import org.xmlcml.html.HtmlI;

/* loaded from: input_file:org/xmlcml/html/util/HTMLTidy.class */
/**
 * Static helpers that run (possibly malformed) HTML through JTidy and hand back
 * the cleaned-up result as a XOM {@link Document} / {@link Element}.
 *
 * <p>Before tidying, {@code <it>} / {@code </it>} tags are rewritten to the tag
 * named by {@link HtmlI#TAG}.
 *
 * <p>NOTE(review): all byte/String conversions in this class use the platform
 * default charset, as the original code did. Switching to an explicit charset
 * would also require configuring Tidy's input/output encoding — confirm before
 * changing.
 */
public class HTMLTidy {

    /**
     * Reads HTML from a stream, tidies it with JTidy and parses the result.
     *
     * @param inputStream the HTML to tidy; must not be {@code null}
     * @return the document produced by
     *         {@code CMLUtil.stripDTDAndOtherProblematicXMLHeadings} from the
     *         tidied output, or {@code null} if Tidy produced no output
     * @throws IOException if the stream cannot be read
     * @throws RuntimeException if {@code inputStream} is {@code null}
     */
    public static Document htmlTidy(InputStream inputStream) throws IOException {
        if (inputStream == null) {
            throw new RuntimeException("Null input for HTMLTidy");
        }
        List<String> lines = preTidy(IOUtils.readLines(inputStream));
        Tidy tidy = createTidyWithOptions();
        ByteArrayOutputStream tidied = new ByteArrayOutputStream();
        tidy.parse(createInputStream(lines), tidied);
        // ByteArrayOutputStream.close() is a no-op, so no explicit close is needed.
        String xml = new String(tidied.toByteArray());
        if (xml.length() == 0) {
            return null;
        }
        return CMLUtil.stripDTDAndOtherProblematicXMLHeadings(xml);
    }

    /**
     * Joins the lines into a single in-memory stream, appending
     * {@code EuclidConstants.S_SPACE} after every line (including the last).
     */
    private static InputStream createInputStream(List<String> list) {
        StringBuilder sb = new StringBuilder();
        for (String line : list) {
            sb.append(line).append(EuclidConstants.S_SPACE);
        }
        return new ByteArrayInputStream(sb.toString().getBytes());
    }

    /**
     * Returns a new list in which every line has had its {@code it} tags
     * replaced by {@link HtmlI#TAG} tags. The input list is not modified.
     */
    private static List<String> preTidy(List<String> list) {
        List<String> cleaned = new ArrayList<String>(list.size());
        for (String line : list) {
            cleaned.add(replaceBadTags(line, "it", HtmlI.TAG));
        }
        return cleaned;
    }

    /**
     * Stream-based variant of {@link #preTidy(List)}: returns a new buffer holding
     * the content of {@code byteArrayOutputStream} with {@code it} tags replaced.
     *
     * <p>Not called from within this class. The previous version contained a
     * debugging loop that never advanced its search index and therefore never
     * terminated once a match was found; that loop and its console output have
     * been removed.
     */
    private static ByteArrayOutputStream preTidy(ByteArrayOutputStream byteArrayOutputStream) {
        String original = new String(byteArrayOutputStream.toByteArray());
        byte[] replaced = replaceBadTags(original, "it", HtmlI.TAG).getBytes();
        ByteArrayOutputStream result = new ByteArrayOutputStream(replaced.length);
        // write(byte[], int, int) on ByteArrayOutputStream declares no IOException.
        result.write(replaced, 0, replaced.length);
        return result;
    }

    /**
     * Replaces opening and closing tags named {@code str2} with tags named
     * {@code str3}. Only bare tags (no attributes) are matched.
     *
     * <p>Uses literal replacement so that neither tag name is interpreted as a
     * regular expression or as a replacement template.
     *
     * @param str  text to process
     * @param str2 tag name to replace
     * @param str3 tag name to substitute
     * @return the text with the tags replaced
     */
    private static String replaceBadTags(String str, String str2, String str3) {
        String badOpen = EuclidConstants.S_LANGLE + str2 + EuclidConstants.S_RANGLE;
        String goodOpen = EuclidConstants.S_LANGLE + str3 + EuclidConstants.S_RANGLE;
        String badClose = "</" + str2 + EuclidConstants.S_RANGLE;
        String goodClose = "</" + str3 + EuclidConstants.S_RANGLE;
        return str.replace(badOpen, goodOpen).replace(badClose, goodClose);
    }

    /**
     * Creates a fresh {@link Tidy} configured for quiet XML/XHTML output with
     * numeric entities, no doctype, and font tags / empty paragraphs dropped.
     */
    private static Tidy createTidyWithOptions() {
        Tidy tidy = new Tidy();
        tidy.setDocType(null);
        tidy.setXmlOut(true);
        tidy.setDropEmptyParas(true);
        tidy.setDropFontTags(true);
        tidy.setMakeClean(true);
        tidy.setNumEntities(true);
        tidy.setXHTML(true);
        tidy.setQuiet(true);
        tidy.setQuoteMarks(true);
        tidy.setShowWarnings(false);
        return tidy;
    }

    /**
     * Tidies an HTML string and returns the root element of the result.
     *
     * @param str the HTML text
     * @return the root element, or {@code null} if tidying produced no document
     * @throws RuntimeException wrapping any failure (including a {@code null}
     *         argument); the original exception is attached as the cause
     */
    public static Element convertStringToXHTML(String str) {
        try {
            Document doc = htmlTidy(new ByteArrayInputStream(str.getBytes()));
            return doc == null ? null : doc.getRootElement();
        } catch (Exception e) {
            // Same message as before, but keep the cause so the stack trace survives.
            throw new RuntimeException("parse: " + e, e);
        }
    }
}
