package org.xmlcml.norma.input.html;

import com.gargoylesoftware.htmlunit.html.HtmlButton;
import com.gargoylesoftware.htmlunit.html.HtmlFieldSet;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlInlineFrame;
import com.gargoylesoftware.htmlunit.html.HtmlNoScript;
import java.io.File;
import java.util.Arrays;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.xmlcml.html.HtmlElement;
import org.xmlcml.html.HtmlFactory;
import org.xmlcml.norma.NormaArgProcessor;

/* loaded from: input_file:org/xmlcml/norma/input/html/HtmlCleaner.class */
/**
 * Parses the current input file of a {@link NormaArgProcessor} into an
 * {@link HtmlElement} by way of a pre-configured {@link HtmlFactory}.
 *
 * <p>The factory is set up once, at construction time, with three tag-name
 * lists and with jsoup usage switched on. The most recent parse result is
 * retained and can be read back through {@link #getHtmlElement()}.
 */
public class HtmlCleaner {
    private static final Logger LOG = Logger.getLogger(HtmlCleaner.class);

    static {
        // This class always logs at DEBUG level.
        LOG.setLevel(Level.DEBUG);
    }

    /** The only tidying option that does not trigger a warning. */
    private static final String JSOUP = "jsoup";
    // The two option names below are declared but not referenced in this class.
    private static final String JTIDY = "jtidy";
    private static final String HTMLUNIT = "htmlunit";

    private NormaArgProcessor normaArgProcessor;
    private HtmlElement htmlElement;
    private HtmlFactory htmlFactory;

    /**
     * Creates a cleaner bound to the given argument processor and builds the
     * factory used for parsing.
     *
     * @param normaArgProcessor supplies the input file to be parsed
     */
    public HtmlCleaner(NormaArgProcessor normaArgProcessor) {
        this.normaArgProcessor = normaArgProcessor;
        createHtmlFactory();
    }

    /**
     * Builds the {@link HtmlFactory} and registers the tag-name lists it is
     * configured with.
     */
    private void createHtmlFactory() {
        HtmlFactory factory = new HtmlFactory();
        // NOTE(review): the exact meaning of the content / no-content / balance
        // lists is defined by HtmlFactory, not here -- confirm against that class.
        factory.setContentList(Arrays.asList(
                HtmlNoScript.TAG_NAME,
                "script",
                "style",
                HtmlInlineFrame.TAG_NAME,
                HtmlButton.TAG_NAME,
                HtmlFieldSet.TAG_NAME,
                "label"));
        factory.setNoContentList(Arrays.asList(
                "input",
                "link",
                HtmlForm.TAG_NAME));
        factory.setBalanceList(Arrays.asList("meta"));
        factory.setUseJsoup(true);
        this.htmlFactory = factory;
    }

    /**
     * Parses the processor's current input file and returns the result.
     *
     * <p>Any option other than {@code "jsoup"} (including {@code null}) is
     * reported with a warning, after which parsing proceeds in the same way.
     * The stored element is cleared before parsing, so it remains {@code null}
     * if parsing fails.
     *
     * @param str name of the requested tidying option
     * @return the element produced by the factory
     * @throws RuntimeException wrapping any exception thrown while parsing
     */
    public HtmlElement cleanHTML2XHTML(String str) {
        boolean jsoupRequested = JSOUP.equals(str);
        if (!jsoupRequested) {
            LOG.warn("tidying option not supported:" + str);
        }

        File inputFile = normaArgProcessor.checkAndGetInputFile(normaArgProcessor.getCurrentCMTree());
        htmlElement = null;
        try {
            htmlElement = htmlFactory.parse(inputFile);
        } catch (Exception e) {
            throw new RuntimeException("Cannot transform HTML " + inputFile, e);
        }
        return htmlElement;
    }

    /**
     * Returns the element stored by the last call to
     * {@link #cleanHTML2XHTML(String)}.
     *
     * @return the stored element, or {@code null} if no parse has succeeded
     */
    public HtmlElement getHtmlElement() {
        return htmlElement;
    }
}
