package org.xmlcml.norma.pubstyle;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import nu.xom.Element;
import nu.xom.Node;
import nu.xom.Nodes;
import org.apache.commons.lang3.tuple.Pair;
import org.xmlcml.html.HtmlElement;
import org.xmlcml.html.HtmlFactory;
import org.xmlcml.norma.InputFormat;
import org.xmlcml.norma.RawInput;
import org.xmlcml.norma.input.InputReader;
import org.xmlcml.norma.tagger.PubstyleTagger;
import org.xmlcml.xml.XMLUtil;

/* loaded from: input_file:org/xmlcml/norma/pubstyle/PubstyleReader.class */
/**
 * Base class for publisher-style readers.
 *
 * <p>A reader obtains raw input from a URL or a file through an {@link InputReader}
 * selected by {@link InputFormat}, can parse that raw input into an {@link HtmlElement},
 * and can then {@link #normalize() normalize} the parsed tree by removing unwanted
 * elements/comments and renaming tags. Subclasses supply the format-specific taggers
 * ({@link #addTaggers()}) and the extra XPaths to strip ({@link #getExtraneousXPaths()}).
 */
public abstract class PubstyleReader {
    private URL url;
    private RawInput rawInput;
    private InputReader inputReader;
    private File file;
    private HtmlElement htmlElement;
    private InputFormat inputFormat;
    private HashMap<InputFormat, PubstyleTagger> taggerByFormatMap;
    private List<Pair<String, String>> tagReplacementList;

    /**
     * Local names of the elements removed by {@link #removeExtraneousHtmlTagsAndXPaths()}.
     * NOTE(review): this field is public and non-final, so callers can reassign it;
     * it is left that way to stay backward-compatible.
     */
    public static List<String> EXTRANEOUS_TAGS = Arrays.asList("button", "fieldset", "iframe", "input", "link", "object", "script", "style");

    /**
     * Creates a reader with no input format and registers the subclass taggers.
     *
     * <p>Note: this constructor calls the abstract {@link #addTaggers()}, which therefore
     * runs before any subclass field initializers or constructor body.
     * NOTE(review): the decompiler reported this constructor was originally
     * {@code protected}; the visibility seen in this file is kept.
     */
    public PubstyleReader() {
        setDefaults();
        addTaggers();
    }

    /**
     * Creates a reader and immediately selects the given input format.
     *
     * @param inputFormat the format passed on to {@link #setFormat(InputFormat)}
     */
    public PubstyleReader(InputFormat inputFormat) {
        this();
        setFormat(inputFormat);
    }

    /**
     * Records the input format and creates the matching {@link InputReader}
     * via {@code InputReader.createReader}.
     *
     * @param inputFormat the format of the input to be read
     */
    public void setFormat(InputFormat inputFormat) {
        this.inputFormat = inputFormat;
        this.inputReader = InputReader.createReader(inputFormat);
    }

    /** @return the format last passed to {@link #setFormat(InputFormat)}, or {@code null} */
    public InputFormat getInputFormat() {
        return this.inputFormat;
    }

    /** Hook for default initialisation; currently does nothing. */
    private void setDefaults() {
    }

    /** @return the reader created by {@link #setFormat(InputFormat)}, or {@code null} */
    public InputReader getInputReader() {
        return this.inputReader;
    }

    /**
     * Reads raw input from the given URL. Does nothing when no input reader has been
     * set or when {@code str} is {@code null}.
     *
     * @param str the URL to read, as a string
     * @throws Exception if the URL is malformed, cannot be opened, or reading fails
     */
    public void readURL(String str) throws Exception {
        if (this.inputReader == null || str == null) {
            return;
        }
        this.url = new URL(str);
        // Close the stream once read; the original code leaked it.
        // NOTE(review): assumes InputReader.read consumes the stream before returning - confirm.
        try (InputStream stream = this.url.openStream()) {
            this.rawInput = this.inputReader.read(stream);
        }
    }

    /**
     * Reads raw input from the given file. Does nothing when no input reader has been
     * set or when {@code file} is {@code null}.
     *
     * @param file the file to read
     * @throws Exception if the file cannot be opened or reading fails
     */
    public void readFile(File file) throws Exception {
        if (this.inputReader == null || file == null) {
            return;
        }
        this.file = file;
        // Close the stream once read; the original code leaked it.
        // NOTE(review): assumes InputReader.read consumes the stream before returning - confirm.
        try (FileInputStream stream = new FileInputStream(file)) {
            this.rawInput = this.inputReader.read(stream);
        }
    }

    /** @return the raw input from the last successful read, or {@code null} */
    public RawInput getRawInput() {
        return this.rawInput;
    }

    /**
     * Parses the raw bytes of the current raw input into an {@link HtmlElement}.
     *
     * <p>Despite the name, the element is re-parsed on every call for which raw bytes
     * are available; when there is no raw input (or it has no bytes) the previously held
     * element is returned unchanged.
     *
     * @return the parsed element, or the previously held one (possibly {@code null})
     * @throws Exception if parsing fails
     */
    public HtmlElement getOrCreateXHtmlFromRawHtml() throws Exception {
        byte[] rawBytes = this.rawInput == null ? null : this.rawInput.getRawBytes();
        if (rawBytes != null) {
            this.htmlElement = new HtmlFactory().parse(new ByteArrayInputStream(rawBytes));
        }
        return this.htmlElement;
    }

    /** Registers the taggers of the concrete reader; invoked from the constructor. */
    protected abstract void addTaggers();

    /**
     * Registers a tagger for an input format, replacing any tagger already registered
     * for that format.
     * NOTE(review): the decompiler reported this method was originally {@code protected}.
     *
     * @param inputFormat the format the tagger applies to
     * @param pubstyleTagger the tagger to register
     */
    public void addTagger(InputFormat inputFormat, PubstyleTagger pubstyleTagger) {
        ensureTaggerByFormatMap();
        this.taggerByFormatMap.put(inputFormat, pubstyleTagger);
    }

    /** Lazily creates the format-to-tagger map. */
    private void ensureTaggerByFormatMap() {
        if (this.taggerByFormatMap == null) {
            this.taggerByFormatMap = new HashMap<>();
        }
    }

    /**
     * @param inputFormat the format to look up
     * @return the tagger registered for the format, or {@code null} if none
     */
    public PubstyleTagger getTagger(InputFormat inputFormat) {
        ensureTaggerByFormatMap();
        return this.taggerByFormatMap.get(inputFormat);
    }

    /**
     * Detaches every element whose local name is in {@code list}, plus every comment
     * node, from the current HTML element.
     *
     * <p>The XPath has the form
     * {@code //*[local-name()='a' or local-name()='b'] | //comment()}. Every entry of the
     * list is included (the previous implementation silently dropped the last one and
     * produced an invalid expression for lists with fewer than two entries). With a
     * {@code null} or empty list only comments are removed. Does nothing when no HTML
     * element is held.
     *
     * @param list local names of the elements to remove; may be {@code null} or empty
     */
    public void removeExtraneousHtmlTags(List<String> list) {
        if (this.htmlElement == null) {
            return;
        }
        StringBuilder xpath = new StringBuilder();
        if (list != null && !list.isEmpty()) {
            xpath.append("//*[");
            for (int i = 0; i < list.size(); i++) {
                if (i > 0) {
                    xpath.append(" or ");
                }
                xpath.append("local-name()='").append(list.get(i)).append("'");
            }
            xpath.append("] | ");
        }
        xpath.append("//comment()");
        removeNodes(xpath.toString());
    }

    /** Removes the {@link #EXTRANEOUS_TAGS} elements, comments, and the subclass XPaths. */
    protected void removeExtraneousHtmlTagsAndXPaths() {
        removeExtraneousHtmlTags(EXTRANEOUS_TAGS);
        removeExtraneousXPaths();
    }

    /**
     * Detaches all nodes matched by each XPath from {@link #getExtraneousXPaths()}.
     * A {@code null} list is treated as empty.
     */
    protected void removeExtraneousXPaths() {
        List<String> xpaths = getExtraneousXPaths();
        if (xpaths == null) {
            return;
        }
        for (String xpath : xpaths) {
            removeNodes(xpath);
        }
    }

    /**
     * Detaches all nodes matched by the XPath; no-op when no HTML element is held.
     *
     * @param str the XPath to evaluate against the current HTML element
     */
    private void removeNodes(String str) {
        if (this.htmlElement == null) {
            return;
        }
        Nodes matches = this.htmlElement.query(str);
        // Walk backwards so descendants are detached before their ancestors.
        for (int i = matches.size() - 1; i >= 0; i--) {
            matches.get(i).detach();
        }
    }

    /** @return the publisher-specific XPaths whose matches should be removed */
    protected abstract List<String> getExtraneousXPaths();

    /**
     * Normalizes the current HTML element in place: strips extraneous nodes, renames
     * tags, then runs the div-structure and character hooks. Does nothing when no HTML
     * element is held.
     *
     * @return the (possibly replaced) HTML element, or {@code null} if none is held
     */
    public HtmlElement normalize() {
        if (this.htmlElement != null) {
            removeExtraneousHtmlTagsAndXPaths();
            normalizeTagNames();
            normalizeDivStructure();
            normalizeCharacters();
        }
        return this.htmlElement;
    }

    /** Applies every (old name, new name) pair of the tag replacement list. */
    protected void normalizeTagNames() {
        ensureTagNameReplacementList();
        for (Pair<String, String> pair : this.tagReplacementList) {
            changeTagName(pair.getLeft(), pair.getRight());
        }
    }

    /** Lazily creates the tag replacement list. */
    private void ensureTagNameReplacementList() {
        if (this.tagReplacementList == null) {
            this.tagReplacementList = new ArrayList<>();
        }
    }

    /**
     * Replaces every element with local name {@code str} by an element created from
     * tag {@code str2}; no-op when no HTML element is held.
     *
     * @param str the local name to look for
     * @param str2 the tag used to create the replacement elements
     */
    private void changeTagName(String str, String str2) {
        if (this.htmlElement == null) {
            return;
        }
        Nodes matches = this.htmlElement.query("//*[local-name()='" + str + "']");
        for (int i = 0; i < matches.size(); i++) {
            replaceNode((Element) matches.get(i), str2);
        }
    }

    /**
     * Creates an element from tag {@code str}, moves the attributes and children of
     * {@code element} onto it, and puts it in {@code element}'s place in the tree.
     *
     * <p>The previous implementation built the new element but never inserted it, so
     * the old element stayed in the tree with its children stripped.
     *
     * @param element the element to replace
     * @param str the tag used to create the replacement element
     */
    private void replaceNode(Element element, String str) {
        HtmlElement replacement = new HtmlFactory().createElementFromTag(str);
        XMLUtil.copyAttributes(element, replacement);
        // Always take child 0: each detach shifts the remaining children down.
        while (element.getChildCount() > 0) {
            Node child = element.getChild(0);
            child.detach();
            replacement.appendChild(child);
        }
        if (element.getParent() != null) {
            element.getParent().replaceChild(element, replacement);
        } else if (element == this.htmlElement) {
            // A parentless root cannot be swapped in a parent; re-point the field instead.
            this.htmlElement = replacement;
        }
    }

    /** Hook for subclasses to normalize div structure; does nothing by default. */
    protected void normalizeDivStructure() {
    }

    /** Hook for subclasses to normalize characters; does nothing by default. */
    protected void normalizeCharacters() {
    }

    /** @return the HTML element currently held, or {@code null} */
    public HtmlElement getHtmlElement() {
        return this.htmlElement;
    }

    /** @param htmlElement the HTML element to operate on */
    public void setHtmlElement(HtmlElement htmlElement) {
        this.htmlElement = htmlElement;
    }
}
