package org.xmlcml.xhtml2stm.visitor.sequence;

import java.util.Iterator;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.xmlcml.xhtml2stm.Type;
import org.xmlcml.xhtml2stm.visitable.VisitableContainer;
import org.xmlcml.xhtml2stm.visitable.html.HtmlContainer;
import org.xmlcml.xhtml2stm.visitable.html.HtmlVisitable;
import org.xmlcml.xhtml2stm.visitable.image.ImageVisitable;
import org.xmlcml.xhtml2stm.visitable.pdf.PDFVisitable;
import org.xmlcml.xhtml2stm.visitable.svg.SVGVisitable;
import org.xmlcml.xhtml2stm.visitable.table.TableVisitable;
import org.xmlcml.xhtml2stm.visitable.xml.XMLContainer;
import org.xmlcml.xhtml2stm.visitable.xml.XMLVisitable;
import org.xmlcml.xhtml2stm.visitor.AbstractVisitor;

/* loaded from: input_file:org/xmlcml/xhtml2stm/visitor/sequence/SequenceVisitor.class */
/**
 * Visitor that searches visitable containers for nucleotide sequences.
 *
 * <p>Only HTML and XML inputs are searched, and only with the DNA pattern. Image, PDF, SVG and
 * table inputs are handed straight to {@code notYetImplemented}.
 */
public class SequenceVisitor extends AbstractVisitor {
    private static final Logger LOG = Logger.getLogger(SequenceVisitor.class);

    /** Optional whitespace around a run of six or more of the letters A, C, G, T. */
    private static final String DNA_REGEX_S = "\\s*[ACGT]{6,}\\s*";
    private static final Pattern DNA_PATTERN = Pattern.compile(DNA_REGEX_S);

    /** Same shape as the DNA regex, with U in place of T. */
    private static final String RNA_REGEX_S = "\\s*[ACGU]{6,}\\s*";
    // NOTE(review): not referenced anywhere in this class; presumably reserved for RNA search.
    private static final Pattern RNA_PATTERN = Pattern.compile(RNA_REGEX_S);

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Kinds of sequence this visitor can label its results with.
     *
     * <p>NOTE(review): only {@link #DNA} is used in this class; the meaning of {@link #G} is not
     * evident from this file (possibly a decompilation artifact) - confirm before relying on it.
     */
    public enum SequenceType implements Type {
        DNA,
        G
    }

    /**
     * Searches every HTML container of the visitable for DNA sequences.
     *
     * @param htmlVisitable source of the HTML containers to search
     */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(HtmlVisitable htmlVisitable) {
        ensureResultsElement();
        for (HtmlContainer container : htmlVisitable.getHtmlContainerList()) {
            searchContainer(container, DNA_PATTERN, null, SequenceType.DNA);
        }
    }

    /**
     * Searches every XML container of the visitable for DNA sequences.
     *
     * @param xMLVisitable source of the XML containers to search
     */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(XMLVisitable xMLVisitable) {
        // NOTE(review): unlike the HTML overload, this does not call ensureResultsElement()
        // first. Behaviour is deliberately left unchanged; confirm whether the omission is
        // intentional.
        for (XMLContainer container : xMLVisitable.getXMLContainerList()) {
            searchContainer(container, DNA_PATTERN, null, SequenceType.DNA);
        }
    }

    /** Images are not searched; delegates to {@code notYetImplemented}. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(ImageVisitable imageVisitable) {
        notYetImplemented(imageVisitable);
    }

    /** PDFs are not searched; delegates to {@code notYetImplemented}. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(PDFVisitable pDFVisitable) {
        notYetImplemented(pDFVisitable);
    }

    /** SVGs are not searched; delegates to {@code notYetImplemented}. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(SVGVisitable sVGVisitable) {
        notYetImplemented(sVGVisitable);
    }

    /** Tables are not searched; delegates to {@code notYetImplemented}. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void visit(TableVisitable tableVisitable) {
        notYetImplemented(tableVisitable);
    }

    /**
     * Returns the fixed one-line description of this visitor.
     *
     * @return the string {@code "Extracts sequences."}
     */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public String getDescription() {
        return "Extracts sequences.";
    }

    /**
     * Runs one pattern over one container and appends the outcome to {@code sourceElement}.
     *
     * <p>The multiset is produced by {@code createMultiset}, which is not defined in this file,
     * and is wrapped in a new {@code SequenceListElement} tagged with {@code sequenceType}.
     *
     * @param visitableContainer container to search
     * @param pattern regular expression forwarded to {@code createMultiset}
     * @param str forwarded unchanged to {@code createMultiset}; both callers here pass {@code null}
     * @param sequenceType label attached to the resulting list element
     */
    private void searchContainer(VisitableContainer visitableContainer, Pattern pattern, String str, SequenceType sequenceType) {
        this.sourceElement.appendChild(
                new SequenceListElement(sequenceType, createMultiset(visitableContainer, pattern, str, sequenceType)));
        // Guarded so the message (and the getName() call) is only built when TRACE is on.
        if (LOG.isTraceEnabled()) {
            LOG.trace("===" + visitableContainer.getName() + "===");
        }
    }

    /**
     * Command-line entry point: builds a visitor and hands it the raw arguments.
     *
     * @param strArr command-line arguments, passed unchanged to {@code processArgs}
     * @throws Exception whatever {@code processArgs} propagates
     */
    public static void main(String[] strArr) throws Exception {
        new SequenceVisitor().processArgs(strArr);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /** Prints the {@code "Sequence: "} heading to standard error, then the superclass usage. */
    @Override // org.xmlcml.xhtml2stm.visitor.AbstractVisitor
    public void usage() {
        System.err.println("Sequence: ");
        super.usage();
    }
}
