package uk.ac.cam.ch.wwmm.chemicaltagger.modelParser;

import java.io.File;
import java.io.IOException;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Node;
import nu.xom.Nodes;
import nu.xom.ParsingException;
import nu.xom.Text;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import uk.ac.cam.ch.wwmm.chemicaltagger.Utils;
import uk.ac.cam.ch.wwmm.chemicaltagger.XMLtoAST;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/chemicaltagger/modelParser/CreateTreeBank.class */
/**
 * Command-line helper that extracts paragraph text from XML documents, runs it
 * through ChemicalTagger and writes the resulting sentence list to text files
 * under {@code target/}.
 */
public class CreateTreeBank {
    private static final Logger LOG = Logger.getLogger(CreateTreeBank.class);

    /**
     * Collects the text found directly inside every {@code <p>} element of an
     * XML document.
     *
     * <p>Only the immediate {@link Text} children of each {@code <p>} are used;
     * text nested inside child elements is not visited. Each text node is
     * trimmed, and nodes whose text starts (case-insensitively) with
     * {@code "tlc"} or {@code "mass sp"} are skipped. Every retained piece is
     * appended preceded by a single space.
     *
     * @param str location of the XML document, passed unchanged to
     *            {@link Builder#build(String)}
     * @return the concatenated paragraph text, or an empty string if the
     *         document could not be read or parsed (the failure is logged)
     */
    public String getContent(String str) {
        Builder builder = new Builder();
        LOG.info("Extracting data from " + str);
        StringBuilder content = new StringBuilder();
        try {
            Document document = builder.build(str);
            Nodes paragraphs = document.query("//p");
            for (int p = 0; p < paragraphs.size(); p++) {
                Node paragraph = paragraphs.get(p);
                for (int c = 0; c < paragraph.getChildCount(); c++) {
                    Node child = paragraph.getChild(c);
                    if (!(child instanceof Text)) {
                        continue;
                    }
                    String text = child.getValue().trim();
                    String lowered = text.toLowerCase();
                    if (lowered.startsWith("tlc") || lowered.startsWith("mass sp")) {
                        continue;
                    }
                    content.append(' ').append(text);
                }
            }
            // NOTE(review): text under //spectrum elements is not part of the
            // returned content - confirm that this is intended.
        } catch (IOException e) {
            // Pass the caught exception so the log shows the real stack trace.
            LOG.fatal(e.getMessage(), e);
        } catch (ParsingException e) {
            LOG.fatal("ParsingException " + e.getMessage(), e);
        }
        return content.toString();
    }

    /**
     * Processes every entry of the directory named by the first argument:
     * extracts its content, tags it, converts the result with
     * {@link XMLtoAST} and writes the sentence list to
     * {@code target/<name>.txt}.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the input directory
     * @throws IllegalArgumentException if no input directory is given
     * @throws IOException if the directory cannot be listed, or if thrown by the
     *         tagging or writing steps
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr == null || strArr.length < 1) {
            throw new IllegalArgumentException("Usage: CreateTreeBank <input-directory>");
        }
        File inputDir = new File(strArr[0]);
        // File.list() returns null when the path is not a directory or cannot be read.
        String[] fileNames = inputDir.list();
        if (fileNames == null) {
            throw new IOException("Cannot list input directory: " + inputDir);
        }
        CreateTreeBank treeBank = new CreateTreeBank();
        XMLtoAST xMLtoAST = new XMLtoAST();
        for (String fileName : fileNames) {
            // Join through File so the argument works with or without a trailing separator.
            String sourcePath = new File(inputDir, fileName).getPath();
            xMLtoAST.convert(Utils.runChemicalTagger(treeBank.getContent(sourcePath)));
            Utils.writeListToFile(xMLtoAST.getSentenceList(), "target/" + toOutputName(fileName));
        }
    }

    /**
     * Maps an input file name to its output name by swapping a trailing
     * {@code .xml} extension for {@code .txt}; other names are returned unchanged.
     */
    private static String toOutputName(String fileName) {
        String xmlSuffix = ".xml";
        if (fileName.endsWith(xmlSuffix)) {
            return fileName.substring(0, fileName.length() - xmlSuffix.length()) + ".txt";
        }
        return fileName;
    }
}
