package org.xmlcml.ami2.plugins.phylotree;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:org/xmlcml/ami2/plugins/phylotree/ScientificNameList.class */
/**
 * Derives sorted word lists from a pipe-delimited taxonomy dump file.
 *
 * <p>Each input line is split on {@code |}. The second field is treated as a
 * name, the third as an optional qualifier that may carry a
 * {@code <...>} marker, and the fourth as the role of the name (for example
 * {@code "scientific name"}). The results are written as sorted text files
 * into the {@link #TAXDUMP} directory.
 *
 * <p>NOTE(review): the layout looks like the NCBI taxonomy {@code names.dmp}
 * format; confirm against the data actually shipped in the taxdump directory.
 */
public class ScientificNameList {
    private static final String SCIENTIFIC_NAME = "scientific name";
    /** Charset used for both reading the dump and writing the result lists. */
    private static final String ENCODING = "UTF-8";
    /** Number of leading fields a line must have to be processed. */
    private static final int MIN_FIELDS = 4;
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    private static final Pattern BINOMIAL = Pattern.compile("([A-Z][a-z]+\\s+[a-z\\-]+)\\s+.*");
    private static final Pattern GENUS = Pattern.compile("([A-Z][a-z]+)\\s+.*");
    private static final Pattern CLASS = Pattern.compile(".*(<[a-z][^>]*>).*");
    private static final File TAXDUMP = new File("src/main/resources/org/xmlcml/ami2/plugins/phylotree/taxdump/");

    /**
     * Reads a dump file and writes four sorted lists into {@link #TAXDUMP}:
     * {@code binomial.txt}, {@code genus.txt}, {@code class.txt} and
     * {@code role.txt}.
     *
     * <p>Lines that are blank, or that have fewer than four
     * {@code |}-separated fields, are skipped instead of aborting the run.
     *
     * @param file the dump file to read
     * @throws IOException if the input cannot be read or an output file
     *         cannot be written
     */
    public void readTaxdump(File file) throws IOException {
        Set<String> binomials = new HashSet<String>();
        Set<String> genera = new HashSet<String>();
        Set<String> roles = new HashSet<String>();
        Set<String> classes = new HashSet<String>();

        for (String rawLine : FileUtils.readLines(file, ENCODING)) {
            String line = WHITESPACE.matcher(rawLine).replaceAll(" ").trim();
            if (line.length() < 2) {
                // Too short for the substring below; nothing to parse.
                continue;
            }
            // Drops the first and the last character of the normalized line.
            // The last one is presumably the trailing '|' terminator; the
            // first one belongs to the leading field, which is never read.
            String[] fields = line.substring(1, line.length() - 1).split("\\|");
            if (fields.length < MIN_FIELDS) {
                continue;
            }
            String name = fields[1].trim();
            String qualifier = fields[2].trim();
            String role = fields[3].trim();

            // Class markers and roles are collected for every well-formed
            // line, including the virus entries excluded further down.
            Matcher classMatcher = CLASS.matcher(qualifier);
            if (classMatcher.matches()) {
                classes.add(classMatcher.group(1));
            }
            roles.add(role);

            if (name.contains("virus") || !SCIENTIFIC_NAME.equals(role)) {
                continue;
            }
            // BINOMIAL and GENUS both require trailing whitespace after the
            // captured words, so pad the name to let names consisting of
            // exactly those words match as well.
            String padded = name + " ";
            Matcher binomialMatcher = BINOMIAL.matcher(padded);
            if (binomialMatcher.matches()) {
                binomials.add(binomialMatcher.group(1));
                continue;
            }
            Matcher genusMatcher = GENUS.matcher(padded);
            if (genusMatcher.matches()) {
                genera.add(genusMatcher.group(1));
            }
        }

        writeSortedSet(new File(TAXDUMP, "binomial.txt"), binomials);
        writeSortedSet(new File(TAXDUMP, "genus.txt"), genera);
        writeSortedSet(new File(TAXDUMP, "class.txt"), classes);
        writeSortedSet(new File(TAXDUMP, "role.txt"), roles);
    }

    /**
     * Writes the elements of {@code set} to {@code file}, one per line, in
     * natural {@link String} order.
     *
     * @param file the file to write
     * @param set the values to write; not modified
     * @throws IOException if the file cannot be written
     */
    private void writeSortedSet(File file, Set<String> set) throws IOException {
        List<String> sorted = new ArrayList<String>(set);
        Collections.sort(sorted);
        FileUtils.writeLines(file, ENCODING, sorted);
    }

    /**
     * Regenerates the lists from the {@code names.dmp} file located in the
     * taxdump resource directory.
     *
     * @param strArr ignored
     * @throws Exception if reading or writing fails
     */
    public static void main(String[] strArr) throws Exception {
        new ScientificNameList().readTaxdump(new File("src/main/resources/org/xmlcml/ami2/plugins/phylotree/taxdump/names.dmp"));
    }
}
