package org.xmlcml.norma.biblio;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nu.xom.Node;
import org.xmlcml.html.HtmlB;
import org.xmlcml.html.HtmlDiv;
import org.xmlcml.html.HtmlP;

/* loaded from: input_file:org/xmlcml/norma/biblio/BiblioAbstractAnalyzer.class */
/**
 * Splits the abstract of a bibliographic entry into labelled sections.
 *
 * <p>A section header is any run of three or more upper-case ASCII letters
 * immediately followed by a colon (see {@link #ABSTRACT_SECT_PATTERN}).
 * Every header encountered is counted in a {@link Multiset}; headers that are
 * not yet in the known-section set are reported on {@code System.err} and
 * then added to that set.
 */
public class BiblioAbstractAnalyzer {
    /** Matches a section header; group 1 is the header name without the colon. */
    public static final Pattern ABSTRACT_SECT_PATTERN = Pattern.compile("([A-Z]{3,}):");

    /** Label given to text that precedes the first section header of an abstract. */
    private static final String _ANONYMOUS = "_ANONYMOUS";

    /**
     * Header names pre-loaded into every analyzer instance.
     *
     * <p>The entries are runtime data and are kept exactly as they were,
     * including oddities such as "COUCLUSION", "OBJETIVE" and "CONCLUSIONSX"
     * (presumably spellings observed in real abstracts - do not "correct" them
     * without checking the data they are matched against).
     */
    private static final String[] KNOWN_SECTIONS = {
        "ABSI", "ABSTRACT", "ACQUISITION", "ACR", "ACTIVITY", "AHI", "AHR", "AIM", "AIMS",
        "ALREADY", "ANALYSES", "ANALYSIS", "ANP", "ANSWER", "AOR", "ARFS", "ART", "ASPECTS",
        "ASSESSMENT", "AUC",
        "BACKGROUND", "BACKGROUNDS", "BDES", "BMES", "BMI", "BNP", "BPH", "BSA", "BSI", "BTT",
        "CABG", "CAD", "CALCULATOR", "CAUTION", "CHANCE", "CHARM", "CHD", "CMDS", "COMMENTS",
        "COMPLICATIONS", "CONCERN", "CONCLUSION", "CONCLUSIONS", "CONCLUSIONSX", "CONSUMPTION",
        "CONTENT", "CONTEXT", "COPD", "COSTS", "COUCLUSION", "CPT", "CRC", "CRF", "CRITERIA",
        "CRP", "CSS", "CVD", "CWP",
        "DATA", "DBP", "DCL", "DEFINITION", "DESIGN", "DFS", "DISCUSSION", "DISEASE",
        "DISSEMINATION", "DSS", "DURATION",
        "ECW", "EGAT", "ENDPOINT", "ERI", "EVIDENCE", "EXPOSURE", "EXPOSURES", "EXTRACTION",
        "FACTORS", "FCSRT", "FFM", "FINDINGS", "FMH", "FRACTURE", "FUNDING",
        "GFR", "GNRI", "GOALS", "GROUP",
        "HCG", "HDL", "HMG", "HMSO", "HPW", "HRQOL", "HRR", "HUC", "HYPOTHESIS",
        "IBW", "IHD", "III", "IMPACT", "IMPLICATIONS", "IMPORTANCE", "INDICATIONS",
        "INTERPRETATION", "INTERVENTION", "INTERVENTIONS", "INTRODUCTION", "IQR", "IRR", "ISSUE",
        "JIB",
        "LAP", "LGA", "LIMITATION", "LIMITATIONS", "LNY", "LOCAL", "LPJ", "LRF", "LRYGBP",
        "LVC", "LVH", "LVM", "LVRS",
        "MACCE", "MACE", "MAP", "MATERIALS", "MEASURE", "MEASUREMENT", "MEASUREMENTS",
        "MEASURES", "MEDIAN", "METHOD", "METHODOLOGY", "METHODS", "MMF", "MMSE", "MPI", "MREC",
        "MRNYGBP", "MTGC",
        "NEED", "NOD", "NODAT", "NPW", "NUMBER", "NURSING", "NUTRITION",
        "OBJECT", "OBJECTIVE", "OBJECTIVES", "OBJECTS", "OBJETIVE", "OPTIONS", "OUTCOME",
        "OUTCOMES",
        "PAD", "PARP", "PARTICIPANTS", "PATIENT", "PATIENTS", "PBMC", "PCI", "PFS", "POINT",
        "POPF", "POPULATION", "PRACTICE", "PREDICTOR", "PREDICTORS", "PREVENTION", "PROCEDURE",
        "PROCEDURES", "PTDM", "PTM", "PURPOSE", "PURPOSES",
        "QUESTION",
        "RATIONALE", "RBCT", "RECOMMENDATIONS", "REGISTRATION", "REGISTRY", "REHABILITATION",
        "RELEVANCE", "REPORT", "RESEARCH", "RESULT", "RESULTS", "REVIEW", "RFO", "RFS", "RHR",
        "RII", "RISKS", "RYGB",
        "SABR", "SAMPLE", "SBP", "SELECTION", "SEM", "SETTING", "SETTINGS", "SHR", "SIDS",
        "SIGNIFICANCE", "SIHC", "SLR", "SMOKING", "SMR", "SOURCE", "SOURCES", "SPLS",
        "SPONSORS", "SPONSORSHIP", "SRH", "SSI", "STATUS", "STRATEGY", "STUDIES", "STUDY",
        "SUBJECTS", "SUMMARY", "SURVIVORS", "SYNTHESIS",
        "TBW", "TECHNIQUES", "TECHNOLOGY", "TESTING", "TFC", "THERAPY", "TREATMENT", "TRIAL",
        "TYPE",
        "UPE", "URL", "UTN",
        "VALIDATION", "VALUES", "VARIABLE", "VARIABLES",
        "WEIGHT",
        "XRT"
    };

    /**
     * Header names currently regarded as known. Starts as a copy of the
     * built-in table and grows whenever an unknown header is encountered.
     * Left public and non-final because existing callers may access it directly.
     */
    public Set<String> sectionSet = new HashSet<>();

    /** Number of times each header name has been seen by this analyzer. */
    private Multiset<String> sectionMultiset;

    /**
     * Adds every built-in header name to {@link #sectionSet}.
     *
     * @return the populated set (the same instance as {@link #sectionSet})
     */
    private Set<String> createSectionSet() {
        for (String section : KNOWN_SECTIONS) {
            this.sectionSet.add(section);
        }
        return this.sectionSet;
    }

    /** Creates an analyzer pre-loaded with the built-in header names and an empty count. */
    public BiblioAbstractAnalyzer() {
        createSectionSet();
        this.sectionMultiset = HashMultiset.create();
    }

    /**
     * Analyzes the abstract of a single entry, updating the header counts.
     * The generated HTML is discarded.
     *
     * @param rISEntry entry whose abstract is analyzed
     */
    public void analyze(RISEntry rISEntry) {
        createAndAnalyzeSections(rISEntry.getAbstractString());
    }

    /**
     * Records one more occurrence of a header name.
     *
     * @param str header name to count
     */
    public void addToMultiset(String str) {
        this.sectionMultiset.add(str);
    }

    /**
     * @return the live multiset of header occurrences (not a copy)
     */
    public Multiset<String> getSectionMultiset() {
        return this.sectionMultiset;
    }

    /**
     * @return the live set of known header names (not a copy)
     */
    public Set<String> getSectionSet() {
        return this.sectionSet;
    }

    /**
     * Builds an HTML div for one abstract, with one paragraph per section, and
     * records every header found.
     *
     * <p>The div starts with a bold "ENTRY" marker followed by a paragraph for
     * the text in front of the first header, then one paragraph per header.
     * Each paragraph's value is {@code label + ": " + text}. Header paragraphs
     * carry the header name as their class attribute. Text that precedes the
     * first header is labelled and classed {@code _ANONYMOUS}; if the abstract
     * begins with a header that first paragraph is just {@code ": "}, and if
     * there is no header at all the single paragraph has an empty label.
     *
     * <p>Every header, including the first one after leading text, is checked
     * against the known-section set (unknown names are reported on
     * {@code System.err} and added) and counted in the multiset.
     *
     * @param str the abstract text; may be {@code null}
     * @return the generated div, or {@code null} when {@code str} is {@code null}
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public HtmlDiv createAndAnalyzeSections(String str) {
        if (str == null) {
            return null;
        }
        HtmlDiv entryDiv = new HtmlDiv();
        HtmlB entryMarker = new HtmlB();
        entryMarker.appendChild("ENTRY");
        entryDiv.appendChild(entryMarker);

        Matcher matcher = ABSTRACT_SECT_PATTERN.matcher(str);
        int textStart = 0;
        String label = "";
        HtmlP paragraph = new HtmlP();
        entryDiv.appendChild(paragraph);

        // find() resumes after the previous match, which is exactly where the
        // current section's text begins.
        while (matcher.find()) {
            int headerStart = matcher.start();
            if (textStart == 0 && headerStart != 0) {
                // Text in front of the first header has no name of its own;
                // mark that paragraph, not the header's paragraph, as anonymous.
                label = _ANONYMOUS;
                paragraph.setClassAttribute(_ANONYMOUS);
            }
            paragraph.setValue(label + ": " + str.substring(textStart, headerStart));

            label = matcher.group(1);
            if (!getSectionSet().contains(label)) {
                System.err.println(">> unknown section " + label);
                getSectionSet().add(label);
            }
            addToMultiset(label);

            paragraph = new HtmlP();
            entryDiv.appendChild(paragraph);
            paragraph.setClassAttribute(label);
            textStart = matcher.end();
        }
        // Whatever follows the last header (or the whole text if none matched).
        paragraph.setValue(label + ": " + str.substring(textStart));
        return entryDiv;
    }

    /**
     * Builds a div containing one child div per entry that has an abstract,
     * analyzing each abstract along the way. Entries whose abstract is
     * {@code null} are skipped.
     *
     * @param rISParser parser supplying the entries
     * @return a div whose class attribute is {@code BiblioAbstractList.TAG}
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public HtmlDiv createAbstractList(RISParser rISParser) {
        HtmlDiv listDiv = new HtmlDiv();
        listDiv.setClassAttribute(BiblioAbstractList.TAG);
        // Return value unused; presumably called so that entryList is populated
        // before it is read below - TODO confirm against RISParser.
        rISParser.getEntries();
        for (RISEntry entry : rISParser.entryList) {
            String abstractString = entry.getAbstractString();
            if (abstractString == null) {
                continue;
            }
            listDiv.appendChild(createAndAnalyzeSections(abstractString));
        }
        return listDiv;
    }
}
