package uk.ac.cam.ch.wwmm.chemicaltagger;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.xmlcml.euclid.EuclidConstants;
import uk.ac.cam.ch.wwmm.oscar.Oscar;
import uk.ac.cam.ch.wwmm.oscar.document.Token;
import uk.ac.cam.ch.wwmm.oscar.document.TokenSequence;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/chemicaltagger/OscarTokeniser.class */
/**
 * {@link ChemicalTaggerTokeniser} implementation that delegates tokenisation
 * to an {@link Oscar} instance.
 */
public class OscarTokeniser implements ChemicalTaggerTokeniser {
    /** Tokenisation backend; assigned once in the constructor. */
    private final Oscar oscar;

    /**
     * Creates a tokeniser backed by the given Oscar instance.
     *
     * @param oscar the Oscar instance whose {@code tokenise} method is used
     */
    public OscarTokeniser(Oscar oscar) {
        this.oscar = oscar;
    }

    /**
     * Tokenises the container's input text and stores the results on it.
     *
     * <p>The text from {@code getInputText()} is passed to
     * {@code Oscar.tokenise}. The surface of every returned token is trimmed
     * and split on {@link EuclidConstants#S_SPACE}; each non-empty piece is
     * appended, in encounter order, to a flat word list. Both the token
     * sequences and the word list are then set on the container.
     *
     * @param pOSContainer container supplying the input text and receiving the results
     * @return the same {@code pOSContainer} instance that was passed in
     */
    @Override // uk.ac.cam.ch.wwmm.chemicaltagger.ChemicalTaggerTokeniser
    public POSContainer tokenise(POSContainer pOSContainer) {
        List<String> wordTokens = new ArrayList<String>();
        List<TokenSequence> tokenSequences = this.oscar.tokenise(pOSContainer.getInputText());
        for (TokenSequence tokenSequence : tokenSequences) {
            for (Token token : tokenSequence.getTokens()) {
                // String.split interprets its argument as a regular expression.
                String[] pieces = token.getSurface().trim().split(EuclidConstants.S_SPACE);
                for (String piece : pieces) {
                    // Splitting can yield empty strings; those are not recorded as words.
                    if (StringUtils.isNotEmpty(piece)) {
                        wordTokens.add(piece);
                    }
                }
            }
        }
        pOSContainer.setTokenSequenceList(tokenSequences);
        pOSContainer.setWordTokenList(wordTokens);
        return pOSContainer;
    }
}
