package uk.ac.cam.ch.wwmm.chemicaltagger;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import uk.ac.cam.ch.wwmm.oscar.document.Token;
import uk.ac.cam.ch.wwmm.oscar.types.BioTag;
import uk.ac.cam.ch.wwmm.oscar.types.BioType;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/chemicaltagger/WhiteSpaceTokeniser.class */
/**
 * A {@link ChemicalTaggerTokeniser} that splits text purely on whitespace.
 *
 * <p>Every maximal run of non-whitespace characters in the input becomes one
 * {@link Token}; no chemistry-aware splitting or merging is attempted here.
 */
public class WhiteSpaceTokeniser implements ChemicalTaggerTokeniser {
    /** Matches one maximal run of non-whitespace characters; compiled once and shared. */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("\\S+");

    /**
     * Splits {@code str} into whitespace-delimited tokens.
     *
     * <p>Each token carries the matched text, the start (inclusive) and end
     * (exclusive) character offsets of the match within {@code str}, a
     * {@link BioType} built from {@link BioTag#O}, and a zero-based index
     * reflecting its position in the returned list.
     *
     * @param str the text to tokenise; passed straight to the regex matcher,
     *            so it is expected to be non-null
     * @return the tokens in order of appearance; empty if {@code str} contains
     *         no non-whitespace characters
     */
    @Override // uk.ac.cam.ch.wwmm.chemicaltagger.ChemicalTaggerTokeniser
    public List<Token> tokenise(String str) {
        List<Token> tokens = new ArrayList<Token>();
        Matcher matcher = TOKEN_PATTERN.matcher(str);
        int nextIndex = 0;
        while (matcher.find()) {
            // The two null arguments are kept exactly as in the original call;
            // NOTE(review): presumably the owning token sequence and the
            // named-entity type, left unset here - confirm against Token.
            Token token = new Token(matcher.group(), matcher.start(), matcher.end(), null, new BioType(BioTag.O), null);
            token.setIndex(nextIndex);
            nextIndex++;
            tokens.add(token);
        }
        return tokens;
    }
}
