package edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer;

import edu.northwestern.at.utils.ListFactory;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/northwestern/at/morphadorner/corpuslinguistics/tokenizer/ContractionTokenizer.class */
/**
 * Word tokenizer that separates English contractions and a few fused word
 * forms (for example {@code cannot}, {@code gonna}, {@code 'tis}) into their
 * parts and then splits the text on whitespace.
 *
 * <p>The rewriting is done by an ordered table of regular-expression rules.
 * Every rule is anchored on literal space characters, so only
 * space-delimited occurrences are rewritten.
 */
public class ContractionTokenizer extends AbstractWordTokenizer implements WordTokenizer {
    /** One rewrite step: a precompiled pattern plus its replacement template. */
    private static final class Rule {
        private final Pattern pattern;
        private final String replacement;

        Rule(String regex, String replacement) {
            this.pattern = Pattern.compile(regex);
            this.replacement = replacement;
        }

        /** Replaces every match of this rule's pattern in {@code text}. */
        String apply(String text) {
            return pattern.matcher(text).replaceAll(replacement);
        }
    }

    /**
     * Rewrite rules, applied in order. Compiled once instead of on every call.
     */
    private static final Rule[] RULES = {
        // Trailing apostrophe (closing quote or plural possessive).
        new Rule("([^'])' ", "$1 ' "),
        // 's 'S 'm 'M 'd 'D. This already covers the plain "'s " / "'S " cases,
        // so no separate rules are needed for them; running such rules again
        // would only insert a second space.
        new Rule("'([sSmMdD]) ", " '$1 "),
        new Rule("'ll ", " 'll "),
        new Rule("'re ", " 're "),
        new Rule("'ve ", " 've "),
        new Rule("n't ", " n't "),
        // The lookbehind keeps this rule from splitting the "n't" produced by
        // the rule above into "n" + "'t".
        new Rule("(?<!n)'t ", " 't "),
        new Rule("'LL ", " 'LL "),
        new Rule("'RE ", " 'RE "),
        new Rule("'VE ", " 'VE "),
        new Rule("N'T ", " N'T "),
        // Same guard as for the lower-case form.
        new Rule("(?<!N)'T ", " 'T "),
        new Rule(" ([Cc])annot ", " $1an not "),
        new Rule(" ([Dd])'ye ", " $1' ye "),
        new Rule(" ([Gg])imme ", " $1im me "),
        new Rule(" ([Gg])onna ", " $1on na "),
        new Rule(" ([Gg])otta ", " $1ot ta "),
        new Rule(" ([Ll])emme ", " $1em me "),
        new Rule(" ([Mm])ore'n ", " $1ore 'n "),
        new Rule(" '([Tt])is ", " '$1 is "),
        new Rule(" '([Tt])was ", " '$1 was "),
        new Rule(" ([Ww])anna ", " $1an na "),
    };

    /**
     * Inserts spaces into the text so that contractions and fused word forms
     * become separate whitespace-delimited pieces, e.g. {@code "I can't go"}
     * becomes {@code "I ca n't go"}.
     *
     * @param str the text to prepare; {@code null} is treated as empty text
     * @return the rewritten text with leading and trailing whitespace trimmed
     */
    public static String prepareTextForTokenization(String str) {
        if (str == null) {
            return "";
        }
        // The rules need a space after (and some also before) the matched
        // form. Padding both ends lets them fire on the first and last word
        // too; the padding is removed again by trim() below.
        String text = " " + str + " ";
        for (Rule rule : RULES) {
            text = rule.apply(text);
        }
        return text.trim();
    }

    /**
     * Splits text into word tokens after separating contractions.
     *
     * @param str the text to tokenize; {@code null} yields an empty list
     * @return the whitespace-delimited tokens of the prepared text, in order
     */
    @Override
    public List<String> extractWords(String str) {
        List<String> words = ListFactory.createNewList();
        StringTokenizer pieces = new StringTokenizer(prepareTextForTokenization(str));
        while (pieces.hasMoreTokens()) {
            words.add(pieces.nextToken());
        }
        return words;
    }
}
