package uk.ac.cam.ch.wwmm.oscar.oscarcli;

import com.sampullara.cli.Args;
import com.sampullara.cli.Argument;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.List;
import net.htmlparser.jericho.Source;
import uk.ac.cam.ch.wwmm.oscar.Oscar;
import uk.ac.cam.ch.wwmm.oscar.document.Token;
import uk.ac.cam.ch.wwmm.oscar.document.TokenSequence;
import uk.ac.cam.ch.wwmm.oscar.opsin.OpsinDictionary;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscar/oscarcli/TokeniserCLI.class */
/**
 * Command-line front end that tokenises text with OSCAR and prints each
 * token's surface string on its own line of standard output.
 *
 * <p>The text to tokenise is taken either from standard input (when the
 * {@code stdin} option is set) or from the strings that {@link Args#parse}
 * hands back after processing the command line. When the {@code html} option
 * is set, the input is first run through Jericho's text extractor.
 */
public class TokeniserCLI {

    /** Populated by {@link Args#parse}; not final because it is assigned after construction. */
    @Argument(description = "If true, reads the input from STDIN.")
    private boolean stdin = false;

    /** Populated by {@link Args#parse}; not final because it is assigned after construction. */
    @Argument(description = "If true, the input is HTML.")
    private boolean html = false;

    private final Oscar oscar = new Oscar();

    /**
     * Creates the CLI and registers an {@link OpsinDictionary} with the
     * OSCAR instance's dictionary registry.
     *
     * @throws Exception if creating or registering the dictionary fails
     */
    public TokeniserCLI() throws Exception {
        this.oscar.getDictionaryRegistry().register(new OpsinDictionary());
    }

    /**
     * Tokenises the given text and prints the surface form of every token,
     * one per line, to standard output.
     *
     * @param str the text to tokenise
     * @throws Exception if tokenisation fails
     */
    public void processLine(String str) throws Exception {
        for (TokenSequence sequence : this.oscar.tokenise(str)) {
            for (Token token : sequence.getTokens()) {
                System.out.println(token.getSurface());
            }
        }
    }

    /**
     * Program entry point: parses the options, gathers the input text,
     * optionally strips HTML markup, and tokenises the result.
     *
     * @param strArr the raw command-line arguments
     * @throws Exception if option parsing, reading the input, or tokenisation fails
     */
    public static void main(String[] strArr) throws Exception {
        TokeniserCLI tokeniserCLI = new TokeniserCLI();
        // The strings returned by Args.parse are used as the text to tokenise
        // (presumably the arguments left over after option parsing).
        List<String> remaining = Args.parse(tokeniserCLI, strArr);

        String text;
        if (tokeniserCLI.stdin) {
            text = readStdin();
        } else {
            StringBuilder joined = new StringBuilder();
            for (String argument : remaining) {
                joined.append(argument).append(' ');
            }
            text = joined.toString();
        }

        if (tokeniserCLI.html) {
            text = new Source(text).getTextExtractor().toString();
        }
        tokeniserCLI.processLine(text);
    }

    /**
     * Reads standard input to exhaustion and returns it as a single string.
     *
     * <p>{@link BufferedReader#readLine()} strips the line terminator, so a
     * newline is appended after every line; without it the last word of one
     * line would be fused to the first word of the next before tokenisation.
     *
     * @return the full contents of standard input, each line terminated by {@code '\n'}
     * @throws java.io.IOException if reading from standard input fails
     */
    private static String readStdin() throws java.io.IOException {
        // NOTE(review): decodes with the platform default charset, as before;
        // consider an explicit charset if input encoding must be fixed.
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        StringBuilder contents = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            contents.append(line).append('\n');
        }
        return contents.toString();
    }
}
