package uk.ac.cam.ch.wwmm.oscar.document;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import nu.xom.Element;
import uk.ac.cam.ch.wwmm.oscar.tools.StringTools;
import uk.ac.cam.ch.wwmm.oscar.types.BioTag;
import uk.ac.cam.ch.wwmm.oscar.types.BioType;
import uk.ac.cam.ch.wwmm.oscar.types.NamedEntityType;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscar/document/TokenSequence.class */
/**
 * A run of {@link Token}s together with the surface text they cover.
 *
 * <p>Character positions handed to or returned by this class ({@code offset}, token
 * start/end values, and the arguments of {@link #getStringAtOffsets(int, int)}) share one
 * coordinate system; {@code offset} is the position in that system of the first character
 * of {@link #getSurface()}.
 */
public final class TokenSequence {
    /** Bio type marking the beginning of a locant prefix; used by {@link #getAfterHyphens()}. */
    private static final BioType B_CPR = new BioType(BioTag.B, NamedEntityType.LOCANTPREFIX);

    private final String surface;
    private final int offset;
    private final IProcessingDocument doc;
    private final List<Token> tokens;
    private Element elem;

    /**
     * Creates a token sequence.
     *
     * @param str the surface text covered by the sequence
     * @param i the position of the first character of {@code str}
     * @param iProcessingDocument the document this sequence is associated with
     * @param list the tokens of the sequence; the list is stored as-is, not copied
     */
    public TokenSequence(String str, int i, IProcessingDocument iProcessingDocument, List<Token> list) {
        this.surface = str;
        this.offset = i;
        this.doc = iProcessingDocument;
        this.tokens = list;
    }

    /** @return the surface text covered by this sequence */
    public String getSurface() {
        return this.surface;
    }

    /** @return the document passed to the constructor */
    public IProcessingDocument getDoc() {
        return this.doc;
    }

    /** @return the position of the first character of the surface text */
    public int getOffset() {
        return this.offset;
    }

    /** @return the backing token list itself (not a copy) */
    public List<Token> getTokens() {
        return this.tokens;
    }

    /**
     * Returns the tokens between two list indices.
     *
     * @param i index of the first token, inclusive
     * @param i2 index of the last token, inclusive
     * @return a {@link List#subList(int, int) sub-list view} of the backing token list
     */
    public List<Token> getTokens(int i, int i2) {
        // subList's upper bound is exclusive, this method's is inclusive.
        return this.tokens.subList(i, i2 + 1);
    }

    /**
     * @param i list index of the wanted token
     * @return the token at that index
     */
    public Token getToken(int i) {
        return this.tokens.get(i);
    }

    /** @param element the XML element to associate with this sequence */
    public void setElem(Element element) {
        this.elem = element;
    }

    /** @return the element set via {@link #setElem(Element)}, or {@code null} if none was set */
    public Element getElem() {
        return this.elem;
    }

    /** @return the number of tokens in this sequence */
    public int getSize() {
        return this.tokens.size();
    }

    /** @return a new list holding the surface string of every token, in order */
    public List<String> getTokenStringList() {
        List<String> surfaces = new ArrayList<String>();
        for (Token token : this.tokens) {
            surfaces.add(token.getSurface());
        }
        return surfaces;
    }

    /**
     * Returns the surface text spanning a range of tokens.
     *
     * @param i list index of the first token
     * @param i2 list index of the last token, inclusive; values past the end are clamped to
     *     the last token
     * @return the surface text from the start of token {@code i} to the end of token {@code i2}
     */
    public String getSubstring(int i, int i2) {
        if (i2 >= getSize()) {
            i2 = getSize() - 1;
        }
        // Token positions are relative to the same origin as offset, so subtract it to index
        // into the surface string.
        return this.surface.substring(this.tokens.get(i).getStart() - this.offset, this.tokens.get(i2).getEnd() - this.offset);
    }

    /**
     * Returns the surface text between two character positions.
     *
     * @param i start position, inclusive, in the same coordinate system as {@link #getOffset()}
     * @param i2 end position, exclusive, in the same coordinate system as {@link #getOffset()}
     * @return the corresponding part of the surface text
     */
    public String getStringAtOffsets(int i, int i2) {
        return this.surface.substring(i - this.offset, i2 - this.offset);
    }

    /**
     * Collects the surface strings of O-tagged tokens that directly follow an entity.
     *
     * <p>Two patterns are recognised, tested in this order for every token after the first:
     * <ol>
     *   <li>the token is a single-character hyphen tagged O, the token before it is not
     *       tagged O, the token after it is tagged O, and the three tokens are adjacent with
     *       no gap: the surface of the token <em>after</em> the hyphen is collected;</li>
     *   <li>the token is tagged O and immediately follows (no gap) a token whose bio type
     *       equals B-LOCANTPREFIX: the surface of the token itself is collected.</li>
     * </ol>
     *
     * @return a new set of the collected surface strings
     */
    public Set<String> getAfterHyphens() {
        Set<String> afterHyphens = new HashSet<String>();
        for (int i = 1; i < this.tokens.size(); i++) {
            Token previous = this.tokens.get(i - 1);
            Token current = this.tokens.get(i);
            if (i < this.tokens.size() - 1
                    && current.getSurface().length() == 1
                    && StringTools.isHyphen(current.getSurface())
                    && BioTag.O == current.getBioType().getBio()
                    && BioTag.O == this.tokens.get(i + 1).getBioType().getBio()
                    && BioTag.O != previous.getBioType().getBio()
                    && current.getStart() == previous.getEnd()
                    && current.getEnd() == this.tokens.get(i + 1).getStart()) {
                afterHyphens.add(this.tokens.get(i + 1).getSurface());
            } else if (BioTag.O == current.getBioType().getBio()
                    // Value comparison: B_CPR is private to this class, so an identity check
                    // (==) could never match a bio type created elsewhere.
                    // NOTE(review): relies on BioType overriding equals - confirm.
                    && B_CPR.equals(previous.getBioType())
                    && current.getStart() == previous.getEnd()) {
                afterHyphens.add(current.getSurface());
            }
        }
        return afterHyphens;
    }

    /**
     * Groups the tagged tokens of this sequence into named entities.
     *
     * <p>An entity starts at a token tagged B, or at the first non-O token when no entity is
     * currently open; every following non-O, non-B token is appended to it. An O token closes
     * the current entity. The entity is filed under the type of its first token; the types of
     * the appended tokens are not checked.
     *
     * @return a new map from entity type to the entities of that type, each entity being the
     *     list of its tokens' surface strings
     */
    public Map<NamedEntityType, List<List<String>>> getNes() {
        Map<NamedEntityType, List<List<String>>> entitiesByType = new HashMap<NamedEntityType, List<List<String>>>();
        NamedEntityType currentType = null;
        List<String> currentEntity = null;
        for (Token token : this.tokens) {
            BioTag bio = token.getBioType().getBio();
            if (BioTag.O == bio) {
                currentType = null;
                currentEntity = null;
            } else if (currentType == null || bio == BioTag.B) {
                currentEntity = new ArrayList<String>();
                currentType = token.getBioType().getType();
                currentEntity.add(token.getSurface());
                List<List<String>> entities = entitiesByType.get(currentType);
                if (entities == null) {
                    entities = new ArrayList<List<String>>();
                    entitiesByType.put(currentType, entities);
                }
                entities.add(currentEntity);
            } else {
                currentEntity.add(token.getSurface());
            }
        }
        return entitiesByType;
    }

    /** @return a new list of the surface strings of all tokens tagged O, in order */
    public List<String> getNonNes() {
        List<String> nonEntities = new ArrayList<String>();
        for (Token token : this.tokens) {
            if (BioTag.O == token.getBioType().getBio()) {
                nonEntities.add(token.getSurface());
            }
        }
        return nonEntities;
    }

    /**
     * Finds the first token that starts at the given position.
     *
     * @param i a character position in the same coordinate system as {@link #getOffset()}
     * @return the first token whose start equals {@code i}, or {@code null} if there is none
     * @throws ArrayIndexOutOfBoundsException if {@code i} lies outside the span of the surface text
     */
    public Token getTokenByStartIndex(int i) {
        checkIndex(i);
        for (Token token : this.tokens) {
            if (token.getStart() == i) {
                return token;
            }
        }
        return null;
    }

    /**
     * Verifies that a character position lies within {@code [offset, offset + surface.length()]}.
     *
     * @param i the position to check
     * @throws ArrayIndexOutOfBoundsException if {@code i} is outside that range
     */
    private void checkIndex(int i) {
        if (i < this.offset) {
            throw new ArrayIndexOutOfBoundsException("index " + i + " occurs before the beginning of this token sequence");
        }
        if (i > this.offset + this.surface.length()) {
            throw new ArrayIndexOutOfBoundsException("index " + i + " occurs after the end of this token sequence");
        }
    }

    /**
     * Finds the first token that ends at the given position.
     *
     * @param i a character position in the same coordinate system as {@link #getOffset()}
     * @return the first token whose end equals {@code i}, or {@code null} if there is none
     * @throws ArrayIndexOutOfBoundsException if {@code i} lies outside the span of the surface text
     */
    public Token getTokenByEndIndex(int i) {
        checkIndex(i);
        for (Token token : this.tokens) {
            if (token.getEnd() == i) {
                return token;
            }
        }
        return null;
    }
}
