package com.chine.invertedindex.mapreduce.posindex;

import com.chine.invertedindex.analysis.Analyzer;
import com.chine.invertedindex.analysis.HTMLChineseTokenizer;
import com.chine.invertedindex.analysis.LowercaseFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.LineReader;

/* loaded from: input_file:com/chine/invertedindex/mapreduce/posindex/TokenInputFormat.class */
public class TokenInputFormat extends FileInputFormat<Text, ValuePair> {

    /* loaded from: input_file:com/chine/invertedindex/mapreduce/posindex/TokenInputFormat$AnalyzerIter.class */
    /**
     * Forward-only buffer of tokens copied out of an {@link Analyzer}.
     *
     * <p>Tokens are appended with {@link #addAnalyzer(Analyzer)} and handed out one at a time
     * through the {@link Iterator} methods. Tokens that have already been returned are dropped
     * the next time the buffer is refilled, so the buffer does not grow with the total number
     * of tokens seen.
     */
    public static class AnalyzerIter implements Iterator<String> {
        /** Buffered tokens; entries at positions {@code <= index} have already been returned. */
        private final List<String> iter = new ArrayList<String>();
        /** Position of the token most recently returned by {@link #next()}, or -1 if none. */
        private int index = -1;

        /**
         * Appends every token currently produced by {@code analyzer.iterator()} to the buffer.
         *
         * @param analyzer the analyzer whose tokens are copied
         */
        public void addAnalyzer(Analyzer analyzer) {
            if (!hasNext()) {
                // Everything buffered so far has been handed out: discard it so the list
                // does not retain every token seen since this object was created.
                this.iter.clear();
                this.index = -1;
            }
            Iterator<String> tokens = analyzer.iterator();
            while (tokens.hasNext()) {
                this.iter.add(tokens.next());
            }
        }

        /**
         * @return {@code true} if at least one buffered token has not been returned yet
         */
        @Override // java.util.Iterator
        public boolean hasNext() {
            return this.index + 1 < this.iter.size();
        }

        /**
         * Returns the next buffered token.
         *
         * @return the next token
         * @throws NoSuchElementException if no unread token is buffered; the cursor is left
         *         untouched in that case so tokens added later are not skipped
         */
        @Override // java.util.Iterator
        public String next() {
            if (!hasNext()) {
                throw new NoSuchElementException("no buffered token left");
            }
            this.index++;
            return this.iter.get(this.index);
        }

        /**
         * Does nothing: removal is not supported, and the call is silently ignored
         * (kept as a no-op rather than throwing, matching the existing behaviour).
         */
        @Override // java.util.Iterator
        public void remove() {
        }
    }

    /* loaded from: input_file:com/chine/invertedindex/mapreduce/posindex/TokenInputFormat$TokenRecordReader.class */
    /**
     * Record reader that reads a file line by line, runs each line through an {@link Analyzer}
     * and emits one record per token.
     *
     * <p>The key is the token text. The value is a {@code ValuePair} set from the file name and a
     * running token counter; the counter is incremented before each token is emitted, so the
     * first token of a file carries 1.
     *
     * <p>{@link #nextKeyValue()} advances to the next token; {@link #getCurrentKey()} and
     * {@link #getCurrentValue()} only return the current record and may be called repeatedly.
     */
    public static class TokenRecordReader extends RecordReader<Text, ValuePair> {
        private Analyzer analyzer = null;
        private AnalyzerIter iter;
        private long start;
        private long end;
        private long pos;
        private int maxLineLength;
        private LineReader in;
        private Text line;
        private Text key;
        private ValuePair value;
        private String fileName;
        private int tokenPos;
        /** Set once the underlying reader has reported end of input. */
        private boolean exhausted;

        /**
         * Opens the file behind {@code inputSplit} and prepares the analyzer.
         *
         * @param inputSplit the split to read; cast to {@link FileSplit}
         * @param taskAttemptContext supplies the configuration (file system, max line length)
         * @throws IOException if the file cannot be opened or the lowercase filter cannot be
         *         installed on the analyzer
         */
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            this.analyzer = new Analyzer(new HTMLChineseTokenizer());
            try {
                this.analyzer.addFilter(LowercaseFilter.class);
            } catch (Exception e) {
                // Continuing without the filter would silently emit non-lowercased tokens,
                // so fail the task instead of only printing the stack trace.
                throw new IOException("Failed to add LowercaseFilter to the analyzer", e);
            }
            this.iter = new AnalyzerIter();
            FileSplit fileSplit = (FileSplit) inputSplit;
            Configuration configuration = taskAttemptContext.getConfiguration();
            this.maxLineLength = configuration.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
            this.start = fileSplit.getStart();
            this.end = this.start + fileSplit.getLength();
            this.pos = this.start;
            Path path = fileSplit.getPath();
            this.fileName = path.getName();
            this.in = new LineReader(path.getFileSystem(configuration).open(path), configuration);
            this.line = new Text();
            this.key = new Text();
            this.value = new ValuePair();
            this.tokenPos = 0;
            this.exhausted = false;
        }

        /**
         * Advances to the next token, reading further lines whenever the token buffer is empty.
         *
         * @return {@code true} if a record is available; {@code false} once {@code readLine}
         *         returns 0, after which key and value are {@code null}
         */
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (this.exhausted) {
                return false;
            }
            // Lines that yield no tokens are skipped by looping until the buffer has one.
            while (!this.iter.hasNext()) {
                int consumed = this.in.readLine(this.line, this.maxLineLength, Math.max((int) Math.min(2147483647L, this.end - this.start), this.maxLineLength));
                if (consumed == 0) {
                    this.exhausted = true;
                    this.key = null;
                    this.value = null;
                    this.line = null;
                    return false;
                }
                this.pos += consumed;
                this.analyzer.process(this.line.toString());
                this.iter.addAnalyzer(this.analyzer);
            }
            // Materialise the record here so the getters have no side effects.
            this.tokenPos++;
            this.key.set(this.iter.next());
            this.value.set(this.fileName, this.tokenPos);
            return true;
        }

        /**
         * @return the current token, or {@code null} after the input is exhausted
         */
        public Text getCurrentKey() throws IOException, InterruptedException {
            return this.key;
        }

        /**
         * @return the current value (file name and token counter), or {@code null} after the
         *         input is exhausted
         */
        public ValuePair getCurrentValue() throws IOException, InterruptedException {
            return this.value;
        }

        /**
         * Alias of {@link #getCurrentKey()} under the name produced by decompilation;
         * retained so existing callers of this name keep working.
         */
        public Text m6getCurrentKey() throws IOException, InterruptedException {
            return getCurrentKey();
        }

        /**
         * Alias of {@link #getCurrentValue()} under the name produced by decompilation;
         * retained so existing callers of this name keep working.
         */
        public ValuePair m5getCurrentValue() throws IOException, InterruptedException {
            return getCurrentValue();
        }

        /**
         * @return the consumed fraction of the split in the range [0, 1]; 0 for an empty split
         */
        public float getProgress() throws IOException, InterruptedException {
            if (this.start == this.end) {
                return 0.0f;
            }
            // Divide in floating point: long / long would truncate to 0 or 1.
            return Math.min(1.0f, (this.pos - this.start) / (float) (this.end - this.start));
        }

        /**
         * Closes the underlying line reader if it was opened.
         */
        public void close() throws IOException {
            if (this.in != null) {
                this.in.close();
            }
        }
    }

    /**
     * Reports that input files must not be split.
     *
     * @param context the job context (unused)
     * @param file the input file in question (unused)
     * @return always {@code false}
     */
    // NOTE(review): presumably whole-file reads keep the per-file token counter
    // continuous — confirm against the job that consumes the positions.
    protected boolean isSplitable(JobContext context, Path file) {
        return false;
    }

    /**
     * Creates the reader that turns a split into (token, value) records.
     *
     * <p>The reader is returned uninitialized; neither argument is used here.
     *
     * @param split the split the reader will be asked to read
     * @param context the task attempt context
     * @return a new {@link TokenRecordReader}
     */
    public RecordReader<Text, ValuePair> createRecordReader(InputSplit split, TaskAttemptContext context) {
        RecordReader<Text, ValuePair> reader = new TokenRecordReader();
        return reader;
    }

    /**
     * Ad-hoc driver: tokenizes one file with a {@link TokenRecordReader} and prints every
     * record to standard output as {@code key: value}.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the path of the file to read
     * @throws IllegalArgumentException if no path argument is given
     * @throws Exception if the reader cannot be initialized or reading fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length < 1) {
            throw new IllegalArgumentException("Usage: TokenInputFormat <input-file>");
        }
        String inputPath = strArr[0];
        Configuration configuration = new Configuration();
        // The split length is a fixed 10,000,000 bytes regardless of the real file size.
        InputSplit fileSplit = new FileSplit(new Path(inputPath), 0L, 10000000L, (String[]) null);
        TokenRecordReader tokenRecordReader = new TokenRecordReader();
        try {
            tokenRecordReader.initialize(fileSplit, new TaskAttemptContext(configuration, new TaskAttemptID("hello", 12, true, 12, 12)));
            while (tokenRecordReader.nextKeyValue()) {
                // Key is read before value; keep this order.
                System.out.println(tokenRecordReader.m6getCurrentKey() + ": " + tokenRecordReader.m5getCurrentValue());
            }
        } finally {
            // Release the underlying stream even when reading fails part-way through.
            tokenRecordReader.close();
        }
    }
}
