/*
 * Decompiled with CFR 0.152.
 */
package org.commoncrawl.hadoop.io;

import java.io.IOException;
import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;
import org.commoncrawl.hadoop.io.ARCSource;
import org.commoncrawl.hadoop.io.ARCSplit;
import org.commoncrawl.hadoop.io.ARCSplitReader;
import org.commoncrawl.hadoop.io.JetS3tARCSource;
import org.commoncrawl.util.shared.ArcFileReader;

/**
 * A map-reduce input format that reads ARC files through a pluggable
 * {@link ARCSource}.
 *
 * <p>I/O behaviour is controlled by the job properties {@link #P_IO_BLOCK_SIZE},
 * {@link #P_IO_BUFFER_SIZE} and {@link #P_IO_TIMEOUT}; the source implementation is
 * selected by {@link #P_ARC_SOURCE} and falls back to {@link JetS3tARCSource}.
 * {@link #configure(JobConf)} must run before splits or record readers are requested,
 * because it is what instantiates the {@link ARCSource}.
 */
public class ARCInputFormat implements InputFormat, JobConfigurable {
    /** Job property: size in bytes of a single I/O block. */
    public static final String P_IO_BLOCK_SIZE = "arc.input.format.io.block.size";
    /** Job property: total size in bytes of the I/O buffer. */
    public static final String P_IO_BUFFER_SIZE = "arc.input.format.io.buffer.size";
    /** Job property: I/O timeout in milliseconds. */
    public static final String P_IO_TIMEOUT = "arc.input.format.io.timeout.ms";
    /** Job property: the {@link ARCSource} implementation class to instantiate. */
    public static final String P_ARC_SOURCE = "arc.input.format.arc.source.class";

    private static final int DEFAULT_IO_BLOCK_SIZE = 32 * 1024;
    private static final int DEFAULT_IO_BUFFER_SIZE = 10 * 1024 * 1024;
    private static final long DEFAULT_IO_TIMEOUT_MS = 60000L;

    private static final Log LOG = LogFactory.getLog(ARCInputFormat.class);

    private int blockSize;
    private ARCSource arcSource;

    /**
     * Stores the I/O block size under {@link #P_IO_BLOCK_SIZE}.
     *
     * @param job the job to configure
     * @param blockSize the block size in bytes; must be positive for
     *        {@link #configure(JobConf)} to accept it
     */
    public static void setIOBlockSize(JobConf job, int blockSize) {
        job.setInt(P_IO_BLOCK_SIZE, blockSize);
    }

    /**
     * Stores the I/O buffer size under {@link #P_IO_BUFFER_SIZE}.
     *
     * @param job the job to configure
     * @param bufferSize the buffer size in bytes
     */
    public static void setIOBufferSize(JobConf job, int bufferSize) {
        job.setInt(P_IO_BUFFER_SIZE, bufferSize);
    }

    /**
     * Stores the I/O timeout under {@link #P_IO_TIMEOUT}.
     *
     * @param job the job to configure
     * @param milliseconds the timeout in milliseconds; values beyond the {@code int}
     *        range are clamped when read back by {@link #configure(JobConf)}
     */
    public static void setIOTimeout(JobConf job, long milliseconds) {
        job.setLong(P_IO_TIMEOUT, milliseconds);
    }

    /**
     * Stores the {@link ARCSource} implementation class under {@link #P_ARC_SOURCE}.
     *
     * @param job the job to configure
     * @param arcSource the implementation class; must be assignable to {@link ARCSource}
     */
    public static void setARCSourceClass(JobConf job, Class arcSource) {
        job.setClass(P_ARC_SOURCE, arcSource, ARCSource.class);
    }

    /**
     * Reads the I/O settings from the job, pushes them to {@link ArcFileReader}'s
     * static setters, and instantiates the configured {@link ARCSource}.
     *
     * @param job the job configuration to read from
     * @throws IllegalArgumentException if the configured block size is not positive
     */
    public void configure(JobConf job) {
        int configuredBlockSize = job.getInt(P_IO_BLOCK_SIZE, DEFAULT_IO_BLOCK_SIZE);
        if (configuredBlockSize <= 0) {
            // Fail with a named property instead of a bare divide-by-zero below.
            throw new IllegalArgumentException(
                P_IO_BLOCK_SIZE + " must be positive but was " + configuredBlockSize);
        }
        this.blockSize = configuredBlockSize;

        int bufferSize = job.getInt(P_IO_BUFFER_SIZE, DEFAULT_IO_BUFFER_SIZE);
        // Always keep at least one block in the queue, even for tiny buffers.
        int queueSize = Math.max(1, bufferSize / this.blockSize);

        // The setter writes a long, so read a long and clamp; reading it back with
        // getInt would throw NumberFormatException for values beyond the int range.
        long configuredTimeout = job.getLong(P_IO_TIMEOUT, DEFAULT_IO_TIMEOUT_MS);
        int timeout = (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, configuredTimeout));

        ArcFileReader.setBlockSize(this.blockSize);
        ArcFileReader.setBufferQueueSize(queueSize);
        ArcFileReader.setIOTimeoutValue(timeout);

        LOG.info("Block Size: " + this.blockSize);
        LOG.info("Queue Size: " + queueSize);
        LOG.info("IO Timeout: " + timeout);

        Class archiveSourceClass = job.getClass(P_ARC_SOURCE, JetS3tARCSource.class, ARCSource.class);
        this.arcSource = (ARCSource) ReflectionUtils.newInstance(archiveSourceClass, job);
    }

    /**
     * Asks the configured {@link ARCSource} for the splits of this job.
     *
     * @param job the job configuration
     * @param ignored the requested number of splits; not used
     * @return the splits produced by the source; never empty
     * @throws IOException if the source yields no splits
     */
    public InputSplit[] getSplits(JobConf job, int ignored) throws IOException {
        InputSplit[] splits = this.arcSource.getARCSplits(job);
        if (splits == null || splits.length < 1) {
            throw new IOException("No input to process");
        }
        LOG.info("Processing splits: " + Arrays.toString(splits));
        return splits;
    }

    /**
     * Creates a reader over a single split.
     *
     * @param split the split to read; must be an {@link ARCSplit}
     * @param job the job configuration
     * @param reporter progress reporter; not used here
     * @return a new {@link ARCSplitReader} for the split
     * @throws IOException declared by the interface; propagated from the reader
     */
    public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
        return new ARCSplitReader(job, (ARCSplit) split, this.arcSource, this.blockSize);
    }

    /**
     * Performs no validation.
     *
     * @param job the job configuration; not inspected
     * @throws IOException never thrown by this implementation
     */
    public void validateInput(JobConf job) throws IOException {
    }
}

