package de.l3s.concatgz.io;

import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.SequenceInputStream;
import java.util.zip.GZIPInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.archive.format.warc.WARCConstants;
import org.archive.resource.ResourceConstants;
import org.eclipse.jdt.internal.compiler.classfmt.ClassFileConstants;
import repackaged.com.google.common.google.common.io.FileBackedOutputStream;

/* loaded from: input_file:de/l3s/concatgz/io/ConcatGzipInputFormat.class */
public class ConcatGzipInputFormat extends FileInputFormat<Text, FileBackedBytesWritable> {

    /* loaded from: input_file:de/l3s/concatgz/io/ConcatGzipInputFormat$ConcatGzipRecordReader.class */
    public static class ConcatGzipRecordReader extends RecordReader<Text, FileBackedBytesWritable> {
        // 8192: same value as the literals used in this class for the read buffer and pushback capacity.
        static final int BUFFER_SIZE = 8192;
        // First gzip magic byte (0x1f).
        static final byte FIRST_GZIP_BYTE = 31;
        // Second gzip magic byte (0x8b as a signed byte).
        static final byte SECOND_GZIP_BYTE = -117;
        // Byte offset at which this reader starts; set by the initialize methods.
        private long start = 0;
        // Byte offset bounding this reader's range; compared against pos to decide whether to continue.
        private long end = 0;
        // Running byte position, updated by skipToNextRecord after each record.
        private long pos = 0;
        // Offset of the most recently found record header; appended to the file name to form the key.
        private long lastRecordOffset = 0;
        // Name of the input, used for the key and for the WARC check.
        private String filename = null;
        // Underlying input stream supplied by one of the initialize methods.
        private InputStream in = null;
        // Key and value instances reused across nextKeyValue calls.
        private Text key = new Text();
        private FileBackedBytesWritable value = new FileBackedBytesWritable();
        // Pushback wrapper around `in`; created lazily by skipToNextRecord, so it is null until the first read.
        private PushbackInputStream cache = null;
        // False until an initialize method runs; cleared when reading stops or fails.
        private boolean hasNext = false;
        // Set when the file name contains WARCConstants.WARC_FILE_EXTENSION; enables the content check in checkGzip.
        private boolean isWARC = false;

        /**
         * Returns the reader's current byte position.
         *
         * @return the value last stored in {@code pos}
         */
        public long getPos() {
            return pos;
        }

        /**
         * Reports whether the reader still expects to produce records.
         *
         * @return the current value of the {@code hasNext} flag
         */
        public boolean getHasNext() {
            return hasNext;
        }

        /**
         * Returns the name of the input this reader was initialised with.
         *
         * @return the stored file name, or {@code null} before initialisation
         */
        public String getFilename() {
            return filename;
        }

        /**
         * Prepares the reader for the byte range described by a file split.
         *
         * <p>Opens the split's file, seeks to the split start and records the range
         * {@code [start, start + length)}. If seeking fails, the freshly opened stream is closed
         * before the exception propagates, so no file handle is leaked.
         *
         * @param inputSplit the split to read; must be a {@link FileSplit}
         * @param taskAttemptContext supplies the configuration used to resolve the file system
         * @throws IOException if the file cannot be opened or positioned
         * @throws InterruptedException declared by the reader contract; not thrown here
         */
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            FileSplit fileSplit = (FileSplit) inputSplit;
            Configuration configuration = taskAttemptContext.getConfiguration();
            this.start = fileSplit.getStart();
            this.pos = this.start;
            this.end = this.start + fileSplit.getLength();
            Path path = fileSplit.getPath();
            this.filename = path.getName();
            FSDataInputStream open = path.getFileSystem(configuration).open(path, 8192);
            try {
                open.seek(this.start);
            } catch (IOException | RuntimeException e) {
                // Do not leak the handle when the stream cannot be positioned.
                try {
                    open.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
            this.in = open;
            // Assign rather than only set to true, so a reused reader does not keep a stale flag.
            this.isWARC = this.filename.contains(WARCConstants.WARC_FILE_EXTENSION);
            this.hasNext = true;
        }

        /**
         * Prepares the reader to read a local file from its beginning.
         *
         * <p>The stream is handed to {@link #initialize(InputStream, String)}, which resets the
         * start offset to zero. No bytes are skipped beforehand: skipping by a leftover
         * {@code start} from an earlier initialisation would make the reported record offsets
         * disagree with the real file offsets.
         *
         * @param str path of the local file to read
         * @throws IOException if the file cannot be opened
         * @throws InterruptedException propagated from the delegate
         */
        public void initialize(String str) throws IOException, InterruptedException {
            String name = new Path(str).getName();
            FileInputStream fileInputStream = new FileInputStream(str);
            try {
                initialize(fileInputStream, name);
            } catch (IOException | InterruptedException | RuntimeException e) {
                // The reader never took ownership of the stream, so release it here.
                try {
                    fileInputStream.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        }

        /**
         * Prepares the reader to consume an arbitrary stream from offset zero with no upper bound.
         *
         * @param inputStream the stream to read records from; closed by {@link #close()}
         * @param str name used for record keys and for the WARC file-name check
         * @throws IOException declared for symmetry with the other overloads; not thrown here
         * @throws InterruptedException declared for symmetry with the other overloads; not thrown here
         */
        public void initialize(InputStream inputStream, String str) throws IOException, InterruptedException {
            this.start = 0L;
            this.pos = this.start;
            // Unbounded range. Long.MAX_VALUE is the value of the Eclipse compiler constant the
            // decompiler substituted here, without the unrelated dependency.
            this.end = Long.MAX_VALUE;
            this.hasNext = true;
            this.filename = str;
            this.in = inputStream;
            // Assign rather than only set to true, so a reused reader does not keep a stale flag.
            this.isWARC = this.filename.contains(WARCConstants.WARC_FILE_EXTENSION);
        }

        /**
         * Locates the next gzip record in the stream and copies its bytes to the given sink.
         *
         * <p>Chunks are read through the pushback wrapper until {@code findAndCheckGZIP} reports
         * a record start; that offset is stored in {@code lastRecordOffset}. Bytes are then
         * written to {@code fileBackedOutputStream} until a further record start is found or the
         * stream ends. Everything from the following record start onward is pushed back so the
         * next call reads it again.
         *
         * @param fileBackedOutputStream sink that receives the bytes of the record
         * @return {@code false} if the stream ended before any record start was found; otherwise
         *         whether the updated {@code pos} does not exceed {@code end}
         * @throws IOException if reading, writing or pushing back bytes fails
         */
        private boolean skipToNextRecord(FileBackedOutputStream fileBackedOutputStream) throws IOException {
            int read;
            int findAndCheckGZIP;
            int i;
            byte[] bArr = new byte[8192];
            // Running absolute offset: pos plus every byte count returned by read in this call.
            long j = this.pos;
            // The pushback wrapper is created on first use and then kept for later calls.
            if (this.cache == null) {
                this.cache = new PushbackInputStream(this.in, 8192);
            }
            // Phase 1: read chunks until one contains a verified gzip header (index i).
            do {
                read = this.cache.read(bArr);
                if (read <= 0) {
                    return false;
                }
                j += read;
                findAndCheckGZIP = findAndCheckGZIP(bArr, 0, read);
                i = findAndCheckGZIP;
            } while (findAndCheckGZIP == -1);
            // j points just past the current chunk, so this is the absolute offset of index i.
            this.lastRecordOffset = j - (read - i);
            // i2: first index of the current chunk that belongs to the record being copied.
            int i2 = i;
            // i3: where the search for the following header starts; two bytes past the current
            // header, or the chunk end when the header sits within the last few bytes.
            int i3 = i + 2 < read - 2 ? i + 2 : read;
            // Phase 2: copy whole chunks until the following header is found or reading stops.
            while (read > 0) {
                int findAndCheckGZIP2 = findAndCheckGZIP(bArr, i3, read);
                i = findAndCheckGZIP2;
                if (findAndCheckGZIP2 != -1) {
                    break;
                }
                fileBackedOutputStream.write(bArr, i2, read - i2);
                // Later chunks belong to the record from their first byte and are searched from index 0.
                if (i2 != 0) {
                    i2 = 0;
                }
                if (i3 != 0) {
                    i3 = 0;
                }
                read = this.cache.read(bArr);
                // NOTE(review): at end of stream read is -1, so j drops by one here and pos ends up
                // one below the number of bytes consumed. nextKeyValue compares pos with end, so
                // confirm the effect on the last record before changing this.
                j += read;
            }
            // Copy the part of the chunk that precedes the following header. The Math.max guards
            // turn the -1 values of i and read at end of stream into zero-length operations.
            fileBackedOutputStream.write(bArr, i2, Math.max(0, i) - i2);
            this.pos = j - (Math.max(0, read) - Math.max(0, i));
            // Hand the following header and the bytes after it back for the next call.
            this.cache.unread(bArr, Math.max(0, i), Math.max(0, read) - Math.max(0, i));
            return this.pos <= this.end;
        }

        /**
         * Searches {@code bArr[i .. i2)} for the start of a gzip member accepted by {@code checkGzip}.
         *
         * <p>Positions with both magic bytes inside the range are tested first. The last byte of
         * the range is tested on its own, because its second magic byte lies beyond the range.
         * That tail test only runs when the last index is inside the requested range, so a search
         * starting at or after {@code i2} can no longer report an index before its start, and an
         * empty range no longer reads {@code bArr[-1]}.
         *
         * @param bArr buffer holding the bytes to search
         * @param i index at which the search starts (inclusive)
         * @param i2 number of valid bytes in {@code bArr}; the search ends before this index
         * @return index of the first verified gzip member start, or {@code -1} if none was found
         * @throws IOException if verification fails while reading look-ahead bytes
         */
        private int findAndCheckGZIP(byte[] bArr, int i, int i2) throws IOException {
            int lastIndex = i2 - 1;
            for (int candidate = i; candidate < lastIndex; candidate++) {
                // 31 and -117 are the gzip magic bytes 0x1f 0x8b as signed bytes.
                boolean magicMatches = bArr[candidate] == 31 && bArr[candidate + 1] == -117;
                if (magicMatches && checkGzip(bArr, candidate, i2 - candidate)) {
                    return candidate;
                }
            }
            // Only the first magic byte is visible at the last index; checkGzip reads ahead for the rest.
            boolean tailInRange = lastIndex >= 0 && lastIndex >= i;
            if (tailInRange && bArr[lastIndex] == 31 && checkGzip(bArr, lastIndex, 1)) {
                return lastIndex;
            }
            return -1;
        }

        /**
         * Verifies that the bytes starting at {@code bArr[i]} begin a readable gzip member.
         *
         * <p>When fewer than 512 bytes are available in the buffer, up to 1024 further bytes are
         * borrowed from the pushback stream and returned to it before this method exits. Only the
         * bytes actually read are used and pushed back; pushing back the whole look-ahead array
         * would insert zero bytes into the stream whenever the read was short or hit end of stream.
         * For WARC inputs the start of the decompressed data must also contain the WARC marker.
         *
         * @param bArr buffer holding the candidate bytes
         * @param i index of the candidate's first byte
         * @param i2 number of candidate bytes available in {@code bArr} from {@code i}
         * @return {@code true} if the candidate decompresses (and carries the WARC marker when required)
         * @throws IOException if borrowing or returning the look-ahead bytes fails
         */
        private boolean checkGzip(byte[] bArr, int i, int i2) throws IOException {
            byte[] lookahead = null;
            int lookaheadLength = 0;
            InputStream candidate = new ByteArrayInputStream(bArr, i, i2);
            if (i2 < 512) {
                lookahead = new byte[1024];
                // read may deliver fewer bytes than requested, or -1 at end of stream.
                lookaheadLength = Math.max(0, this.cache.read(lookahead));
                candidate = new SequenceInputStream(candidate, new ByteArrayInputStream(lookahead, 0, lookaheadLength));
            }
            try {
                byte[] head = new byte[256];
                // Closing the GZIPInputStream releases its inflater instead of waiting for collection.
                try (GZIPInputStream gzip = new GZIPInputStream(candidate)) {
                    gzip.read(head, 0, 256);
                }
                if (!this.isWARC) {
                    return true;
                }
                return new String(head, "UTF-8").contains(ResourceConstants.ENVELOPE_FORMAT_WARC);
            } catch (IOException e) {
                // Not a valid gzip member at this position: an expected outcome of the scan.
                return false;
            } catch (Exception e2) {
                e2.printStackTrace();
                return false;
            } finally {
                candidate.close();
                if (lookaheadLength > 0) {
                    this.cache.unread(lookahead, 0, lookaheadLength);
                }
            }
        }

        /**
         * Reports how much of the reader's byte range has been consumed.
         *
         * <p>The ratio is computed in floating point. Dividing the two {@code long} values first
         * truncates the result to 0 until the range is exhausted, and throws
         * {@link ArithmeticException} for an empty range.
         *
         * @return a value between 0.0 and 1.0; 0.0 for an empty range
         * @throws IOException declared by the reader contract; not thrown here
         * @throws InterruptedException declared by the reader contract; not thrown here
         */
        public float getProgress() throws IOException, InterruptedException {
            long length = this.end - this.start;
            if (length <= 0) {
                return 0.0f;
            }
            long consumed = Math.min(this.pos, this.end) - this.start;
            float fraction = (float) ((double) consumed / (double) length);
            return Math.max(0.0f, Math.min(1.0f, fraction));
        }

        /**
         * Reads the next record into the reusable key and value objects.
         *
         * <p>The key becomes {@code <filename>:<offset of the record header>} and the value wraps
         * the bytes collected by {@code skipToNextRecord}. Any exception raised while reading is
         * printed and turned into "no more records".
         *
         * @return {@code true} if a record was read and the position is still below the range end
         * @throws IOException declared by the reader contract; failures are caught internally
         * @throws InterruptedException declared by the reader contract; not thrown here
         */
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (this.hasNext) {
                try {
                    FileBackedOutputStream recordSink = new FileBackedOutputStream(67108864);
                    boolean withinRange = skipToNextRecord(recordSink);
                    this.hasNext = withinRange;
                    String recordKey = this.filename + ":" + this.lastRecordOffset;
                    this.key.set(recordKey);
                    if (this.pos >= this.end) {
                        this.hasNext = false;
                    }
                    // Release the previous record's backing stream before swapping in the new one.
                    this.value.closeStream();
                    this.value.set(recordSink);
                } catch (Exception failure) {
                    this.hasNext = false;
                    failure.printStackTrace();
                }
                return this.hasNext;
            }
            return false;
        }

        /**
         * Returns the key of the record most recently read by {@code nextKeyValue()}.
         *
         * <p>The decompiler renamed this method to {@code m28getCurrentKey}, which left the
         * reader contract's {@code getCurrentKey()} unimplemented; the original name is restored.
         *
         * @return the reusable key object, holding {@code <filename>:<record offset>}
         * @throws IOException declared by the reader contract; not thrown here
         * @throws InterruptedException declared by the reader contract; not thrown here
         */
        public Text getCurrentKey() throws IOException, InterruptedException {
            return this.key;
        }

        /**
         * Decompiler-generated alias of {@link #getCurrentKey()}, kept so existing references still resolve.
         *
         * @return the same object as {@link #getCurrentKey()}
         * @throws IOException never in practice; propagated from {@link #getCurrentKey()}
         * @throws InterruptedException never in practice; propagated from {@link #getCurrentKey()}
         */
        public Text m28getCurrentKey() throws IOException, InterruptedException {
            return getCurrentKey();
        }

        /**
         * Returns the value of the record most recently read by {@code nextKeyValue()}.
         *
         * <p>The decompiler renamed this method to {@code m27getCurrentValue}, which left the
         * reader contract's {@code getCurrentValue()} unimplemented; the original name is restored.
         *
         * @return the reusable value object wrapping the record's bytes
         * @throws IOException declared by the reader contract; not thrown here
         * @throws InterruptedException declared by the reader contract; not thrown here
         */
        public FileBackedBytesWritable getCurrentValue() throws IOException, InterruptedException {
            return this.value;
        }

        /**
         * Decompiler-generated alias of {@link #getCurrentValue()}, kept so existing references still resolve.
         *
         * @return the same object as {@link #getCurrentValue()}
         * @throws IOException never in practice; propagated from {@link #getCurrentValue()}
         * @throws InterruptedException never in practice; propagated from {@link #getCurrentValue()}
         */
        public FileBackedBytesWritable m27getCurrentValue() throws IOException, InterruptedException {
            return getCurrentValue();
        }

        /**
         * Releases the current value's backing stream and the input streams.
         *
         * <p>The pushback wrapper only exists after the first read attempt and the input stream
         * only after initialisation, so both are null-checked. Each resource is closed even if
         * closing an earlier one throws.
         *
         * @throws IOException if closing any of the resources fails
         */
        public synchronized void close() throws IOException {
            try {
                this.value.closeStream();
            } finally {
                try {
                    if (this.cache != null) {
                        this.cache.close();
                    }
                } finally {
                    if (this.in != null) {
                        this.in.close();
                    }
                }
            }
        }
    }

    /**
     * Creates a fresh, uninitialised reader for concatenated gzip records.
     *
     * @param inputSplit the split the reader will later be initialised with; unused here
     * @param taskAttemptContext the task context; unused here
     * @return a new {@link ConcatGzipRecordReader}
     * @throws IOException declared by the input format contract; not thrown here
     * @throws InterruptedException declared by the input format contract; not thrown here
     */
    public RecordReader<Text, FileBackedBytesWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        ConcatGzipRecordReader reader = new ConcatGzipRecordReader();
        return reader;
    }

    /**
     * Declares every input file splittable.
     *
     * @param jobContext the job context; unused
     * @param path the file in question; unused
     * @return always {@code true}
     */
    protected boolean isSplitable(JobContext jobContext, Path path) {
        final boolean splittable = true;
        return splittable;
    }

    /**
     * Command-line smoke test: counts the records of a local file and prints the reader position
     * after each one.
     *
     * <p>The file path is taken from the second argument when at least two are given (the
     * original behaviour), otherwise from the only argument. The reader is closed when the loop
     * ends or fails.
     *
     * @param strArr command-line arguments: {@code [ignored] <file>} or {@code <file>}
     * @throws IllegalArgumentException if no file argument is supplied
     * @throws Exception if the file cannot be opened or closed
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length == 0) {
            throw new IllegalArgumentException("Usage: ConcatGzipInputFormat [ignored] <file>");
        }
        String file = strArr.length > 1 ? strArr[1] : strArr[0];
        ConcatGzipRecordReader concatGzipRecordReader = new ConcatGzipRecordReader();
        concatGzipRecordReader.initialize(file);
        try {
            int i = 0;
            while (concatGzipRecordReader.nextKeyValue()) {
                i++;
                System.out.println("Count:" + i + " pos: " + concatGzipRecordReader.pos);
            }
        } finally {
            concatGzipRecordReader.close();
        }
    }
}
