package org.archive.extract;

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.archive.RecoverableRecordFormatException;
import org.archive.format.gzip.GZIPFormatException;
import org.archive.resource.Resource;
import org.archive.resource.ResourceConstants;
import org.archive.resource.ResourceParseException;
import org.archive.resource.ResourceProducer;
import org.archive.url.WaybackURLKeyMaker;

/* loaded from: input_file:org/archive/extract/ResourceExtractor.class */
/**
 * Command-line tool, launched through Hadoop's {@link ToolRunner}, that pulls records from a
 * single source and hands each one to an {@code ExtractorOutput}.
 *
 * <p>Argument forms accepted by {@link #run(String[])} (an optional leading {@code -strict}
 * may precede any of them):
 * <ul>
 *   <li>{@code SRC} - every record goes to a {@code DumpingExtractorOutput};</li>
 *   <li>{@code -cdx SRC [OUTFILE]} - {@code RealCDXExtractorOutput} with its default key maker;</li>
 *   <li>{@code -cdxURL SRC [OUTFILE]} - {@code RealCDXExtractorOutput} with a
 *       {@code WaybackURLKeyMaker(false)};</li>
 *   <li>{@code -wat SRC [OUTFILE]} - {@code WATExtractorOutput};</li>
 *   <li>{@code SRC ARG [OUTFILE]} - {@code JSONViewExtractorOutput}, with {@code ARG} passed as
 *       its second constructor argument.</li>
 * </ul>
 * {@code OUTFILE} is only recognised when exactly three arguments follow the optional
 * {@code -strict}; otherwise output goes to the stream set with {@link #setOut(OutputStream)},
 * or to {@code System.out} when none was set.
 */
public class ResourceExtractor implements ResourceConstants, Tool {
    private static final Logger LOG = Logger.getLogger(ResourceExtractor.class.getName());
    // Not referenced inside this class; retained because it is package-visible.
    Charset UTF8 = Charset.forName("utf-8");
    public static final String TOOL_NAME = "extractor";
    public static final String TOOL_DESCRIPTION = "A tool for extracting metadata from WARC, ARC, and WAT files";
    /** Optional caller-supplied destination; {@code null} means {@code System.out}. */
    private OutputStream out;
    private Configuration conf;

    /** Stores the Hadoop configuration handed over by the tool runner. */
    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    /** Returns the configuration previously stored with {@link #setConf(Configuration)}. */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Prints the usage text to standard error.
     *
     * @param i exit code to hand back
     * @return {@code i}, unchanged, so callers can write {@code return USAGE(code);}
     */
    private static int USAGE(int i) {
        System.err.println("Usage:\n");
        System.err.println("extractor [OPT] SRC");
        System.err.println("\tSRC is the local path, HTTP or HDFS URL to an arc, warc, arc.gz, or warc.gz.");
        System.err.println("\tOPT can be one of:");
        System.err.println("\t\t-cdxURL\tProduce output in old URL Wayback CDX format");
        System.err.println("\t\t-cdx\tProduce output in NEW-SURT-Wayback CDX format");
        System.err.println("\t\t\t (note that column 1 is NOT standard Wayback canonicalized)\n");
        System.err.println("\t\t-wat\tembed JSON output in a compressed WARCwrapper, for storage, or sharing.");
        return i;
    }

    /** Runs the tool through {@link ToolRunner} and exits the JVM with the resulting code. */
    public static void main(String[] strArr) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new ResourceExtractor(), strArr));
    }

    /** Wraps a byte stream in a UTF-8 encoding {@link PrintWriter}. */
    private PrintWriter makePrintWriter(OutputStream outputStream) {
        return new PrintWriter(new OutputStreamWriter(outputStream, Charset.forName("UTF-8")));
    }

    /**
     * Parses the command line, selects the output format and streams every record of the
     * source to it.
     *
     * @param strArr command-line arguments, see the class description for the accepted forms
     * @return {@code 0} once the source is exhausted; {@code 1} (after printing usage) when the
     *         argument count is outside 1..4, when {@code -strict} is the only argument, or when
     *         {@code ProducerUtils.getProducer} returns {@code null} for the source
     * @throws FileNotFoundException if the requested output file cannot be opened
     * @throws IOException on I/O failure, including closing a replaced or opened output stream
     * @throws ResourceParseException rethrown from record reading when strict mode is enabled
     * @throws URISyntaxException propagated from the calls made while opening the source
     */
    public int run(String[] strArr) throws IndexOutOfBoundsException, FileNotFoundException, IOException, ResourceParseException, URISyntaxException {
        if (strArr.length < 1 || strArr.length > 4) {
            return USAGE(1);
        }
        Logger.getLogger("org.archive").setLevel(Level.WARNING);

        int first = 0;
        if (strArr[0].equals("-strict")) {
            ProducerUtils.STRICT_GZ = true;
            first = 1;
        }
        if (first >= strArr.length) {
            // "-strict" alone names no source; show usage instead of indexing past the array.
            return USAGE(1);
        }

        OutputStream sink = this.out == null ? System.out : this.out;
        FileOutputStream fileSink = null;
        PrintWriter textWriter = null;
        try {
            String source = strArr[first];
            String outputPath = null;
            ExtractorOutput extractorOutput;
            if (strArr.length >= first + 2) {
                if (strArr.length == first + 3) {
                    outputPath = strArr[first + 2];
                    // The stream being replaced is closed as before, except for the process-wide
                    // System.out, which must stay usable for the rest of the JVM.
                    if (sink != System.out) {
                        sink.close();
                    }
                    fileSink = new FileOutputStream(outputPath);
                    sink = fileSink;
                }
                String option = strArr[first];
                if (option.equals("-cdx")) {
                    source = strArr[first + 1];
                    textWriter = makePrintWriter(sink);
                    extractorOutput = new RealCDXExtractorOutput(textWriter);
                } else if (option.equals("-cdxURL")) {
                    source = strArr[first + 1];
                    textWriter = makePrintWriter(sink);
                    extractorOutput = new RealCDXExtractorOutput(textWriter, new WaybackURLKeyMaker(false));
                } else if (option.equals("-wat")) {
                    source = strArr[first + 1];
                    // outputPath is null here unless an output file was given.
                    extractorOutput = new WATExtractorOutput(sink, outputPath);
                } else {
                    // No recognised option: the first argument stays the source and the
                    // second one is handed to the JSON view output.
                    extractorOutput = new JSONViewExtractorOutput(sink, strArr[first + 1]);
                }
            } else {
                extractorOutput = new DumpingExtractorOutput(sink);
            }

            ResourceProducer producer = ProducerUtils.getProducer(source);
            if (producer == null) {
                return USAGE(1);
            }
            ExtractingResourceProducer extractingProducer =
                    new ExtractingResourceProducer(producer, new ExtractingResourceFactoryMapper());
            return extractAll(extractingProducer, extractorOutput);
        } finally {
            // Push out anything still buffered in a writer created here, then release the
            // file this method opened (it was never closed before).
            if (textWriter != null) {
                textWriter.flush();
            }
            if (fileSink != null) {
                fileSink.close();
            }
        }
    }

    /**
     * Reads records until the producer returns {@code null} and passes each to {@code sink}.
     * A record whose read fails is reported and skipped; GZIP and parse failures are rethrown
     * instead when {@code ProducerUtils.STRICT_GZ} is set.
     *
     * @return always {@code 0}
     */
    private static int extractAll(ExtractingResourceProducer producer, ExtractorOutput sink)
            throws IOException, ResourceParseException {
        int emitted = 0;
        while (emitted < Integer.MAX_VALUE) {
            Resource resource;
            try {
                resource = producer.getNext();
            } catch (GZIPFormatException e) {
                reportRecordFailure("%s: %s", producer.getContext(), e);
                if (ProducerUtils.STRICT_GZ) {
                    throw e;
                }
                e.printStackTrace();
                // No record was obtained; move on to the next one.
                continue;
            } catch (RecoverableRecordFormatException e) {
                reportRecordFailure("RECOVERABLE - %s: %s", producer.getContext(), e);
                e.printStackTrace();
                continue;
            } catch (ResourceParseException e) {
                reportRecordFailure("%s: %s", producer.getContext(), e);
                if (ProducerUtils.STRICT_GZ) {
                    throw e;
                }
                e.printStackTrace();
                continue;
            }
            if (resource == null) {
                return 0;
            }
            emitted++;
            sink.output(resource);
        }
        return 0;
    }

    /** Logs a failed record read at SEVERE level and echoes it to standard error. */
    private static void reportRecordFailure(String logFormat, Object context, Exception failure) {
        LOG.severe(String.format(logFormat, context, failure.getMessage()));
        System.err.format("%s: %s", context, failure.getMessage());
    }

    /** Returns the caller-supplied output stream, or {@code null} if none was set. */
    public OutputStream getOut() {
        return this.out;
    }

    /** Sets the stream that {@link #run(String[])} writes to instead of {@code System.out}. */
    public void setOut(OutputStream outputStream) {
        this.out = outputStream;
    }
}
