package org.archive.format.text.charset;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.format.http.HttpHeader;
import org.archive.format.http.HttpHeaders;
import org.mozilla.universalchardet.UniversalDetector;

/* loaded from: input_file:org/archive/format/text/charset/CharsetDetector.class */
/**
 * Base class offering several ways to work out the character set of a document:
 * from an HTTP {@code Content-Type} header, from an HTML
 * {@code <META http-equiv="Content-Type" ...>} tag, or by sniffing the raw bytes.
 *
 * <p>Subclasses choose how to combine these helpers by implementing
 * {@link #getCharset(byte[], int, HttpHeaders)}.
 *
 * <p>All helpers return {@code null} when no supported charset could be determined.
 */
public abstract class CharsetDetector {
    private static final String META_TAGNAME = "META";
    private static final String META_CONTENT_ATTRIBUTE = "content";
    private static final String META_HTTP_EQUIV_ATTRIBUTE = "http-equiv";
    private static final String META_CONTENT_TYPE = "Content-Type";

    /** Attribute value wrapped in double quotes: {@code "..."}. */
    private static final String QUOTED_ATTR_VALUE = "(?:\"[^\">]*\")";
    /** Attribute value wrapped in backslash-escaped double quotes: {@code \"...\"}. */
    private static final String ESC_QUOTED_ATTR_VALUE = "(?:\\\\\"[^>\\\\]*\\\\\")";
    /** Attribute value wrapped in apostrophes: {@code '...'}. */
    private static final String APOSED_ATTR_VALUE = "(?:'[^'>]*')";
    /**
     * Any of the three quoting styles, or (trailing empty alternative) nothing at all.
     * Unquoted values therefore match as the empty string.
     */
    private static final String ANY_ATTR_VALUE =
            QUOTED_ATTR_VALUE + "|" + APOSED_ATTR_VALUE + "|" + ESC_QUOTED_ATTR_VALUE + "|";

    /** Not referenced in this class; available to subclasses. */
    protected static final int MAX_CHARSET_READAHEAD = 65536;
    /** Token that introduces the charset name inside a content-type value. */
    protected static final String CHARSET_TOKEN = "charset=";
    /** Upper-case name of the HTTP header inspected by {@link #getCharsetFromHeaders}. */
    protected static final String HTTP_CONTENT_TYPE_HEADER = "CONTENT-TYPE";
    /** Not referenced in this class; presumably the fallback used by callers/subclasses. */
    public static final String DEFAULT_CHARSET = "UTF-8";

    private static final String META_TAG_PATTERN_STRING =
            "<\\s*" + META_TAGNAME + "((>)|(\\s+[^>]*>))";
    private static final Pattern META_TAG_PATTERN =
            Pattern.compile(META_TAG_PATTERN_STRING, Pattern.CASE_INSENSITIVE);

    private static final String META_CONTENT_ATTR_PATTERN_STRING =
            "\\b" + META_CONTENT_ATTRIBUTE + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?";
    private static final Pattern META_CONTENT_ATTR_PATTERN =
            Pattern.compile(META_CONTENT_ATTR_PATTERN_STRING, Pattern.CASE_INSENSITIVE);

    private static final String META_HTTP_EQUIV_ATTR_PATTERN_STRING =
            "\\b" + META_HTTP_EQUIV_ATTRIBUTE + "\\s*=\\s*(" + META_CONTENT_TYPE + "|" + ANY_ATTR_VALUE
                    + ")(?:\\s|>)?";
    private static final Pattern META_HTTP_EQUIV_ATTR_PATTERN =
            Pattern.compile(META_HTTP_EQUIV_ATTR_PATTERN_STRING, Pattern.CASE_INSENSITIVE);

    /**
     * Matches the ISO-8859-1 spellings handled by {@link #mapCharset}. The negative
     * lookahead keeps longer part numbers such as ISO-8859-15 from matching.
     */
    private static final Pattern LATIN1_NAME_PATTERN =
            Pattern.compile("iso-?8859-1(?!\\d)", Pattern.CASE_INSENSITIVE);

    /**
     * Tells whether the running JVM supports the named charset.
     *
     * @param str charset name, may be {@code null}
     * @return {@code true} only if the name is non-null, syntactically legal and supported
     */
    protected boolean isCharsetSupported(String str) {
        if (str == null) {
            return false;
        }
        try {
            return Charset.isSupported(str);
        } catch (IllegalCharsetNameException e) {
            // A syntactically illegal name simply means "not supported" here.
            return false;
        }
    }

    /**
     * Replaces any name containing {@code iso8859-1} or {@code iso-8859-1}
     * (case-insensitive) with {@code cp1252}; every other name is returned unchanged.
     *
     * <p>The match is locale-independent and does not fire for other ISO-8859 parts
     * whose number merely starts with 1 (for example ISO-8859-15).
     *
     * @param str charset name, may be {@code null}
     * @return {@code "cp1252"} for ISO-8859-1 names, otherwise {@code str} itself
     */
    protected String mapCharset(String str) {
        if (str == null) {
            return null;
        }
        // NOTE(review): presumably mapped because cp1252 is a superset of Latin-1's
        // printable range and pages labelled Latin-1 often contain cp1252 bytes - confirm.
        return LATIN1_NAME_PATTERN.matcher(str).find() ? "cp1252" : str;
    }

    /**
     * Extracts the charset named by the {@code charset=} parameter of a content-type value.
     *
     * <p>Candidates are tried in this order, and the first supported one wins:
     * <ol>
     *   <li>everything after {@code charset=} verbatim;</li>
     *   <li>the same text with all spaces removed;</li>
     *   <li>the text up to the first {@code ;}, trimmed and with surrounding quote
     *       characters ({@code "}, {@code '}, {@code \}) removed.</li>
     * </ol>
     *
     * @param str content-type value, e.g. {@code text/html; charset=utf-8}; may be {@code null}
     * @return the (mapped) charset name, or {@code null} if absent or unsupported
     */
    protected String contentTypeToCharset(String str) {
        if (str == null) {
            return null;
        }
        int tokenStart = indexOfIgnoreCase(str, CHARSET_TOKEN);
        if (tokenStart == -1) {
            return null;
        }
        String remainder = str.substring(tokenStart + CHARSET_TOKEN.length());
        String[] candidates = {
            remainder,
            remainder.replace(" ", ""),
            cleanCharsetValue(remainder),
        };
        for (String candidate : candidates) {
            if (!candidate.isEmpty() && isCharsetSupported(candidate)) {
                return mapCharset(candidate);
            }
        }
        return null;
    }

    /**
     * Finds {@code token} in {@code text} ignoring case, comparing against the original
     * text so the returned index is always valid for {@code text} itself.
     */
    private static int indexOfIgnoreCase(String text, String token) {
        int tokenLength = token.length();
        int lastStart = text.length() - tokenLength;
        for (int start = 0; start <= lastStart; start++) {
            if (text.regionMatches(true, start, token, 0, tokenLength)) {
                return start;
            }
        }
        return -1;
    }

    /** Cuts a raw charset parameter at the first {@code ;} and strips whitespace and quotes. */
    private static String cleanCharsetValue(String raw) {
        int paramEnd = raw.indexOf(';');
        String value = (paramEnd == -1 ? raw : raw.substring(0, paramEnd)).trim();
        int from = 0;
        int to = value.length();
        while (from < to && isQuoteChar(value.charAt(from))) {
            from++;
        }
        while (to > from && isQuoteChar(value.charAt(to - 1))) {
            to--;
        }
        return value.substring(from, to).trim();
    }

    /** Characters that may wrap a charset value: quote, apostrophe, or an escaping backslash. */
    private static boolean isQuoteChar(char c) {
        return c == '"' || c == '\'' || c == '\\';
    }

    /**
     * Looks up the charset declared by the first {@code Content-Type} HTTP header.
     *
     * <p>Only the first matching header is consulted; if it carries no usable charset
     * the result is {@code null} even if later headers would.
     *
     * @param httpHeaders headers to scan, may be {@code null}
     * @return the (mapped) charset name, or {@code null} if none was found
     * @throws IOException declared for subclasses/callers; not thrown by this implementation
     */
    public String getCharsetFromHeaders(HttpHeaders httpHeaders) throws IOException {
        if (httpHeaders == null) {
            return null;
        }
        Iterator<HttpHeader> headers = httpHeaders.iterator();
        while (headers.hasNext()) {
            HttpHeader header = headers.next();
            String name = header.getName();
            // equalsIgnoreCase is locale-independent, unlike toUpperCase().
            if (name != null && name.trim().equalsIgnoreCase(HTTP_CONTENT_TYPE_HEADER)) {
                return contentTypeToCharset(header.getValue());
            }
        }
        return null;
    }

    /**
     * Looks up the charset declared by an HTML META {@code http-equiv="Content-Type"} tag.
     * The first {@code i} bytes are decoded as UTF-8 before scanning.
     *
     * @param bArr buffer holding the start of the document, may be {@code null}
     * @param i number of bytes of {@code bArr} to inspect
     * @return the (mapped) charset name, or {@code null} if none was found
     * @throws IOException declared for subclasses/callers; not thrown by this implementation
     */
    public String getCharsetFromMeta(byte[] bArr, int i) throws IOException {
        if (bArr == null || i <= 0) {
            return null;
        }
        String contentType = findMetaContentType(new String(bArr, 0, i, StandardCharsets.UTF_8));
        return contentType == null ? null : contentTypeToCharset(contentType);
    }

    /**
     * Removes the quoting around an attribute value captured by the attribute patterns:
     * {@code "..."}, {@code '...'} or backslash-escaped {@code \"...\"}. Values that are
     * not wrapped in a matching pair are returned unchanged.
     */
    private static String trimAttrValue(String str) {
        int length = str.length();
        if (length >= 4 && str.startsWith("\\\"") && str.endsWith("\\\"")) {
            return str.substring(2, length - 2);
        }
        if (length >= 2) {
            char first = str.charAt(0);
            if ((first == '"' || first == '\'') && str.charAt(length - 1) == first) {
                return str.substring(1, length - 1);
            }
        }
        return str;
    }

    /**
     * Scans HTML text for the first META tag whose {@code http-equiv} attribute is
     * {@code Content-Type} (case-insensitive) and returns that tag's {@code content} value
     * with its quotes removed.
     *
     * @param str HTML text to scan, may be {@code null}
     * @return the content value (empty when the attribute value is unquoted or empty);
     *         {@code null} if no such tag exists or the tag has no {@code content} attribute
     */
    public static String findMetaContentType(String str) {
        if (str == null) {
            return null;
        }
        Matcher tagMatcher = META_TAG_PATTERN.matcher(str);
        while (tagMatcher.find()) {
            String tag = tagMatcher.group();
            Matcher httpEquiv = META_HTTP_EQUIV_ATTR_PATTERN.matcher(tag);
            if (!httpEquiv.find()
                    || !META_CONTENT_TYPE.equalsIgnoreCase(trimAttrValue(httpEquiv.group(1)))) {
                continue;
            }
            // The first Content-Type META tag decides the result, even without a content attribute.
            Matcher content = META_CONTENT_ATTR_PATTERN.matcher(tag);
            return content.find() ? trimAttrValue(content.group(1)) : null;
        }
        return null;
    }

    /**
     * Guesses the charset by feeding the first {@code i} bytes to a {@link UniversalDetector}.
     *
     * @param bArr buffer holding the start of the document, may be {@code null}
     * @param i number of bytes of {@code bArr} to inspect
     * @return the (mapped) detected charset name, or {@code null} if nothing supported was detected
     * @throws IOException declared for subclasses/callers; not thrown by this implementation
     */
    public String getCharsetFromBytes(byte[] bArr, int i) throws IOException {
        if (bArr == null || i <= 0) {
            return null;
        }
        UniversalDetector detector = new UniversalDetector(null);
        detector.handleData(bArr, 0, i);
        detector.dataEnd();
        String detected = detector.getDetectedCharset();
        detector.reset();
        return isCharsetSupported(detected) ? mapCharset(detected) : null;
    }

    /**
     * Determines the charset of a document; the strategy is defined by the subclass.
     *
     * @param bArr buffer holding the start of the document
     * @param i number of bytes of {@code bArr} to use (assumed from the sibling helpers - confirm)
     * @param httpHeaders HTTP headers associated with the document
     * @return the charset name chosen by the implementation
     * @throws IOException if the implementation fails while examining the input
     */
    public abstract String getCharset(byte[] bArr, int i, HttpHeaders httpHeaders) throws IOException;
}
