package org.archive.resource.html;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.format.text.html.NodeUtils;
import org.archive.format.text.html.ParseObserver;
import org.eclipse.jdt.internal.core.ClasspathEntry;
import org.htmlparser.Attribute;
import org.htmlparser.beans.FilterBean;
import org.htmlparser.nodes.RemarkNode;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.nodes.TextNode;

/* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver.class */
public class ExtractingParseObserver implements ParseObserver {
    /** Receiver for everything this observer extracts (title, hrefs, links, metas, scripts). */
    HTMLMetaData data;
    /** Declared text-length cap; the text handlers in this class truncate at this same value (100). */
    private static final int MAX_TEXT_LEN = 100;
    /** Key that introduces the tag/attribute path inside an href entry. */
    private static final String PATH = "path";
    /** Separator placed between tag name and attribute name by {@code makePath}. */
    private static final String PATH_SEPARATOR = "@/";
    /** Lower-case attribute names treated as URL carriers on any tag; filled in the static initializer. */
    private static final Set<String> globalHrefAttributes;
    /**
     * Regex whose group 1 captures the argument of a CSS {@code url( ... )} expression,
     * including any surrounding (optionally backslash-escaped) quote characters.
     */
    protected static String cssUrlPatString = "url\\s*\\(\\s*((?:\\\\?[\"'])?.+?(?:\\\\?[\"'])?)\\s*\\)";
    /** Regex matching leading or trailing runs of (optionally backslash-escaped) quote characters. */
    protected static String cssUrlTrimPatString = "^(?:\\\\?[\"'])+|(?:\\\\?[\"'])+$";
    /**
     * Regex whose group 1 captures the target of a CSS {@code @import} rule written without
     * {@code url(...)}: a quoted string, a parenthesized value, or a bare token, followed by {@code ;}.
     */
    protected static String cssImportNoUrlPatString = "@import\\s+((?:'[^']+')|(?:\"[^\"]+\")|(?:\\('[^']+'\\))|(?:\\(\"[^\"]+\"\\))|(?:\\([^)]+\\))|(?:[a-z0-9_.:/\\\\-]+))\\s*;";
    /** Compiled form of {@link #cssImportNoUrlPatString}. */
    protected static Pattern cssImportNoUrlPattern = Pattern.compile(cssImportNoUrlPatString);
    /** Compiled form of {@link #cssUrlPatString}. */
    protected static Pattern cssUrlPattern = Pattern.compile(cssUrlPatString);
    /** Compiled form of {@link #cssUrlTrimPatString}. */
    protected static Pattern cssUrlTrimPattern = Pattern.compile(cssUrlTrimPatString);
    /** Tag-specific extractors keyed by tag name; filled in the static initializer. */
    private static final Map<String, TagExtractor> extractors = new HashMap<>();
    /** Most recent text seen while inside a TITLE element, or {@code null}. */
    String title = null;
    /** True between an opening TITLE tag and the next closing tag. */
    boolean inTitle = false;
    /** Pending href entries for anchors that have been opened but not yet closed. */
    Stack<ArrayList<String>> openAnchors = new Stack<>();
    /** Text collected so far for each pending anchor; kept parallel to {@link #openAnchors}. */
    Stack<StringBuilder> openAnchorTexts = new Stack<>();

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$AnchorTagExtractor.class */
    /**
     * Extractor for {@code A} tags.
     *
     * <p>Builds an href entry (path, url, plus any of target/alt/title that are present) when the
     * tag has an {@code href}. A self-closing anchor is recorded immediately; otherwise the entry
     * is parked on the observer's anchor stacks so anchor text can be attached when the tag closes.
     */
    private static class AnchorTagExtractor implements TagExtractor {
        private AnchorTagExtractor() {
        }

        /**
         * @param hTMLMetaData            receiver for a self-closing anchor's href entry
         * @param tagNode                 the anchor tag being opened
         * @param extractingParseObserver observer whose anchor stacks receive the pending entry
         */
        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            ArrayList<String> hrefEntry = new ArrayList<>();
            String url = tagNode.getAttribute("href");
            if (url != null) {
                hrefEntry.add("path");
                hrefEntry.add(ExtractingParseObserver.makePath("A", "href"));
                hrefEntry.add("url");
                hrefEntry.add(url);
                for (String name : new String[]{"target", "alt", "title"}) {
                    String value = tagNode.getAttribute(name);
                    if (value != null) {
                        hrefEntry.add(name);
                        hrefEntry.add(value);
                    }
                }
            }
            if (tagNode.isEmptyXmlTag()) {
                // A self-closing anchor never reaches handleTagClose, so record it here.
                // Entries without an href are skipped, matching what handleTagClose does
                // for regular anchors; otherwise an empty entry would be recorded.
                if (!hrefEntry.isEmpty()) {
                    hTMLMetaData.addHref(hrefEntry);
                }
            } else {
                // Always push (even an empty entry) so the close handler pops the right pair.
                extractingParseObserver.openAnchors.push(hrefEntry);
                extractingParseObserver.openAnchorTexts.push(new StringBuilder());
            }
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$AppletTagExtractor.class */
    /** Extractor that records the {@code codebase} and {@code cdata} attributes of a tag as hrefs. */
    private static final class AppletTagExtractor implements TagExtractor {
        private AppletTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"codebase", "cdata"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$AreaTagExtractor.class */
    /** Extractor that records the {@code href} attribute of a tag as an href. */
    private static final class AreaTagExtractor implements TagExtractor {
        private AreaTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"href"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$BaseTagExtractor.class */
    /** Extractor that stores a tag's {@code href} attribute, when present, as the document base href. */
    private static final class BaseTagExtractor implements TagExtractor {
        private BaseTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String baseHref = tagNode.getAttribute("href");
            if (baseHref == null) {
                // No href attribute: leave the metadata untouched.
                return;
            }
            hTMLMetaData.setBaseHref(baseHref);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$ButtonTagExtractor.class */
    /** Extractor that records the {@code formaction} attribute of a tag as an href. */
    private static final class ButtonTagExtractor implements TagExtractor {
        private ButtonTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"formaction"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$EmbedTagExtractor.class */
    /** Extractor that records the {@code src} attribute of a tag as an href. */
    private static final class EmbedTagExtractor implements TagExtractor {
        private EmbedTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"src"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$EmbedVideoTagExtractor.class */
    /** Extractor that records the {@code src} and {@code poster} attributes of a tag as hrefs. */
    private static final class EmbedVideoTagExtractor implements TagExtractor {
        private EmbedVideoTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"src", "poster"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$FormTagExtractor.class */
    /**
     * Extractor for forms: when the tag has an {@code action} attribute, records an href entry
     * with the path {@code FORM@/action}, the action URL, and any of {@code target} /
     * {@code method} that are present. Tags without {@code action} record nothing.
     */
    private static class FormTagExtractor implements TagExtractor {
        private FormTagExtractor() {
        }

        /**
         * @param hTMLMetaData            receiver for the href entry
         * @param tagNode                 the form tag being opened
         * @param extractingParseObserver unused by this extractor
         */
        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            String action = tagNode.getAttribute("action");
            if (action == null) {
                return;
            }
            // Typed list instead of the raw ArrayList the block used before.
            ArrayList<String> hrefEntry = new ArrayList<>();
            hrefEntry.add("path");
            hrefEntry.add(ExtractingParseObserver.makePath("FORM", "action"));
            hrefEntry.add("url");
            hrefEntry.add(action);
            for (String name : new String[]{"target", "method"}) {
                String value = tagNode.getAttribute(name);
                if (value != null) {
                    hrefEntry.add(name);
                    hrefEntry.add(value);
                }
            }
            hTMLMetaData.addHref(hrefEntry);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$FrameTagExtractor.class */
    /** Extractor that records the {@code src} attribute of a tag as an href. */
    private static final class FrameTagExtractor implements TagExtractor {
        private FrameTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"src"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$IFrameTagExtractor.class */
    /** Extractor that records the {@code src} attribute of a tag as an href. */
    private static final class IFrameTagExtractor implements TagExtractor {
        private IFrameTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"src"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$ImgTagExtractor.class */
    /**
     * Extractor that records a tag's {@code src} as an href together with its {@code alt} and
     * {@code title} attributes, and then its {@code longdesc} attribute as a plain href.
     */
    private static final class ImgTagExtractor implements TagExtractor {
        private ImgTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] descriptiveAttributes = {"alt", "title"};
            ExtractingParseObserver.addHrefWithAttrs(hTMLMetaData, tagNode, "src", descriptiveAttributes);

            final String[] plainUrlAttributes = {"longdesc"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, plainUrlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$InputTagExtractor.class */
    /** Extractor that records the {@code src} and {@code formaction} attributes of a tag as hrefs. */
    private static final class InputTagExtractor implements TagExtractor {
        private InputTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"src", "formaction"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$LinkTagExtractor.class */
    /**
     * Extractor that records a tag's {@code href}, with its {@code rel} and {@code type}
     * attributes when present, as a link entry. Tags without {@code href} record nothing.
     */
    private static class LinkTagExtractor implements TagExtractor {
        private LinkTagExtractor() {
        }

        /**
         * @param hTMLMetaData            receiver for the link entry
         * @param tagNode                 the tag being opened
         * @param extractingParseObserver unused by this extractor
         */
        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            // Typed list instead of the raw ArrayList the block used before.
            ArrayList<String> linkEntry = ExtractingParseObserver.getAttrListUrl(tagNode, "href", "rel", "type");
            if (linkEntry != null) {
                hTMLMetaData.addLink(linkEntry);
            }
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$MenuitemTagExtractor.class */
    /** Extractor that records the {@code icon} attribute of a tag as an href. */
    private static final class MenuitemTagExtractor implements TagExtractor {
        private MenuitemTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"icon"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$MetaTagExtractor.class */
    /**
     * Extractor that records whichever of the attributes {@code name}, {@code rel},
     * {@code content}, {@code http-equiv} and {@code property} are present on a tag as one
     * meta entry. Tags carrying none of them record nothing.
     */
    private static class MetaTagExtractor implements TagExtractor {
        private MetaTagExtractor() {
        }

        /**
         * @param hTMLMetaData            receiver for the meta entry
         * @param tagNode                 the tag being opened
         * @param extractingParseObserver unused by this extractor
         */
        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            // The attribute name is spelled out as a literal: it is an HTML attribute and should
            // not be borrowed from an unrelated Eclipse-internal constant that happens to hold
            // the same string.
            ArrayList<String> metaEntry = ExtractingParseObserver.getAttrList(tagNode, "name", "rel", "content", "http-equiv", "property");
            if (metaEntry != null) {
                hTMLMetaData.addMeta(metaEntry);
            }
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$ObjectTagExtractor.class */
    /** Extractor that records the {@code codebase}, {@code cdata} and {@code data} attributes of a tag as hrefs. */
    private static final class ObjectTagExtractor implements TagExtractor {
        private ObjectTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"codebase", "cdata", "data"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$QuotationLinkTagExtractor.class */
    /** Extractor that records the {@code cite} attribute of a tag as an href. */
    private static final class QuotationLinkTagExtractor implements TagExtractor {
        private QuotationLinkTagExtractor() {
            // created only from the enclosing class
        }

        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            final String[] urlAttributes = {"cite"};
            ExtractingParseObserver.addBasicHrefs(hTMLMetaData, tagNode, urlAttributes);
        }
    }

    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$ScriptTagExtractor.class */
    /**
     * Extractor that records a tag's {@code src}, with its {@code type} attribute when present,
     * as a script entry. Tags without {@code src} record nothing.
     */
    private static class ScriptTagExtractor implements TagExtractor {
        private ScriptTagExtractor() {
        }

        /**
         * @param hTMLMetaData            receiver for the script entry
         * @param tagNode                 the tag being opened
         * @param extractingParseObserver unused by this extractor
         */
        @Override
        public void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver) {
            // Typed list instead of the raw ArrayList the block used before.
            ArrayList<String> scriptEntry = ExtractingParseObserver.getAttrListUrl(tagNode, "src", "type");
            if (scriptEntry != null) {
                hTMLMetaData.addScript(scriptEntry);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/archive/resource/html/ExtractingParseObserver$TagExtractor.class */
    /**
     * Strategy for pulling data out of one kind of tag. Implementations are looked up by tag
     * name when a tag is opened.
     */
    public interface TagExtractor {
        /**
         * Inspects an opened tag and records whatever it finds.
         *
         * @param hTMLMetaData            receiver for extracted entries
         * @param tagNode                 the tag being opened
         * @param extractingParseObserver the observer driving the extraction, for extractors
         *                                that need its per-document state
         */
        void extract(HTMLMetaData hTMLMetaData, TagNode tagNode, ExtractingParseObserver extractingParseObserver);
    }

    /**
     * Creates an observer that records everything it extracts into the given metadata object.
     *
     * @param hTMLMetaData receiver for the extracted entries
     */
    public ExtractingParseObserver(HTMLMetaData hTMLMetaData) {
        this.data = hTMLMetaData;
    }

    /** Document-start callback; this observer does nothing here. */
    @Override // org.archive.format.text.html.ParseObserver
    public void handleDocumentStart() {
    }

    /** Document-complete callback; this observer does nothing here. */
    @Override // org.archive.format.text.html.ParseObserver
    public void handleDocumentComplete() {
    }

    /**
     * Handles an empty tag by treating it exactly like an opening tag.
     *
     * @param tagNode the empty tag
     */
    @Override // org.archive.format.text.html.ParseObserver
    public void handleTagEmpty(TagNode tagNode) {
        handleTagOpen(tagNode);
    }

    /**
     * Processes an opening tag.
     *
     * <p>A TITLE tag only toggles title capture. For every other tag, each attribute whose
     * lower-cased name is one of the global href attributes is recorded as an href, and then the
     * tag-specific extractor (if one is registered for the tag name) is run.
     *
     * @param tagNode the tag being opened
     */
    @Override
    public void handleTagOpen(TagNode tagNode) {
        final String nameOfTag = tagNode.getTagName();
        if (nameOfTag.equals("TITLE")) {
            // A self-closing title has no text to wait for.
            this.inTitle = !tagNode.isEmptyXmlTag();
            return;
        }

        for (Object rawAttribute : tagNode.getAttributesEx()) {
            final Attribute current = (Attribute) rawAttribute;
            final String attrName = current.getName();
            final String attrValue = current.getValue();
            if (attrName == null || attrValue == null) {
                continue;
            }
            final String normalizedName = attrName.toLowerCase(Locale.ROOT);
            if (!globalHrefAttributes.contains(normalizedName)) {
                continue;
            }
            this.data.addHref("path", makePath(nameOfTag, normalizedName), "url", attrValue);
        }

        final TagExtractor handler = extractors.get(nameOfTag);
        if (handler == null) {
            return;
        }
        handler.extract(this.data, tagNode, this);
    }

    /**
     * Processes a closing tag.
     *
     * <p>While title capture is active, any closing tag ends it and stores the captured title.
     * Otherwise, a closing {@code A} tag pops the innermost pending anchor; if that anchor had an
     * href, its collected text (trimmed, whitespace-collapsed, at most 100 characters) is attached
     * under the {@code "text"} key and the entry is recorded.
     *
     * @param tagNode the tag being closed
     */
    @Override
    public void handleTagClose(TagNode tagNode) {
        if (this.inTitle) {
            this.inTitle = false;
            this.data.setTitle(this.title);
            this.title = null;
            return;
        }
        if (!tagNode.getTagName().equals("A") || this.openAnchors.isEmpty()) {
            return;
        }
        // The two stacks are pushed together, so they are popped together.
        ArrayList<String> hrefEntry = this.openAnchors.pop();
        StringBuilder collectedText = this.openAnchorTexts.pop();
        if (hrefEntry == null || hrefEntry.isEmpty()) {
            // Anchor without an href: nothing to record.
            return;
        }
        if (collectedText != null) {
            String anchorText = collectedText.toString().trim().replaceAll("\\s+", " ");
            if (anchorText.length() > 100) {
                anchorText = anchorText.substring(0, 100);
            }
            if (!anchorText.isEmpty()) {
                // Literal key: the entry key should not be borrowed from an unrelated
                // bean-property constant that happens to hold the same string.
                hrefEntry.add("text");
                hrefEntry.add(anchorText);
            }
        }
        this.data.addHref(hrefEntry);
    }

    /**
     * Processes a text node.
     *
     * <p>The text is whitespace-collapsed and cut to 100 characters. Inside a title it replaces
     * the captured title; otherwise it is appended to the text buffer of every pending anchor,
     * each buffer being capped at 100 characters.
     *
     * @param textNode the text node encountered by the parser
     */
    @Override
    public void handleTextNode(TextNode textNode) {
        String text = textNode.getText().replaceAll("\\s+", " ");
        if (text.length() > 100) {
            text = text.substring(0, 100);
        }
        if (this.inTitle) {
            this.title = text;
            return;
        }
        // Stack iterates from the oldest (outermost) anchor to the newest. An outer or
        // never-closed anchor fills up first, so a full buffer must only be skipped;
        // stopping there would leave all later anchors without any text.
        for (StringBuilder anchorText : this.openAnchorTexts) {
            int room = 100 - anchorText.length();
            if (room <= 0) {
                continue;
            }
            anchorText.append(text, 0, Math.min(text.length(), room));
        }
    }

    /** Script-content callback; this observer extracts nothing from script text. */
    @Override // org.archive.format.text.html.ParseObserver
    public void handleScriptNode(TextNode textNode) {
    }

    /**
     * Scans style text for URLs, first with the CSS {@code url(...)} pattern and then with the
     * {@code @import} pattern, recording each hit as an href.
     *
     * @param textNode the style content
     */
    @Override
    public void handleStyleNode(TextNode textNode) {
        final Pattern[] cssPatterns = {cssUrlPattern, cssImportNoUrlPattern};
        for (Pattern cssPattern : cssPatterns) {
            patternCSSExtract(this.data, cssPattern, textNode.getText());
        }
    }

    /** Comment-node callback; this observer extracts nothing from remarks. */
    @Override // org.archive.format.text.html.ParseObserver
    public void handleRemarkNode(RemarkNode remarkNode) {
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Joins a tag name and an attribute name with the path separator.
     *
     * @param str  tag name
     * @param str2 attribute name
     * @return {@code str}, the separator, then {@code str2}
     * @throws NullPointerException if either argument is {@code null}
     */
    public static String makePath(String str, String str2) {
        // concat (rather than +) keeps the null-argument failure of the builder-based form.
        final String prefix = str.concat(PATH_SEPARATOR);
        return prefix.concat(str2);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Records one href (path plus url) for each of the named attributes that is present on the tag.
     *
     * @param hTMLMetaData receiver for the href entries
     * @param tagNode      tag whose attributes are read
     * @param strArr       attribute names to look up, in order
     */
    public static void addBasicHrefs(HTMLMetaData hTMLMetaData, TagNode tagNode, String... strArr) {
        for (int i = 0; i < strArr.length; i++) {
            final String attrName = strArr[i];
            final String url = tagNode.getAttribute(attrName);
            if (url == null) {
                continue;
            }
            final String path = makePath(tagNode.getTagName(), attrName);
            hTMLMetaData.addHref("path", path, "url", url);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Collects name/value pairs for the named attributes that are present on the tag.
     *
     * @param tagNode tag whose attributes are read
     * @param strArr  attribute names to look up, in order
     * @return a flat list of alternating names and values, or {@code null} when none of the
     *         attributes is present
     */
    public static ArrayList<String> getAttrList(TagNode tagNode, String... strArr) {
        final ArrayList<String> pairs = new ArrayList<>();
        for (int i = 0; i < strArr.length; i++) {
            final String value = tagNode.getAttribute(strArr[i]);
            if (value == null) {
                continue;
            }
            pairs.add(strArr[i]);
            pairs.add(value);
        }
        return pairs.isEmpty() ? null : pairs;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Builds an entry for a URL-carrying attribute: the path key and value, the url key and
     * value, then name/value pairs for each extra attribute that is present.
     *
     * @param tagNode tag whose attributes are read
     * @param str     name of the attribute holding the URL
     * @param strArr  names of additional attributes to include when present
     * @return the entry, or {@code null} when the URL attribute is absent
     */
    public static ArrayList<String> getAttrListUrl(TagNode tagNode, String str, String... strArr) {
        final String url = tagNode.getAttribute(str);
        if (url == null) {
            return null;
        }
        final ArrayList<String> entry = new ArrayList<>();
        entry.add("path");
        entry.add(makePath(tagNode.getTagName(), str));
        entry.add("url");
        entry.add(url);
        for (int i = 0; i < strArr.length; i++) {
            final String extraValue = tagNode.getAttribute(strArr[i]);
            if (extraValue == null) {
                continue;
            }
            entry.add(strArr[i]);
            entry.add(extraValue);
        }
        return entry;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Records an href for a URL-carrying attribute together with extra attributes, if the URL
     * attribute is present on the tag.
     *
     * @param hTMLMetaData receiver for the href entry
     * @param tagNode      tag whose attributes are read
     * @param str          name of the attribute holding the URL
     * @param strArr       names of additional attributes to include when present
     */
    public static void addHrefWithAttrs(HTMLMetaData hTMLMetaData, TagNode tagNode, String str, String... strArr) {
        final ArrayList<String> entry = getAttrListUrl(tagNode, str, strArr);
        if (entry == null) {
            return;
        }
        hTMLMetaData.addHref(entry);
    }

    /**
     * Finds successive matches of a pattern in CSS text and records group 1 of each match,
     * stripped of surrounding quote characters, as an href under the path {@code STYLE/#text}.
     * Searching stops once a match ends at or beyond the first 100000 characters.
     *
     * @param hTMLMetaData receiver for the href entries
     * @param pattern      pattern whose first group holds the URL
     * @param str          CSS text to scan
     */
    private void patternCSSExtract(HTMLMetaData hTMLMetaData, Pattern pattern, String str) {
        final int scanLimit = Math.min(str.length(), 100000);
        final Matcher urlMatcher = pattern.matcher(str);
        int lastMatchEnd = 0;
        while (lastMatchEnd < scanLimit) {
            if (!urlMatcher.find()) {
                break;
            }
            lastMatchEnd = urlMatcher.end();
            final String rawUrl = urlMatcher.group(1);
            final String url = cssUrlTrimPattern.matcher(rawUrl).replaceAll("");
            if (url.isEmpty()) {
                continue;
            }
            hTMLMetaData.addHref("path", "STYLE/#text", "href", url);
        }
    }

    // Registers the tag-specific extractors by tag name and fills the set of attribute
    // names that are treated as URL carriers on any tag.
    static {
        extractors.put("A", new AnchorTagExtractor());
        extractors.put("APPLET", new AppletTagExtractor());
        extractors.put("AREA", new AreaTagExtractor());
        extractors.put("BASE", new BaseTagExtractor());
        extractors.put("EMBED", new EmbedTagExtractor());
        extractors.put("FORM", new FormTagExtractor());
        extractors.put("FRAME", new FrameTagExtractor());
        extractors.put("IFRAME", new IFrameTagExtractor());
        extractors.put("IMG", new ImgTagExtractor());
        extractors.put("INPUT", new InputTagExtractor());
        extractors.put("LINK", new LinkTagExtractor());
        extractors.put("META", new MetaTagExtractor());
        extractors.put("OBJECT", new ObjectTagExtractor());
        extractors.put(NodeUtils.SCRIPT_TAG_NAME, new ScriptTagExtractor());
        // Tags whose URL lives in a cite attribute.
        extractors.put("Q", new QuotationLinkTagExtractor());
        extractors.put("BLOCKQUOTE", new QuotationLinkTagExtractor());
        extractors.put("DEL", new QuotationLinkTagExtractor());
        extractors.put("INS", new QuotationLinkTagExtractor());
        extractors.put("BUTTON", new ButtonTagExtractor());
        extractors.put("MENUITEM", new MenuitemTagExtractor());
        extractors.put("VIDEO", new EmbedVideoTagExtractor());
        // Tags whose only extracted URL attribute is src.
        extractors.put("AUDIO", new EmbedTagExtractor());
        extractors.put("TRACK", new EmbedTagExtractor());
        extractors.put("SOURCE", new EmbedTagExtractor());
        // Diamond instead of a raw HashSet so the element type is checked.
        globalHrefAttributes = new HashSet<>();
        globalHrefAttributes.add("background");
        globalHrefAttributes.add("data-href");
        globalHrefAttributes.add("data-uri");
    }
}
