• Element


    Element

    package org.jsoup.nodes;
    
    import org.jsoup.helper.ChangeNotifyingArrayList;
    import org.jsoup.helper.StringUtil;
    import org.jsoup.helper.Validate;
    import org.jsoup.parser.ParseSettings;
    import org.jsoup.parser.Parser;
    import org.jsoup.parser.Tag;
    import org.jsoup.select.Collector;
    import org.jsoup.select.Elements;
    import org.jsoup.select.Evaluator;
    import org.jsoup.select.NodeTraversor;
    import org.jsoup.select.NodeVisitor;
    import org.jsoup.select.QueryParser;
    import org.jsoup.select.Selector;
    
    import java.io.IOException;
    import java.lang.ref.WeakReference;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collection;
    import java.util.Collections;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;
    import java.util.regex.Pattern;
    import java.util.regex.PatternSyntaxException;
    
    import static org.jsoup.internal.Normalizer.normalize;
    
    /**
     * An HTML element consists of a tag name, attributes, and child nodes (including text nodes and
     * other elements).
     * 
     * From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
     * 
     * @author Jonathan Hedley, jonathan@hedley.net
     */
    public class Element extends Node {
        // Shared empty list used as the initial value of childNodes; ensureChildNodes() swaps in a real list on demand.
        private static final List<Node> EMPTY_NODES = Collections.emptyList();
        // Matches runs of whitespace. NOTE(review): presumably used to split the class attribute; its usage is not in view here.
        private static final Pattern classSplit = Pattern.compile("\\s+");
        // The tag of this element; replaced by tagName(String).
        private Tag tag;
        // Weakly-held cache of the child nodes that are Elements; built by childElementsList(), reset by nodelistChanged().
        private WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children
        // All child nodes (elements, text, data, ...). Starts as EMPTY_NODES.
        List<Node> childNodes;
        // May be null (see the two-argument constructor); created lazily by attributes().
        private Attributes attributes;
        // Base URI of this element; validated non-null in the constructor.
        private String baseUri;
    
        /**
         * Create a new, standalone element from a tag name. The element gets an empty base URI and a new, empty
         * set of attributes.
         *
         * @param tag tag name
         */
        public Element(String tag) {
            this(Tag.valueOf(tag), "", new Attributes());
        }
    
        /**
         * Create a new, standalone Element. (Standalone in that it has no parent.)
         *
         * @param tag tag of this element; must not be null
         * @param baseUri the base URI; must not be null
         * @param attributes initial attributes (stored as supplied, without validation)
         * @see #appendChild(Node)
         * @see #appendElement(String)
         */
        public Element(Tag tag, String baseUri, Attributes attributes) {
            Validate.notNull(tag);
            Validate.notNull(baseUri);
            this.tag = tag;
            this.baseUri = baseUri;
            this.attributes = attributes;
            // start with the shared empty list; a real list is only allocated once a child is added
            this.childNodes = EMPTY_NODES;
        }
        
        /**
         * Create a new Element from a tag and a base URI. No attributes object is supplied up front; one is
         * created on the first call to {@link #attributes()}.
         *
         * @param tag element tag
         * @param baseUri the base URI of this element. An empty string is acceptable, but null is not.
         * @see Tag#valueOf(String, ParseSettings)
         */
        public Element(Tag tag, String baseUri) {
            this(tag, baseUri, null);
        }
    
        /**
         * Get the child node list, first replacing the shared empty placeholder with a real list if needed.
         *
         * @return this element's child node list
         */
        protected List<Node> ensureChildNodes() {
            if (childNodes != EMPTY_NODES)
                return childNodes;

            childNodes = new NodeList(this, 4);
            return childNodes;
        }
    
        /**
         * Test whether an attributes object has been set or created for this element.
         *
         * @return true if the attributes field is non-null
         */
        @Override
        protected boolean hasAttributes() {
            final boolean present = (attributes != null);
            return present;
        }
    
        /**
         * Get this element's attributes, creating an empty set on first access if none exists yet.
         *
         * @return the attributes; never null
         */
        @Override
        public Attributes attributes() {
            if (hasAttributes())
                return attributes;

            attributes = new Attributes();
            return attributes;
        }
    
        /**
         * Get the base URI stored on this element.
         *
         * @return the base URI
         */
        @Override
        public String baseUri() {
            return this.baseUri;
        }
    
        /**
         * Store a new base URI on this element. No validation is performed here.
         *
         * @param baseUri the new base URI
         */
        @Override
        protected void doSetBaseUri(String baseUri) {
            this.baseUri = baseUri;
        }
    
        /**
         * Get the number of child nodes (of any node type) this element holds.
         *
         * @return the size of the child node list
         */
        @Override
        public int childNodeSize() {
            final List<Node> nodes = childNodes;
            return nodes.size();
        }
    
        /**
         * Get the node name of this element, which is the name of its tag.
         *
         * @return the tag name
         */
        @Override
        public String nodeName() {
            final String name = tag.getName();
            return name;
        }
    
        /**
         * Get the name of this element's tag, e.g. {@code div}.
         *
         * @return the tag name
         */
        public String tagName() {
            final String name = tag.getName();
            return name;
        }
    
        /**
         * Change the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
         * {@code el.tagName("div");}. The tag is resolved with case-preserving parse settings.
         *
         * @param tagName new tag name for this element; must not be empty
         * @return this element, for chaining
         */
        public Element tagName(String tagName) {
            Validate.notEmpty(tagName, "Tag name must not be empty.");
            // resolve with preserveCase so the caller's requested casing is kept
            final Tag replacement = Tag.valueOf(tagName, ParseSettings.preserveCase);
            this.tag = replacement;
            return this;
        }
    
        /**
         * Get the Tag object backing this element.
         *
         * @return the tag object
         */
        public Tag tag() {
            return this.tag;
        }
        
        /**
         * Test if this element is a block-level element, as reported by its {@link Tag}.
         *
         * @return true if block, false if not (and thus inline)
         */
        public boolean isBlock() {
            final boolean block = tag.isBlock();
            return block;
        }
    
        /**
         * Get the {@code id} attribute of this element. The attribute key is looked up ignoring case.
         *
         * @return The id attribute, if present, or an empty string if not.
         */
        public String id() {
            final Attributes attrs = attributes();
            return attrs.getIgnoreCase("id");
        }
    
        /**
         * Set an attribute value on this element. If this element already has an attribute with the
         * key, its value is updated; otherwise, a new attribute is added.
         *
         * @param attributeKey the attribute key
         * @param attributeValue the attribute value
         * @return this element
         */
        public Element attr(String attributeKey, String attributeValue) {
            // the work is done by the superclass; this overload only narrows the return type for chaining
            super.attr(attributeKey, attributeValue);
            return this;
        }
        
        /**
         * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to ""
         * and marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the
         * attribute with the same key if it exists.
         *
         * @param attributeKey the attribute key
         * @param attributeValue the attribute value
         * @return this element
         */
        public Element attr(String attributeKey, boolean attributeValue) {
            final Attributes attrs = attributes();
            attrs.put(attributeKey, attributeValue);
            return this;
        }
    
        /**
         * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
         * starting with "data-" is included in the dataset.
         * <p>
         * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
         * {@code package=jsoup, language=Java}.
         * <p>
         * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are
         * reflected in the other map.
         * <p>
         * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
         *
         * @return a map of {@code key=value} custom data attributes.
         */
        public Map<String, String> dataset() {
            final Attributes attrs = attributes();
            return attrs.dataset();
        }
    
        /**
         * Get this element's parent, cast to an Element.
         *
         * @return the parent element, or null if this element has no parent node
         */
        @Override
        public final Element parent() {
            final Element asElement = (Element) parentNode;
            return asElement;
        }
    
        /**
         * Get this element's parent and ancestors, up to the document root.
         *
         * @return this element's stack of parents, closest first.
         */
        public Elements parents() {
            final Elements ancestors = new Elements();
            accumulateParents(this, ancestors);
            return ancestors;
        }
    
        /**
         * Collect the ancestors of {@code el} into {@code parents}, closest first. The walk stops when there is no
         * further parent, or when the parent's tag name is {@code #root} (that parent is not added).
         * <p>
         * Implemented as a loop rather than recursion so that very deeply nested trees cannot exhaust the call stack.
         *
         * @param el the element whose ancestors are collected
         * @param parents the list that receives the ancestors
         */
        private static void accumulateParents(Element el, Elements parents) {
            Element ancestor = el.parent();
            while (ancestor != null && !ancestor.tagName().equals("#root")) {
                parents.add(ancestor);
                ancestor = ancestor.parent();
            }
        }
    
        /**
         * Get a child element of this element, by its 0-based index number.
         * <p>
         * Note that an element can have both mixed Nodes and Elements as children. This method inspects
         * a filtered list of children that are elements, and the index is based on that filtered list.
         * </p>
         *
         * @param index the index number of the element to retrieve
         * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
         * @see #childNode(int)
         */
        public Element child(int index) {
            final List<Element> elementChildren = childElementsList();
            return elementChildren.get(index);
        }
    
        /**
         * Get this element's child elements.
         * <p>
         * This is effectively a filter on {@link #childNodes()} to get Element nodes.
         * </p>
         *
         * @return child elements. If this element has no children, returns an empty list.
         * @see #childNodes()
         */
        public Elements children() {
            final List<Element> elementChildren = childElementsList();
            return new Elements(elementChildren);
        }
    
        /**
         * Maintains a shadow copy of this element's child elements, held through a weak reference. The copy is
         * rebuilt when no reference exists or its target has been collected; {@link #nodelistChanged()} drops it.
         * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around.
         *
         * @return a list of child elements
         */
        private List<Element> childElementsList() {
            final List<Element> cached = (shadowChildrenRef == null) ? null : shadowChildrenRef.get();
            if (cached != null)
                return cached;

            final int count = childNodes.size();
            final List<Element> rebuilt = new ArrayList<>(count);
            //noinspection ForLoopReplaceableByForEach (because it allocates an Iterator which is wasteful here)
            for (int idx = 0; idx < count; idx++) {
                final Node candidate = childNodes.get(idx);
                if (candidate instanceof Element)
                    rebuilt.add((Element) candidate);
            }
            shadowChildrenRef = new WeakReference<>(rebuilt);
            return rebuilt;
        }
    
        /**
         * Called when the child node list changes: notifies the superclass, then discards the cached
         * shadow list of child elements so it is rebuilt on next use.
         */
        @Override
        void nodelistChanged() {
            super.nodelistChanged();
            this.shadowChildrenRef = null;
        }
    
        /**
         * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
         * <p>
         * This is effectively a filter on {@link #childNodes()} to get Text nodes.
         * </p>
         * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p}
         * element selected:
         * <ul>
         *     <li>{@code p.text()} = {@code "One Two Three Four"}</li>
         *     <li>{@code p.ownText()} = {@code "One Three Four"}</li>
         *     <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
         *     <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
         *     <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
         * </ul>
         *
         * @return child text nodes. If this element has no text nodes, returns an empty list.
         */
        public List<TextNode> textNodes() {
            final List<TextNode> found = new ArrayList<>();
            for (Node child : childNodes) {
                if (!(child instanceof TextNode))
                    continue;
                found.add((TextNode) child);
            }
            return Collections.unmodifiableList(found);
        }
    
        /**
         * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
         * <p>
         * This is effectively a filter on {@link #childNodes()} to get Data nodes.
         * </p>
         *
         * @return child data nodes. If this element has no data nodes, returns an empty list.
         * @see #data()
         */
        public List<DataNode> dataNodes() {
            final List<DataNode> found = new ArrayList<>();
            for (Node child : childNodes) {
                if (!(child instanceof DataNode))
                    continue;
                found.add((DataNode) child);
            }
            return Collections.unmodifiableList(found);
        }
    
        /**
         * Find elements that match the {@link Selector} CSS query, with this element as the starting context.
         * Matched elements may include this element, or any of its children.
         * <p>
         * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
         * multiple filters can be combined, e.g.:
         * </p>
         * <ul>
         * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
         * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
         * </ul>
         * <p>
         * See the query syntax documentation in {@link org.jsoup.select.Selector}.
         * </p>
         *
         * @param cssQuery a {@link Selector} CSS-like query
         * @return elements that match the query (empty if none match)
         * @see org.jsoup.select.Selector
         * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
         */
        public Elements select(String cssQuery) {
            final Elements matched = Selector.select(cssQuery, this);
            return matched;
        }
    
        /**
         * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting
         * context.
         * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as
         * query execution stops on the first hit.</p>
         *
         * @param cssQuery a {@link Selector} CSS-like query
         * @return the first matching element, or <b>{@code null}</b> if there is no match.
         */
        public Element selectFirst(String cssQuery) {
            final Element firstMatch = Selector.selectFirst(cssQuery, this);
            return firstMatch;
        }
    
        /**
         * Check if this element matches the given {@link Selector} CSS query. The query is parsed into an
         * evaluator, which is then tested via {@link #is(Evaluator)}.
         *
         * @param cssQuery a {@link Selector} CSS query
         * @return if this element matches the query
         */
        public boolean is(String cssQuery) {
            final Evaluator parsed = QueryParser.parse(cssQuery);
            return is(parsed);
        }
    
        /**
         * Check if this element matches the given evaluator. The evaluator is given this element's root
         * (cast to an Element) as the matching context.
         *
         * @param evaluator an element evaluator
         * @return if this element matches
         */
        public boolean is(Evaluator evaluator) {
            final Element context = (Element) this.root();
            return evaluator.matches(context, this);
        }
        
        /**
         * Add a child node to the end of this element's children.
         *
         * @param child node to add; must not be null
         * @return this element, so that you can add more child nodes or elements.
         */
        public Element appendChild(Node child) {
            Validate.notNull(child);

            // was - Node#addChildren(child). short-circuits an array create and a loop.
            reparentChild(child);
            ensureChildNodes();
            // measured after reparenting, so it is the index the child will occupy once added
            final int newIndex = childNodes.size();
            childNodes.add(child);
            child.setSiblingIndex(newIndex);
            return this;
        }
    
        /**
         * Add this element to the supplied parent element, as its last child.
         *
         * @param parent element to which this element will be appended; must not be null
         * @return this element, so that you can continue modifying the element
         */
        public Element appendTo(Element parent) {
            Validate.notNull(parent);
            final Element self = this;
            parent.appendChild(self);
            return self;
        }
    
        /**
         * Add a node to the start of this element's children.
         *
         * @param child node to add; must not be null
         * @return this element, so that you can add more child nodes or elements.
         */
        public Element prependChild(Node child) {
            Validate.notNull(child);

            final int front = 0;
            addChildren(front, child);
            return this;
        }
    
    
        /**
         * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to
         * the right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes
         * first.
         *
         * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at
         * the end. Negative values count back from the end.
         * @param children child nodes to insert; must not be null
         * @return this element, for chaining.
         */
        public Element insertChildren(int index, Collection<? extends Node> children) {
            Validate.notNull(children, "Children collection to be inserted must not be null.");
            final int currentSize = childNodeSize();
            // roll around: -1 maps to currentSize (append), -2 to the slot before the last node, and so on
            final int position = (index < 0) ? index + currentSize + 1 : index;
            Validate.isTrue(position >= 0 && position <= currentSize, "Insert position out of bounds.");

            // snapshot into an array before inserting, as moving nodes can change the source collection
            final Node[] snapshot = children.toArray(new Node[0]);
            addChildren(position, snapshot);
            return this;
        }
    
        /**
         * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to
         * the right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes
         * first.
         *
         * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at
         * the end. Negative values count back from the end.
         * @param children child nodes to insert; must not be null
         * @return this element, for chaining.
         */
        public Element insertChildren(int index, Node... children) {
            Validate.notNull(children, "Children collection to be inserted must not be null.");
            final int currentSize = childNodeSize();
            // roll around: -1 maps to currentSize (append), -2 to the slot before the last node, and so on
            final int position = (index < 0) ? index + currentSize + 1 : index;
            Validate.isTrue(position >= 0 && position <= currentSize, "Insert position out of bounds.");

            addChildren(position, children);
            return this;
        }
        
        /**
         * Create a new element by tag name, and add it as the last child. The new element gets this element's
         * base URI.
         *
         * @param tagName the name of the tag (e.g. {@code div}).
         * @return the new element, to allow you to add content to it, e.g.:
         *  {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
         */
        public Element appendElement(String tagName) {
            final Tag childTag = Tag.valueOf(tagName);
            final Element created = new Element(childTag, baseUri());
            appendChild(created);
            return created;
        }
        
        /**
         * Create a new element by tag name, and add it as the first child. The new element gets this element's
         * base URI.
         *
         * @param tagName the name of the tag (e.g. {@code div}).
         * @return the new element, to allow you to add content to it, e.g.:
         *  {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
         */
        public Element prependElement(String tagName) {
            final Tag childTag = Tag.valueOf(tagName);
            final Element created = new Element(childTag, baseUri());
            prependChild(created);
            return created;
        }
        
        /**
         * Create a new TextNode and append it to this element.
         *
         * @param text the unencoded text to add; must not be null
         * @return this element
         */
        public Element appendText(String text) {
            Validate.notNull(text);
            appendChild(new TextNode(text));
            return this;
        }
        
        /**
         * Create a new TextNode and prepend it to this element.
         *
         * @param text the unencoded text to add; must not be null
         * @return this element
         */
        public Element prependText(String text) {
            Validate.notNull(text);
            prependChild(new TextNode(text));
            return this;
        }
        
        /**
         * Add inner HTML to this element. The supplied HTML will be parsed as a fragment in the context of this
         * element, and each node appended to the end of the children.
         *
         * @param html HTML to add inside this element, after the existing HTML; must not be null
         * @return this element
         * @see #html(String)
         */
        public Element append(String html) {
            Validate.notNull(html);

            final List<Node> parsed = Parser.parseFragment(html, this, baseUri());
            final Node[] toAdd = parsed.toArray(new Node[0]);
            addChildren(toAdd);
            return this;
        }
        
        /**
         * Add inner HTML into this element. The supplied HTML will be parsed as a fragment in the context of this
         * element, and each node prepended to the start of the element's children.
         *
         * @param html HTML to add inside this element, before the existing HTML; must not be null
         * @return this element
         * @see #html(String)
         */
        public Element prepend(String html) {
            Validate.notNull(html);

            final List<Node> parsed = Parser.parseFragment(html, this, baseUri());
            final Node[] toAdd = parsed.toArray(new Node[0]);
            addChildren(0, toAdd);
            return this;
        }
    
        /**
         * Insert the specified HTML into the DOM before this element (as a preceding sibling). Delegates to the
         * superclass and narrows the return type.
         *
         * @param html HTML to add before this element
         * @return this element, for chaining
         * @see #after(String)
         */
        @Override
        public Element before(String html) {
            final Node result = super.before(html);
            return (Element) result;
        }
    
        /**
         * Insert the specified node into the DOM before this node (as a preceding sibling). Delegates to the
         * superclass and narrows the return type.
         *
         * @param node to add before this element
         * @return this Element, for chaining
         * @see #after(Node)
         */
        @Override
        public Element before(Node node) {
            final Node result = super.before(node);
            return (Element) result;
        }
    
        /**
         * Insert the specified HTML into the DOM after this element (as a following sibling). Delegates to the
         * superclass and narrows the return type.
         *
         * @param html HTML to add after this element
         * @return this element, for chaining
         * @see #before(String)
         */
        @Override
        public Element after(String html) {
            final Node result = super.after(html);
            return (Element) result;
        }
    
        /**
         * Insert the specified node into the DOM after this node (as a following sibling). Delegates to the
         * superclass and narrows the return type.
         *
         * @param node to add after this element
         * @return this element, for chaining
         * @see #before(Node)
         */
        @Override
        public Element after(Node node) {
            final Node result = super.after(node);
            return (Element) result;
        }
    
        /**
         * Remove all of the element's child nodes by clearing the child node list. Any attributes are left as-is.
         *
         * @return this element
         */
        public Element empty() {
            this.childNodes.clear();
            return this;
        }
    
        /**
         * Wrap the supplied HTML around this element. Delegates to the superclass and narrows the return type.
         *
         * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily
         * deep.
         * @return this element, for chaining.
         */
        @Override
        public Element wrap(String html) {
            final Node result = super.wrap(html);
            return (Element) result;
        }
    
        /**
         * Get a CSS selector that will uniquely select this element.
         * <p>
         * If the element has an ID, returns #id;
         * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'},
         * followed by a unique selector for the element (tag.class.class:nth-child(n)).
         * </p>
         * <p>
         * The {@code nth-child} index is appended as a plain integer, so the result does not depend on the
         * JVM's default locale.
         * </p>
         *
         * @return the CSS Path that can be used to retrieve the element in a selector.
         */
        public String cssSelector() {
            final String id = id();
            if (id.length() > 0)
                return "#" + id;

            // Translate HTML namespace ns:tag to CSS namespace syntax ns|tag
            String tagName = tagName().replace(':', '|');
            StringBuilder selector = new StringBuilder(tagName);
            String classes = StringUtil.join(classNames(), ".");
            if (classes.length() > 0)
                selector.append('.').append(classes);

            final Element parent = parent();
            if (parent == null || parent instanceof Document) // don't add Document to selector, as will always have a html node
                return selector.toString();

            selector.insert(0, " > ");
            // only disambiguate by position when the tag/class selector matches more than one element under the parent
            if (parent.select(selector.toString()).size() > 1) {
                // appended directly rather than via String.format("%d"), which localizes digits for some default locales
                selector.append(":nth-child(").append(elementSiblingIndex() + 1).append(')');
            }

            return parent.cssSelector() + selector.toString();
        }
    
        /**
         * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a
         * sibling of itself, so will not be included in the returned list.
         *
         * @return sibling elements
         */
        public Elements siblingElements() {
            if (parentNode == null)
                return new Elements(0);

            final List<Element> parentsChildren = parent().childElementsList();
            final Elements others = new Elements(parentsChildren.size() - 1);
            for (Element candidate : parentsChildren) {
                if (candidate == this)
                    continue; // identity comparison: skip only this exact element
                others.add(candidate);
            }
            return others;
        }
    
        /**
         * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
         * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
         * <p>
         * This is similar to {@link #nextSibling()}, but specifically finds only Elements
         * </p>
         *
         * @return the next element, or null if there is no next element (or this element has no parent)
         * @see #previousElementSibling()
         */
        public Element nextElementSibling() {
            if (parentNode == null) return null;
            final List<Element> siblings = parent().childElementsList();
            // indexInList returns a primitive int, so no boxing or null check is needed
            final int next = indexInList(this, siblings) + 1;
            return next < siblings.size() ? siblings.get(next) : null;
        }
    
        /**
         * Gets the previous element sibling of this element.
         *
         * @return the previous element, or null if there is no previous element (or this element has no parent)
         * @see #nextElementSibling()
         */
        public Element previousElementSibling() {
            if (parentNode == null) return null;
            final List<Element> siblings = parent().childElementsList();
            // indexInList returns a primitive int, so no boxing or null check is needed
            final int index = indexInList(this, siblings);
            return index > 0 ? siblings.get(index - 1) : null;
        }
    
        /**
         * Gets the first element sibling of this element.
         *
         * @return the first sibling that is an element (aka the parent's first element child), or null if this
         * element has no parent or is the parent's only element child
         */
        public Element firstElementSibling() {
            // guard against orphan elements, as nextElementSibling() and previousElementSibling() do
            if (parentNode == null) return null;
            // todo: should firstSibling() exclude this?
            final List<Element> siblings = parent().childElementsList();
            return siblings.size() > 1 ? siblings.get(0) : null;
        }
        
        /**
         * Get the list index of this element in its element sibling list. I.e. if this is the first element
         * sibling, returns 0. Also returns 0 when this element has no parent.
         *
         * @return position in element sibling list
         */
        public int elementSiblingIndex() {
            final Element parent = parent();
            if (parent == null)
                return 0;
            final List<Element> siblings = parent.childElementsList();
            return indexInList(this, siblings);
        }
    
        /**
         * Gets the last element sibling of this element.
         *
         * @return the last sibling that is an element (aka the parent's last element child), or null if this
         * element has no parent or is the parent's only element child
         */
        public Element lastElementSibling() {
            // guard against orphan elements, as nextElementSibling() and previousElementSibling() do
            if (parentNode == null) return null;
            final List<Element> siblings = parent().childElementsList();
            final int count = siblings.size();
            return count > 1 ? siblings.get(count - 1) : null;
        }
    
        /**
         * Find the position of {@code search} in {@code elements}, comparing by identity ({@code ==}).
         *
         * @param search the element to look for
         * @param elements the list to scan
         * @return the index of the first identical entry; note that 0 is also returned when there is no match
         */
        private static <E extends Element> int indexInList(Element search, List<E> elements) {
            final int size = elements.size();
            int position = 0;
            while (position < size) {
                if (elements.get(position) == search)
                    return position;
                position++;
            }
            return 0;
        }
    
        // DOM type methods
    
        /**
         * Finds elements, including and recursively under this element, with the specified tag name. The name is
         * normalized before matching.
         *
         * @param tagName The tag name to search for (case insensitively); must not be empty.
         * @return a list of matching elements. Will be empty if this element and none of its children match.
         */
        public Elements getElementsByTag(String tagName) {
            Validate.notEmpty(tagName);
            final String normalName = normalize(tagName);
            final Evaluator byTag = new Evaluator.Tag(normalName);
            return Collector.collect(byTag, this);
        }
    
        /**
         * Find an element by ID, including or under this element.
         * <p>
         * Note that this finds the first matching ID, starting with this element. If you search down from a
         * different starting point, it is possible to find a different element by ID. For unique element by ID
         * within a Document, use {@link Document#getElementById(String)}
         * </p>
         *
         * @param id The ID to search for; must not be empty.
         * @return The first matching element by ID, starting with this element, or null if none found.
         */
        public Element getElementById(String id) {
            Validate.notEmpty(id);

            final Elements matches = Collector.collect(new Evaluator.Id(id), this);
            return matches.size() > 0 ? matches.get(0) : null;
        }
    
        /**
         * Find elements that have this class, including or under this element. Case insensitive.
         * <p>
         * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method
         * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
         * </p>
         *
         * @param className the name of the class to search for; must not be empty.
         * @return elements with the supplied class name, empty if none
         * @see #hasClass(String)
         * @see #classNames()
         */
        public Elements getElementsByClass(String className) {
            Validate.notEmpty(className);

            final Evaluator byClass = new Evaluator.Class(className);
            return Collector.collect(byClass, this);
        }
    
        /**
         * Find elements that have a named attribute set. Case insensitive. The key is trimmed before matching.
         *
         * @param key name of the attribute, e.g. {@code href}; must not be empty
         * @return elements that have this attribute, empty if none
         */
        public Elements getElementsByAttribute(String key) {
            Validate.notEmpty(key);
            final String trimmedKey = key.trim();

            final Evaluator byAttribute = new Evaluator.Attribute(trimmedKey);
            return Collector.collect(byAttribute, this);
        }
    
        /**
         * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find
         * elements that have HTML5 datasets. The prefix is trimmed before matching.
         *
         * @param keyPrefix name prefix of the attribute e.g. {@code data-}; must not be empty
         * @return elements that have attribute names that start with the prefix, empty if none.
         */
        public Elements getElementsByAttributeStarting(String keyPrefix) {
            Validate.notEmpty(keyPrefix);
            final String trimmedPrefix = keyPrefix.trim();

            final Evaluator byPrefix = new Evaluator.AttributeStarting(trimmedPrefix);
            return Collector.collect(byPrefix, this);
        }
    
        /**
         * Find elements that have an attribute with the specific value. Case insensitive.
         *
         * @param key name of the attribute
         * @param value value of the attribute
         * @return elements that have this attribute with this value, empty if none
         */
        public Elements getElementsByAttributeValue(String key, String value) {
            final Evaluator matcher = new Evaluator.AttributeWithValue(key, value);
            return Collector.collect(matcher, this);
        }
    
        /**
         * Collects the elements that either lack the named attribute, or have it set to a different value.
         * Case insensitive.
         *
         * @param key attribute name
         * @param value attribute value that must not match
         * @return the elements that do not have a matching attribute
         */
        public Elements getElementsByAttributeValueNot(String key, String value) {
            final Evaluator withoutValue = new Evaluator.AttributeWithValueNot(key, value);
            return Collector.collect(withoutValue, this);
        }
    
        /**
         * Collects the elements whose named attribute has a value beginning with the given prefix. Case insensitive.
         *
         * @param key attribute name
         * @param valuePrefix required start of the attribute value
         * @return the elements whose attribute value starts with the prefix
         */
        public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
            final Evaluator startsWith = new Evaluator.AttributeWithValueStarting(key, valuePrefix);
            return Collector.collect(startsWith, this);
        }
    
        /**
         * Collects the elements whose named attribute has a value finishing with the given suffix. Case insensitive.
         *
         * @param key attribute name
         * @param valueSuffix required end of the attribute value
         * @return the elements whose attribute value ends with the suffix
         */
        public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
            final Evaluator endsWith = new Evaluator.AttributeWithValueEnding(key, valueSuffix);
            return Collector.collect(endsWith, this);
        }
    
        /**
         * Collects the elements whose named attribute has a value containing the given string. Case insensitive.
         *
         * @param key attribute name
         * @param match substring to look for in the attribute value
         * @return the elements whose attribute value contains the substring
         */
        public Elements getElementsByAttributeValueContaining(String key, String match) {
            final Evaluator containing = new Evaluator.AttributeWithValueContaining(key, match);
            return Collector.collect(containing, this);
        }
        
        /**
         * Collects the elements whose named attribute has a value matching the supplied regular expression.
         *
         * @param key attribute name
         * @param pattern compiled regular expression to test attribute values against
         * @return the elements whose attribute value matches the expression
         */
        public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
            final Evaluator matching = new Evaluator.AttributeWithValueMatching(key, pattern);
            return Collector.collect(matching, this);
        }
        
        /**
         * Collects the elements whose named attribute has a value matching the supplied regular expression.
         * The expression is compiled with {@link Pattern#compile(String)}; embedded flags such as {@code (?i)} and
         * {@code (?m)} can be used to control regex options.
         *
         * @param key attribute name
         * @param regex regular expression to test attribute values against
         * @return the elements whose attribute value matches the expression
         * @throws IllegalArgumentException if {@code regex} is not a valid regular expression
         */
        public Elements getElementsByAttributeValueMatching(String key, String regex) {
            final Pattern compiled;
            try {
                compiled = Pattern.compile(regex);
            } catch (PatternSyntaxException syntaxError) {
                // surface a bad expression as an argument error, keeping the original cause
                throw new IllegalArgumentException("Pattern syntax error: " + regex, syntaxError);
            }
            return getElementsByAttributeValueMatching(key, compiled);
        }
        
        /**
         * Collects the elements whose sibling index is lower than the supplied index.
         *
         * @param index 0-based index
         * @return the elements with a sibling index less than {@code index}
         */
        public Elements getElementsByIndexLessThan(int index) {
            final Evaluator below = new Evaluator.IndexLessThan(index);
            return Collector.collect(below, this);
        }
        
        /**
         * Collects the elements whose sibling index is higher than the supplied index.
         *
         * @param index 0-based index
         * @return the elements with a sibling index greater than {@code index}
         */
        public Elements getElementsByIndexGreaterThan(int index) {
            final Evaluator above = new Evaluator.IndexGreaterThan(index);
            return Collector.collect(above, this);
        }
        
        /**
         * Collects the elements whose sibling index is the same as the supplied index.
         *
         * @param index 0-based index
         * @return the elements with a sibling index equal to {@code index}
         */
        public Elements getElementsByIndexEquals(int index) {
            final Evaluator atIndex = new Evaluator.IndexEquals(index);
            return Collector.collect(atIndex, this);
        }
        
        /**
         * Collects the elements whose text contains the given string, ignoring case. The string may occur directly in
         * the element or in any of its descendants.
         *
         * @param searchText string to look for in the element's text
         * @return the elements that contain the string, case insensitive.
         * @see Element#text()
         */
        public Elements getElementsContainingText(String searchText) {
            final Evaluator containsText = new Evaluator.ContainsText(searchText);
            return Collector.collect(containsText, this);
        }
        
        /**
         * Collects the elements whose own text contains the given string, ignoring case. The string must occur
         * directly in the element; text of its descendants is not considered.
         *
         * @param searchText string to look for in the element's own text
         * @return the elements that contain the string, case insensitive.
         * @see Element#ownText()
         */
        public Elements getElementsContainingOwnText(String searchText) {
            final Evaluator containsOwnText = new Evaluator.ContainsOwnText(searchText);
            return Collector.collect(containsOwnText, this);
        }
        
        /**
         * Collects the elements whose text matches the supplied regular expression.
         *
         * @param pattern compiled regular expression to test the text against
         * @return the elements matching the supplied regular expression.
         * @see Element#text()
         */
        public Elements getElementsMatchingText(Pattern pattern) {
            final Evaluator textMatcher = new Evaluator.Matches(pattern);
            return Collector.collect(textMatcher, this);
        }
        
        /**
         * Collects the elements whose text matches the supplied regular expression. The expression is compiled with
         * {@link Pattern#compile(String)}; embedded flags such as {@code (?i)} and {@code (?m)} can be used to control
         * regex options.
         *
         * @param regex regular expression to test the text against
         * @return the elements matching the supplied regular expression.
         * @throws IllegalArgumentException if {@code regex} is not a valid regular expression
         * @see Element#text()
         */
        public Elements getElementsMatchingText(String regex) {
            final Pattern compiled;
            try {
                compiled = Pattern.compile(regex);
            } catch (PatternSyntaxException syntaxError) {
                // surface a bad expression as an argument error, keeping the original cause
                throw new IllegalArgumentException("Pattern syntax error: " + regex, syntaxError);
            }
            return getElementsMatchingText(compiled);
        }
        
        /**
         * Collects the elements whose own text matches the supplied regular expression.
         *
         * @param pattern compiled regular expression to test the own text against
         * @return the elements matching the supplied regular expression.
         * @see Element#ownText()
         */
        public Elements getElementsMatchingOwnText(Pattern pattern) {
            final Evaluator ownTextMatcher = new Evaluator.MatchesOwn(pattern);
            return Collector.collect(ownTextMatcher, this);
        }
        
        /**
         * Collects the elements whose own text matches the supplied regular expression. The expression is compiled
         * with {@link Pattern#compile(String)}; embedded flags such as {@code (?i)} and {@code (?m)} can be used to
         * control regex options.
         *
         * @param regex regular expression to test the own text against
         * @return the elements matching the supplied regular expression.
         * @throws IllegalArgumentException if {@code regex} is not a valid regular expression
         * @see Element#ownText()
         */
        public Elements getElementsMatchingOwnText(String regex) {
            final Pattern compiled;
            try {
                compiled = Pattern.compile(regex);
            } catch (PatternSyntaxException syntaxError) {
                // surface a bad expression as an argument error, keeping the original cause
                throw new IllegalArgumentException("Pattern syntax error: " + regex, syntaxError);
            }
            return getElementsMatchingOwnText(compiled);
        }
        
        /**
         * Collects all elements under this element: this element itself, its children, and children of children.
         *
         * @return all elements
         */
        public Elements getAllElements() {
            final Evaluator everything = new Evaluator.AllElements();
            return Collector.collect(everything, this);
        }
    
        /**
         * Returns the combined text of this element and everything below it, with whitespace normalized and the
         * result trimmed.
         * <p>
         * For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns
         * {@code "Hello there now!"}
         *
         * @return unencoded, normalized text, or empty string if none.
         * @see #wholeText() if you don't want the text to be normalized.
         * @see #ownText()
         * @see #textNodes()
         */
        public String text() {
            final StringBuilder buffer = new StringBuilder();
            NodeTraversor.traverse(new NodeVisitor() {
                public void head(Node node, int depth) {
                    if (node instanceof TextNode) {
                        appendNormalisedText(buffer, (TextNode) node);
                        return;
                    }
                    if (!(node instanceof Element) || buffer.length() == 0)
                        return;

                    // a block element or a <br> must be separated from the text gathered so far
                    final Element el = (Element) node;
                    final boolean breaksLine = el.isBlock() || el.tag.getName().equals("br");
                    if (breaksLine && !TextNode.lastCharIsWhitespace(buffer))
                        buffer.append(' ');
                }

                public void tail(Node node, int depth) {
                    if (!(node instanceof Element))
                        return;

                    // keep a space between a block tag and a text node right after it:
                    // <div>One</div>Two should be "One Two".
                    final Element el = (Element) node;
                    final boolean textFollowsBlock = el.isBlock() && (node.nextSibling() instanceof TextNode);
                    if (textFollowsBlock && !TextNode.lastCharIsWhitespace(buffer))
                        buffer.append(' ');
                }
            }, this);
            return buffer.toString().trim();
        }
    
        /**
         * Returns the (unencoded) text of all text nodes below this element, concatenated as-is, including any
         * newlines and spaces present in the original.
         *
         * @return unencoded, un-normalized text
         * @see #text()
         */
        public String wholeText() {
            final StringBuilder buffer = new StringBuilder();
            final NodeVisitor textCollector = new NodeVisitor() {
                public void head(Node node, int depth) {
                    if (!(node instanceof TextNode))
                        return;
                    buffer.append(((TextNode) node).getWholeText());
                }

                public void tail(Node node, int depth) {
                    // nothing to do when leaving a node
                }
            };
            NodeTraversor.traverse(textCollector, this);
            return buffer.toString();
        }
    
        /**
         * Returns only the text that belongs to this element directly; text of child elements is left out.
         * <p>
         * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns
         * {@code "Hello now!"}, whereas {@code p.text()} returns {@code "Hello there now!"}. The text inside the
         * {@code b} element is not returned, because it is not a direct child of the {@code p} element.
         *
         * @return unencoded text, or empty string if none.
         * @see #text()
         * @see #textNodes()
         */
        public String ownText() {
            final StringBuilder buffer = new StringBuilder();
            ownText(buffer);
            final String collected = buffer.toString();
            return collected.trim();
        }
    
        /**
         * Appends the text of this element's direct text-node children to {@code accum}; a direct child element
         * only contributes whitespace, and only if it is a {@code br}.
         *
         * @param accum builder receiving the text
         */
        private void ownText(StringBuilder accum) {
            for (Node direct : childNodes) {
                if (direct instanceof TextNode)
                    appendNormalisedText(accum, (TextNode) direct);
                else if (direct instanceof Element)
                    appendWhitespaceIfBr((Element) direct, accum);
            }
        }
    
        /**
         * Appends the text of {@code textNode} to {@code accum}. The text is copied verbatim when the node's parent
         * preserves whitespace or the node is a {@link CDataNode}; otherwise its whitespace is normalised.
         *
         * @param accum builder receiving the text
         * @param textNode node whose text is appended
         */
        private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
            final String raw = textNode.getWholeText();
            final boolean verbatim = preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode;

            if (verbatim) {
                accum.append(raw);
                return;
            }
            StringUtil.appendNormalisedWhitespace(accum, raw, TextNode.lastCharIsWhitespace(accum));
        }
    
        /**
         * Appends a single space to {@code accum} when {@code element} is a {@code br} tag and the accumulated text
         * does not already end in whitespace.
         *
         * @param element element to inspect
         * @param accum builder receiving the space
         */
        private static void appendWhitespaceIfBr(Element element, StringBuilder accum) {
            if (!element.tag.getName().equals("br"))
                return;
            if (TextNode.lastCharIsWhitespace(accum))
                return;
            accum.append(" ");
        }
    
        /**
         * Tells whether whitespace must be preserved for the given node, i.e. whether the node is an element whose
         * tag, or the tag of one of its nearest ancestors, preserves whitespace.
         *
         * @param node node to test; anything that is not an {@link Element} (including null) yields false
         * @return true if the element or one of the inspected ancestors has a whitespace-preserving tag
         */
        static boolean preserveWhitespace(Node node) {
            if (!(node instanceof Element))
                return false;

            // looks only at this element and five levels up, to prevent recursion & needless stack searches
            Element current = (Element) node;
            for (int level = 0; level < 6 && current != null; level++) {
                if (current.tag.preserveWhitespace())
                    return true;
                current = current.parent();
            }
            return false;
        }
    
        /**
         * Replaces the content of this element with a single text node holding the given text. Any existing
         * contents (text or elements) are cleared first.
         *
         * @param text unencoded text; must not be null
         * @return this element
         */
        public Element text(String text) {
            Validate.notNull(text);

            empty();
            appendChild(new TextNode(text));
            return this;
        }
    
        /**
         * Tests whether this element has any text content that is not just whitespace, either in its own text
         * nodes or in those of its descendant elements.
         *
         * @return true if element has non-blank text content.
         */
        public boolean hasText() {
            for (Node child : childNodes) {
                if (child instanceof TextNode) {
                    if (!((TextNode) child).isBlank())
                        return true;
                } else if (child instanceof Element && ((Element) child).hasText()) {
                    return true;
                }
            }
            return false;
        }
    
        /**
         * Returns the combined data of this element. Data is e.g. the inside of a {@code script} tag. Note that data
         * is NOT the text of the element: use {@link #text()} to get the text that would be visible to a user, and
         * {@link #data()} for the contents of scripts, comments, CSS styles, etc.
         *
         * @return the data, or empty string if none
         *
         * @see #dataNodes()
         */
        public String data() {
            final StringBuilder collected = new StringBuilder();

            for (Node child : childNodes) {
                if (child instanceof DataNode)
                    collected.append(((DataNode) child).getWholeData());
                else if (child instanceof Comment)
                    collected.append(((Comment) child).getData());
                else if (child instanceof Element)
                    collected.append(((Element) child).data()); // gather data of nested elements too
                else if (child instanceof CDataNode)
                    // this shouldn't really happen because the html parser won't see the cdata as anything special
                    // when parsing script, but in case another type gets through.
                    collected.append(((CDataNode) child).getWholeText());
            }
            return collected.toString();
        }
    
        /**
         * Returns the literal value of this element's {@code class} attribute, trimmed. The value may hold several
         * class names separated by spaces (e.g. on {@code <div class="header gray">} it returns
         * {@code "header gray"}).
         *
         * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
         */
        public String className() {
            final String rawClassAttr = attr("class");
            return rawClassAttr.trim();
        }
    
        /**
         * Returns all of the element's class names. E.g. on element {@code <div class="header gray">}, returns a set
         * of two elements {@code "header", "gray"}. The set is a fresh copy: modifications to it are not pushed to
         * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
         *
         * @return set of classnames in attribute order, empty if no class attribute
         */
        public Set<String> classNames() {
            final Set<String> result = new LinkedHashSet<>();
            for (String name : classSplit.split(className())) {
                // splitting an empty className() yields one empty string, which is not a class
                if (!name.isEmpty())
                    result.add(name);
            }
            return result;
        }
    
        /**
         * Sets the element's {@code class} attribute to the supplied class names, joined by single spaces. An empty
         * set removes the {@code class} attribute.
         *
         * @param classNames set of classes; must not be null
         * @return this element, for chaining
         */
        public Element classNames(Set<String> classNames) {
            Validate.notNull(classNames);
            if (!classNames.isEmpty())
                attributes().put("class", StringUtil.join(classNames, " "));
            else
                attributes().remove("class");
            return this;
        }
    
        /**
         * Tests if this element has a class. Case insensitive.
         * <p>
         * The {@code class} attribute is scanned in place: it is treated as a list of names separated by whitespace
         * (as defined by {@link Character#isWhitespace(char)}), and each name is compared to {@code className}
         * ignoring case.
         *
         * @param className name of class to check for
         * @return true if it does, false if not
         */
        // performance sensitive
        public boolean hasClass(String className) {
            final String classAttr = attributes().getIgnoreCase("class");
            final int len = classAttr.length();
            final int wantLen = className.length();

            // an empty attribute, or one shorter than the wanted name, cannot contain it
            if (len == 0 || len < wantLen) {
                return false;
            }

            // if both lengths are equal, only need compare the className with the attribute
            if (len == wantLen) {
                return className.equalsIgnoreCase(classAttr);
            }

            // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations)
            boolean inClass = false; // true while the scan is inside a class name
            int start = 0;           // index where the current class name began
            for (int i = 0; i < len; i++) {
                if (Character.isWhitespace(classAttr.charAt(i))) {
                    if (inClass) {
                        // white space ends a class name, compare it with the requested one, ignore case
                        if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) {
                            return true;
                        }
                        inClass = false;
                    }
                } else {
                    if (!inClass) {
                        // we're in a class name : keep the start of the substring
                        inClass = true;
                        start = i;
                    }
                }
            }

            // check the last entry
            // (a name running up to the end of the attribute is not terminated by whitespace inside the loop)
            if (inClass && len - start == wantLen) {
                return classAttr.regionMatches(true, start, className, 0, wantLen);
            }

            return false;
        }
    
        /**
         * Adds a class name to this element's {@code class} attribute.
         *
         * @param className class name to add; must not be null
         * @return this element
         */
        public Element addClass(String className) {
            Validate.notNull(className);

            final Set<String> updated = classNames();
            updated.add(className);
            classNames(updated); // write the modified set back to the attribute

            return this;
        }
    
        /**
         * Removes a class name from this element's {@code class} attribute.
         *
         * @param className class name to remove; must not be null
         * @return this element
         */
        public Element removeClass(String className) {
            Validate.notNull(className);

            final Set<String> updated = classNames();
            updated.remove(className);
            classNames(updated); // write the modified set back to the attribute

            return this;
        }
    
        /**
         * Toggles a class name on this element's {@code class} attribute: if present, it is removed; otherwise it
         * is added.
         *
         * @param className class name to toggle; must not be null
         * @return this element
         */
        public Element toggleClass(String className) {
            Validate.notNull(className);

            final Set<String> updated = classNames();
            // remove() reports whether the name was present; if it was not, add it instead
            if (!updated.remove(className))
                updated.add(className);
            classNames(updated);

            return this;
        }
        
        /**
         * Returns the value of a form element (input, textarea, etc). For a {@code textarea} this is the element's
         * text; for any other tag it is the {@code value} attribute.
         *
         * @return the value of the form element, or empty string if not set.
         */
        public String val() {
            return tagName().equals("textarea") ? text() : attr("value");
        }
        
        /**
         * Sets the value of a form element (input, textarea, etc). For a {@code textarea} the value becomes the
         * element's text; for any other tag it is stored in the {@code value} attribute.
         *
         * @param value value to set
         * @return this element (for chaining)
         */
        public Element val(String value) {
            final boolean isTextarea = tagName().equals("textarea");
            if (isTextarea) {
                text(value);
            } else {
                attr("value", value);
            }
            return this;
        }
    
        /**
         * Writes the opening tag of this element (name and attributes) to {@code accum}, preceded by indentation
         * when pretty-printing calls for it.
         *
         * @param accum destination of the generated HTML
         * @param depth depth passed on to {@code indent}
         * @param out output settings (pretty-print, outline and syntax are consulted)
         * @throws IOException if writing to {@code accum} fails
         */
        void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException {
            final boolean wantsIndent = out.prettyPrint()
                && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline());
            if (wantsIndent) {
                // a StringBuilder that is still empty gets no indent
                final boolean emptyBuilder = accum instanceof StringBuilder && ((StringBuilder) accum).length() == 0;
                if (!emptyBuilder)
                    indent(accum, depth, out);
            }

            accum.append('<').append(tagName());
            if (attributes != null)
                attributes.html(accum, out);

            // selfclosing includes unknown tags, isEmpty defines tags that are always empty
            final boolean selfClosed = childNodes.isEmpty() && tag.isSelfClosing();
            final boolean plainClose = !selfClosed
                || (out.syntax() == Document.OutputSettings.Syntax.html && tag.isEmpty());
            if (plainClose)
                accum.append('>');
            else
                accum.append(" />"); // <img> in html, <img /> in xml
        }
    
    	void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
            if (!(childNodes.isEmpty() && tag.isSelfClosing())) {
                if (out.prettyPrint() && (!childNodes.isEmpty() && (
                        tag.formatAsBlock() || (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && !(childNodes.get(0) instanceof TextNode))))
                )))
                    indent(accum, depth, out);
                accum.append("</").append(tagName()).append('>');
            }
        }
    
        /**
         * Returns the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
         * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.) The result is
         * trimmed when pretty-printing is enabled in the output settings.
         *
         * @return String of HTML.
         * @see #outerHtml()
         */
        public String html() {
            final StringBuilder buffer = StringUtil.stringBuilder();
            html(buffer);
            if (getOutputSettings().prettyPrint())
                return buffer.toString().trim();
            return buffer.toString();
        }
    
        /**
         * Appends the outer HTML of every child node, in order, to {@code accum}.
         *
         * @param accum builder receiving the HTML
         */
        private void html(StringBuilder accum) {
            for (Node child : childNodes) {
                child.outerHtml(accum);
            }
        }
    
        /**
         * {@inheritDoc}
         */
        @Override
        public <T extends Appendable> T html(T appendable) {
            // write each child's outer HTML, in order, then hand the same appendable back
            for (Node child : childNodes) {
                child.outerHtml(appendable);
            }
            return appendable;
        }
        
        /**
         * Sets this element's inner HTML: the existing content is cleared first, then the given HTML is appended.
         *
         * @param html HTML to parse and set into this element
         * @return this element
         * @see #append(String)
         */
        public Element html(String html) {
            empty();       // drop the current content
            append(html);  // then add the new HTML

            return this;
        }
    
    	public String toString() {
            return outerHtml();
        }
    
        /**
         * Clones this element by delegating to the superclass, narrowing the result to {@code Element}.
         *
         * @return the cloned element
         */
        @Override
        public Element clone() {
            final Object copy = super.clone();
            return (Element) copy;
        }
    
        /**
         * Creates a new element with this element's tag and base URI and a copy of its attributes. Child nodes are
         * not copied.
         * <p>
         * The attributes are cloned rather than shared, so that changing an attribute on the returned element does
         * not alter this element (and vice versa).
         *
         * @return the new element
         */
        @Override
        public Element shallowClone() {
            // simpler than implementing a clone version with no child copy
            final Attributes attributesCopy = attributes != null ? attributes.clone() : null;
            return new Element(tag, baseUri, attributesCopy);
        }
    
        /**
         * Builds the clone of this element for the given parent: the superclass produces the base copy, then the
         * attributes are cloned (if any), the base URI is carried over, and the copy gets its own child list filled
         * with this element's children.
         *
         * @param parent passed through to the superclass {@code doClone}
         * @return the cloned element
         */
        @Override
        protected Element doClone(Node parent) {
            final Element copy = (Element) super.doClone(parent);
            if (attributes != null)
                copy.attributes = attributes.clone();
            else
                copy.attributes = null;
            copy.baseUri = baseUri;

            final NodeList copiedChildren = new NodeList(copy, childNodes.size());
            copy.childNodes = copiedChildren;
            copiedChildren.addAll(childNodes); // the children then get iterated and cloned in Node.clone

            return copy;
        }
    
        /**
         * List of child nodes that tells its owning element whenever its contents change.
         */
        private static final class NodeList extends ChangeNotifyingArrayList<Node> {
            /** Element whose {@code nodelistChanged()} is called on a change. */
            private final Element owner;

            /**
             * @param owner element to notify about changes
             * @param capacity initial capacity handed to the superclass
             */
            NodeList(Element owner, int capacity) {
                super(capacity);
                this.owner = owner;
            }

            /**
             * Relays the change to the owning element.
             * NOTE(review): presumably invoked by {@code ChangeNotifyingArrayList} on modification; confirm there.
             */
            public void onContentsChanged() {
                owner.nodelistChanged();
            }
        }
    }
    

      

    ##################

  • 相关阅读:
    TyvjP2018 「Nescafé26」小猫爬山
    简化版桶排序
    getchar吸收回车
    fprintf与fscanf
    c语言命令行参数
    bit、byte、位、字节、汉字的关系
    C语言联合体
    结构体
    关于Integer的parseInt(String s, int radix)方法的使用
    java中nextLine()和next()的区别
  • 原文地址:https://www.cnblogs.com/herd/p/16268420.html
Copyright © 2020-2023  润新知