588 lines
18 KiB
PHP
588 lines
18 KiB
PHP
<?php
/**
 * Class AMP_Base_Sanitizer
 *
 * @package AMP
 */

/**
 * Class AMP_Base_Sanitizer
 */
abstract class AMP_Base_Sanitizer {
|
|
|
|
/**
 * Fallback value for the height attribute, used when an $attributes array has no (or an empty) height.
 *
 * @since 0.3.3
 *
 * @var int
 */
const FALLBACK_HEIGHT = 400;

/**
 * Value for <amp-image-lightbox> ID.
 *
 * @since 1.0
 *
 * @var string
 */
const AMP_IMAGE_LIGHTBOX_ID = 'amp-image-lightbox';

/**
 * Placeholder for default args, to be set in child classes.
 *
 * The constructor merges the args it receives on top of these defaults.
 *
 * @since 0.2
 *
 * @var array
 */
protected $DEFAULT_ARGS = array();

/**
 * DOM.
 *
 * @since 0.2
 *
 * @var DOMDocument A standard PHP representation of an HTML document in object form.
 */
protected $dom;

/**
 * Array of flags used to control sanitization.
 *
 * @var array {
 *      @type int      $content_max_width
 *      @type bool     $add_placeholder
 *      @type bool     $use_document_element
 *      @type bool     $require_https_src
 *      @type array    $amp_allowed_tags
 *      @type array    $amp_globally_allowed_attributes
 *      @type array    $amp_layout_allowed_attributes
 *      @type array    $amp_bind_placeholder_prefix
 *      @type bool     $allow_dirty_styles
 *      @type bool     $allow_dirty_scripts
 *      @type bool     $should_locate_sources
 *      @type callable $validation_error_callback
 * }
 */
protected $args;

/**
 * Flag to be set in child class' sanitize() method indicating if the
 * HTML contained in the DOMDocument has been sanitized yet or not.
 *
 * @since 0.2
 *
 * @var bool
 */
protected $did_convert_elements = false;

/**
 * The root element used for sanitization. Either html or body.
 *
 * @var DOMElement
 */
protected $root_element;

/**
 * Keep track of nodes that should not be removed to prevent duplicated validation errors since sanitization is rejected.
 *
 * Keyed by node name; each entry is a list of the nodes with that name whose removal was rejected.
 *
 * @var array
 */
private $should_not_removed_nodes = array();
|
|
|
|
/**
 * AMP_Base_Sanitizer constructor.
 *
 * Stores the document, merges the supplied args over the class defaults, and picks the root
 * element that sanitization operates on.
 *
 * @since 0.2
 *
 * @param DOMDocument $dom  Represents the HTML document to sanitize.
 * @param array       $args {
 *      Args.
 *
 *      @type int      $content_max_width
 *      @type bool     $add_placeholder
 *      @type bool     $use_document_element When non-empty, the document element is used as the root instead of the first body element.
 *      @type bool     $require_https_src
 *      @type string[] $amp_allowed_tags
 *      @type string[] $amp_globally_allowed_attributes
 *      @type string[] $amp_layout_allowed_attributes
 * }
 */
public function __construct( $dom, $args = array() ) {
	$this->dom  = $dom;
	$this->args = array_merge( $this->DEFAULT_ARGS, $args );

	// Root is the first <body> unless the caller opted into using the document element.
	$this->root_element = empty( $this->args['use_document_element'] )
		? $this->dom->getElementsByTagName( 'body' )->item( 0 )
		: $this->dom->documentElement;
}
|
|
|
|
/**
 * Add filters to manipulate output during output buffering before the DOM is constructed.
 *
 * Lets a sanitizer register actions and filters before the page is rendered, so issues can be
 * fixed during output buffering rather than by manipulating the DOM in sanitize(). The method is
 * static because it is invoked before the class is instantiated, when no DOM is available yet.
 * It is only called when 'amp' theme support is present, and is conceptually similar to
 * AMP_Base_Embed_Handler's register_embed method.
 *
 * The base implementation does nothing.
 *
 * @since 1.0
 * @see \AMP_Base_Embed_Handler::register_embed()
 *
 * @param array $args Args.
 */
public static function add_buffering_hooks( $args = array() ) {
	// Intentionally a no-op in the base class.
}
|
|
|
|
/**
 * Get mapping of HTML selectors to the AMP component selectors which they may be converted into.
 *
 * The base implementation reports no conversions.
 *
 * @return array Mapping (empty in the base class).
 */
public function get_selector_conversion_mapping() {
	$mapping = array();

	return $mapping;
}
|
|
|
|
/**
 * Run logic before any sanitizers are run.
 *
 * Called after all sanitizers have been instantiated but before sanitize() is invoked on any of
 * them; it receives the full list of instantiated sanitizers. The base implementation does nothing.
 *
 * @param AMP_Base_Sanitizer[] $sanitizers Sanitizers.
 */
public function init( $sanitizers ) {
	// Intentionally a no-op in the base class.
}
|
|
|
|
/**
 * Sanitize the HTML contained in the DOMDocument received by the constructor.
 *
 * Abstract: every concrete sanitizer must provide its own implementation.
 */
|
|
abstract public function sanitize();
|
|
|
|
/**
 * Return array of values that would be valid as an HTML `script` element.
 *
 * Array keys are AMP element names and array values are their respective
 * JavaScript URLs from https://cdn.ampproject.org
 *
 * @since 0.2
 *
 * @return string[] Component name as array key and JavaScript URL as array value. The base
 *                  implementation always returns an empty array.
 */
public function get_scripts() {
	$scripts = array();

	return $scripts;
}
|
|
|
|
/**
 * Return array of values that would be valid as an HTML `style` attribute.
 *
 * @since 0.4
 * @deprecated As of 1.0, use get_stylesheets().
 *
 * @return array[] Mapping of CSS selectors to arrays of properties. The base implementation
 *                 always returns an empty array.
 */
public function get_styles() {
	$styles = array();

	return $styles;
}
|
|
|
|
/**
 * Get stylesheets.
 *
 * Turns each selector => properties pair from get_styles() into a single CSS rule string of the
 * form "selector { prop; prop; }".
 *
 * @since 0.7
 *
 * @return array Values are the CSS stylesheets. Keys are MD5 hashes of the stylesheets.
 */
public function get_stylesheets() {
	$sheets_by_hash = array();

	foreach ( $this->get_styles() as $css_selector => $declarations ) {
		$rule = sprintf( '%s { %s }', $css_selector, implode( '; ', $declarations ) . ';' );

		// Keying by hash means identical rules collapse into one entry.
		$sheets_by_hash[ md5( $rule ) ] = $rule;
	}

	return $sheets_by_hash;
}
|
|
|
|
/**
 * Get HTML body as DOMElement from DOMDocument received by the constructor.
 *
 * @deprecated Just reference $root_element instead.
 *
 * @return DOMElement The first body element in the document.
 */
protected function get_body_node() {
	$body_elements = $this->dom->getElementsByTagName( 'body' );

	return $body_elements->item( 0 );
}
|
|
|
|
/**
 * Sanitizes a CSS dimension specifier while being sensitive to dimension context.
 *
 * Resolution order:
 *  - null yields an empty string;
 *  - numeric values are returned as a number, with negatives clamped to 0;
 *  - values ending in 'px' are passed through absint();
 *  - values ending in '%' are converted to a rounded share of $args['content_max_width'], but only
 *    when $dimension is 'width' and that arg is set;
 *  - anything else yields an empty string.
 *
 * @param string $value     A valid CSS dimension specifier; e.g. 50, 50px, 50%.
 * @param string $dimension 'width' or ignored. 'width' only affects $values ending in '%'.
 *
 * @return float|int|string Returns a numeric dimension value, or an empty string.
 */
public function sanitize_dimension( $value, $dimension ) {

	// Only null counts as "missing", so that 0 can still be used as a valid dimension.
	if ( null === $value ) {
		return '';
	}

	// Integers, floats and numeric strings: never allow a negative result.
	if ( is_numeric( $value ) ) {
		return max( 0, floatval( $value ) );
	}

	if ( AMP_String_Utils::endswith( $value, 'px' ) ) {
		return absint( $value );
	}

	// A percentage can only be resolved for widths, and only against a known content max width.
	$is_resolvable_percentage = (
		AMP_String_Utils::endswith( $value, '%' )
		&& 'width' === $dimension
		&& isset( $this->args['content_max_width'] )
	);
	if ( $is_resolvable_percentage ) {
		return round( ( absint( $value ) / 100 ) * $this->args['content_max_width'] );
	}

	return '';
}
|
|
|
|
/**
 * Sets the layout, and possibly the 'height' and 'width' attributes.
 *
 * Attributes that already carry a layout other than 'flex-item' are returned untouched. Otherwise
 * an empty height is replaced by the fallback height (dropping any width), and a missing or empty
 * width results in the 'fixed-height' layout.
 *
 * @param array $attributes {
 *      Attributes.
 *
 *      @type int    $height
 *      @type int    $width
 *      @type string $sizes
 *      @type string $class
 *      @type string $layout
 * }
 * @return array Attributes.
 */
public function set_layout( $attributes ) {
	// Any explicit layout other than 'flex-item' is kept as-is. This is the same predicate as
	// "'fill' === layout || 'flex-item' !== layout", since 'fill' is itself not 'flex-item'.
	$has_final_layout = isset( $attributes['layout'] ) && 'flex-item' !== $attributes['layout'];
	if ( $has_final_layout ) {
		return $attributes;
	}

	// Without a usable height the width is meaningless too, so drop it and fall back.
	if ( empty( $attributes['height'] ) ) {
		unset( $attributes['width'] );
		$attributes['height'] = self::FALLBACK_HEIGHT;
	}

	// No width (possibly because it was just dropped) means only the height is fixed.
	if ( empty( $attributes['width'] ) ) {
		$attributes['layout'] = 'fixed-height';
	}

	return $attributes;
}
|
|
|
|
/**
 * Adds or appends key and value to list of attributes.
 *
 * If the key is not yet set, the value is stored as given. If it is already set, the value is
 * concatenated onto the existing one using the separator, and the result is trimmed.
 *
 * @param array  $attributes {
 *      Attributes (modified in place).
 *
 *      @type int    $height
 *      @type int    $width
 *      @type string $sizes
 *      @type string $class
 *      @type string $layout
 * }
 * @param string $key       Valid associative array index to add.
 * @param string $value     Value to add or append to array indexed at the key.
 * @param string $separator Optional; defaults to space but some other separator if needed.
 */
public function add_or_append_attribute( &$attributes, $key, $value, $separator = ' ' ) {
	// New key: store the value untouched.
	if ( ! isset( $attributes[ $key ] ) ) {
		$attributes[ $key ] = $value;
		return;
	}

	// Existing key: append, then strip leading/trailing whitespace from the combined value.
	$combined           = $attributes[ $key ] . $separator . $value;
	$attributes[ $key ] = trim( $combined );
}
|
|
|
|
/**
 * Decide if we should remove a src attribute if https is required.
 *
 * If not required, the implementing class may want to try and force https instead.
 *
 * URLs whose scheme is https (compared case-insensitively) are always returned unchanged. For
 * everything else, including relative and protocol-relative URLs:
 *  - when $args['require_https_src'] is exactly true, an empty string is returned;
 *  - when it is unset or exactly false and $force_https is exactly true, the URL is rewritten
 *    with set_url_scheme( $src, 'https' );
 *  - otherwise the URL is returned unchanged.
 *
 * @param string  $src         URL to convert to HTTPS if forced, or made empty if $args['require_https_src'].
 * @param boolean $force_https Force setting of HTTPS if true.
 * @return string URL which may have been updated with HTTPS, or may have been made empty.
 */
public function maybe_enforce_https_src( $src, $force_https = false ) {
	// Use parse_url() rather than strtok(): strtok() keeps global tokenizer state (so calling it
	// here would reset any tokenization a caller has in progress), and URL schemes are
	// case-insensitive, so 'HTTPS://…' must be recognized as HTTPS as well.
	$scheme = parse_url( $src, PHP_URL_SCHEME );
	if ( is_string( $scheme ) && 'https' === strtolower( $scheme ) ) {
		return $src;
	}

	// An unset flag behaves like an explicit false.
	$https_required = isset( $this->args['require_https_src'] ) ? $this->args['require_https_src'] : false;

	if ( true === $https_required ) {
		// Remove the src. Let the implementing class decide what do from here.
		return '';
	}

	if ( false === $https_required && true === $force_https ) {
		// Don't remove the src, but force https instead.
		return set_url_scheme( $src, 'https' );
	}

	return $src;
}
|
|
|
|
/**
 * Removes an invalid child of a node.
 *
 * Asks should_sanitize_validation_error() whether the node may be removed. If so, the node is
 * detached from its parent (when it still has one). If not, the node is remembered so that a
 * later call for the same node returns false immediately without reporting it again.
 *
 * @since 0.7
 *
 * @param DOMNode|DOMElement $node             The node to remove.
 * @param array              $validation_error Validation error details.
 * @return bool Whether the node should have been removed, that is, that the node was sanitized for validity.
 */
public function remove_invalid_child( $node, $validation_error = array() ) {
	$node_name = $node->nodeName;

	// Prevent double-reporting nodes that are rejected for sanitization.
	$already_rejected = (
		isset( $this->should_not_removed_nodes[ $node_name ] )
		&& in_array( $node, $this->should_not_removed_nodes[ $node_name ], true )
	);
	if ( $already_rejected ) {
		return false;
	}

	$should_remove = $this->should_sanitize_validation_error( $validation_error, compact( 'node' ) );

	if ( ! $should_remove ) {
		$this->should_not_removed_nodes[ $node_name ][] = $node;
	} elseif ( null !== $node->parentNode ) {
		// A node that is already detached has no parent to remove it from; calling
		// removeChild() on null would be a fatal error.
		$node->parentNode->removeChild( $node );
	}

	return $should_remove;
}
|
|
|
|
/**
 * Removes an invalid attribute of a node.
 *
 * Resolves the attribute (by name or as a DOMAttr), asks should_sanitize_validation_error()
 * whether it may be removed, and removes it from the element if so. When the attribute does not
 * exist on the element, or the supplied DOMAttr belongs to a different element, nothing is
 * reported or removed and false is returned.
 *
 * @since 0.7
 *
 * @param DOMElement     $element          The node for which to remove the attribute.
 * @param DOMAttr|string $attribute        The attribute to remove from the element.
 * @param array          $validation_error Validation error details.
 * @return bool Whether the attribute should have been removed, that is, that the attribute was sanitized for validity.
 */
public function remove_invalid_attribute( $element, $attribute, $validation_error = array() ) {
	$node = is_string( $attribute ) ? $element->getAttributeNode( $attribute ) : $attribute;

	// getAttributeNode() yields a non-DOMAttr value for a missing attribute, and
	// removeAttributeNode() fails for an attribute the element does not own. Bail out in both cases.
	if ( ! ( $node instanceof DOMAttr ) ) {
		return false;
	}
	$owner = $node->ownerElement;
	if ( null === $owner || ! $owner->isSameNode( $element ) ) {
		return false;
	}

	$should_remove = $this->should_sanitize_validation_error( $validation_error, compact( 'node' ) );
	if ( $should_remove ) {
		$element->removeAttributeNode( $node );
	}

	return $should_remove;
}
|
|
|
|
/**
 * Check whether or not sanitization should occur in response to validation error.
 *
 * Without a callable $args['validation_error_callback'] sanitization always proceeds. Otherwise
 * the error is passed through prepare_validation_error() and handed to the callback together with
 * $data; sanitization is skipped only when the callback returns exactly false.
 *
 * @since 1.0
 *
 * @param array $validation_error Validation error.
 * @param array $data             Data including the node.
 * @return bool Whether to sanitize.
 */
public function should_sanitize_validation_error( $validation_error, $data = array() ) {
	$has_callback = (
		! empty( $this->args['validation_error_callback'] )
		&& is_callable( $this->args['validation_error_callback'] )
	);
	if ( ! $has_callback ) {
		return true;
	}

	$prepared_error = $this->prepare_validation_error( $validation_error, $data );
	$verdict        = call_user_func( $this->args['validation_error_callback'], $prepared_error, $data );

	// Only an explicit false vetoes sanitization; null and other values let it proceed.
	return false !== $verdict;
}
|
|
|
|
/**
 * Prepare validation error.
 *
 * Enriches the error with details of $data['node'] when that is a DOMNode:
 *  - always: 'node_name', plus 'parent_name' when the node has a parent;
 *  - for elements: default 'code' and 'type', the element's attributes as 'node_attributes', the
 *    'text' of inline scripts, and a normalized 'ver' query arg on script src / link href URLs;
 *  - for attributes: default 'code' and 'type', and the owning element's attributes as
 *    'element_attributes'.
 * Keys already present in $error ('code', 'type', 'node_attributes', 'element_attributes') are
 * not overwritten by the defaults.
 *
 * @param array $error {
 *     Error.
 *
 *     @type string $code Error code.
 * }
 * @param array $data {
 *     Data.
 *
 *     @type DOMElement|DOMNode $node The removed node.
 * }
 * @return array Error.
 */
public function prepare_validation_error( array $error = array(), array $data = array() ) {
	$has_node = isset( $data['node'] ) && $data['node'] instanceof DOMNode;
	if ( ! $has_node ) {
		return $error;
	}

	$node               = $data['node'];
	$error['node_name'] = $node->nodeName;
	if ( $node->parentNode ) {
		$error['parent_name'] = $node->parentNode->nodeName;
	}

	if ( $node instanceof DOMElement ) {
		$is_script = ( 'script' === $node->nodeName );

		if ( ! isset( $error['code'] ) ) {
			$error['code'] = AMP_Validation_Error_Taxonomy::INVALID_ELEMENT_CODE;
		}

		if ( ! isset( $error['type'] ) ) {
			$error['type'] = $is_script
				? AMP_Validation_Error_Taxonomy::JS_ERROR_TYPE
				: AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE;
		}

		if ( ! isset( $error['node_attributes'] ) ) {
			$error['node_attributes'] = array();
			foreach ( $node->attributes as $attr ) {
				$error['node_attributes'][ $attr->nodeName ] = $attr->nodeValue;
			}
		}

		// Capture script contents.
		if ( $is_script && ! $node->hasAttribute( 'src' ) ) {
			$error['text'] = $node->textContent;
		}

		// Suppress 'ver' param from enqueued scripts and styles.
		$url_attribute_by_tag = array(
			'script' => 'src',
			'link'   => 'href',
		);
		if ( isset( $url_attribute_by_tag[ $node->nodeName ] ) ) {
			$url_attribute = $url_attribute_by_tag[ $node->nodeName ];
			if ( isset( $error['node_attributes'][ $url_attribute ] ) && false !== strpos( $error['node_attributes'][ $url_attribute ], 'ver=' ) ) {
				$error['node_attributes'][ $url_attribute ] = add_query_arg( 'ver', '__normalized__', $error['node_attributes'][ $url_attribute ] );
			}
		}

		return $error;
	}

	if ( $node instanceof DOMAttr ) {
		if ( ! isset( $error['code'] ) ) {
			$error['code'] = AMP_Validation_Error_Taxonomy::INVALID_ATTRIBUTE_CODE;
		}

		if ( ! isset( $error['type'] ) ) {
			// If this is an attribute that begins with on, like onclick, it should be a js_error.
			$error['type'] = preg_match( '/^on\w+/', $node->nodeName )
				? AMP_Validation_Error_Taxonomy::JS_ERROR_TYPE
				: AMP_Validation_Error_Taxonomy::HTML_ATTRIBUTE_ERROR_TYPE;
		}

		if ( ! isset( $error['element_attributes'] ) ) {
			$error['element_attributes'] = array();
			if ( $node->parentNode && $node->parentNode->hasAttributes() ) {
				foreach ( $node->parentNode->attributes as $attr ) {
					$error['element_attributes'][ $attr->nodeName ] = $attr->nodeValue;
				}
			}
		}
	}

	return $error;
}
|
|
|
|
/**
 * Get data-amp-* values from the parent node 'figure' added by editor block.
 *
 * Only a direct parent that is a 'figure' element is considered. From it, 'data-amp-layout' is
 * returned as 'layout', and 'data-amp-noloading' is returned as 'noloading' when its value
 * validates as boolean true. In every other case an empty array is returned.
 *
 * @param DOMElement $node Base node.
 * @return array AMP data array.
 */
public function get_data_amp_attributes( $node ) {
	$attributes = array();

	// Editor blocks add 'figure' as the parent node for images. If this node has data-amp-layout then we should add this as the layout attribute.
	// The parent may be missing (detached node) or not an element at all (e.g. the document), in
	// which case it has no tagName to inspect.
	$parent_node = $node->parentNode;
	if ( ! ( $parent_node instanceof DOMElement ) || 'figure' !== $parent_node->tagName ) {
		return $attributes;
	}

	$parent_attributes = AMP_DOM_Utils::get_node_attributes_as_assoc_array( $parent_node );

	if ( isset( $parent_attributes['data-amp-layout'] ) ) {
		$attributes['layout'] = $parent_attributes['data-amp-layout'];
	}

	if ( isset( $parent_attributes['data-amp-noloading'] ) && true === filter_var( $parent_attributes['data-amp-noloading'], FILTER_VALIDATE_BOOLEAN ) ) {
		$attributes['noloading'] = $parent_attributes['data-amp-noloading'];
	}

	return $attributes;
}
|
|
|
|
/**
 * Set AMP attributes.
 *
 * Copies the AMP data onto the attributes as data-amp-* entries: 'layout' becomes
 * 'data-amp-layout' with the same value, and a set 'noloading' becomes an empty
 * 'data-amp-noloading' attribute.
 *
 * @param array $attributes Array of attributes.
 * @param array $amp_data   Array of AMP attributes.
 * @return array Updated attributes.
 */
public function filter_data_amp_attributes( $attributes, $amp_data ) {
	$has_layout    = isset( $amp_data['layout'] );
	$has_noloading = isset( $amp_data['noloading'] );

	if ( $has_layout ) {
		$attributes['data-amp-layout'] = $amp_data['layout'];
	}

	// noloading is written as a valueless attribute, so only its presence matters.
	if ( $has_noloading ) {
		$attributes['data-amp-noloading'] = '';
	}

	return $attributes;
}
|
|
|
|
/**
 * Set attributes to node's parent element according to layout.
 *
 * Adjusts the width/height attributes for the given layout and, where the layout needs it, writes
 * a matching inline style onto the node's parent element (replacing any existing style attribute):
 *  - 'fixed-height': height defaults to the fallback, width becomes 'auto', parent gets height/auto width;
 *  - 'fill':         parent gets relative positioning, full width and the (fallback) height, and
 *                    width/height are removed from the attributes;
 *  - 'responsive':   parent gets relative positioning, full width and auto height;
 *  - 'fixed':        height defaults to the fallback; the parent is not touched.
 * Other layouts leave everything unchanged. If the node has no parent element (for example
 * because it is detached), the attributes are still adjusted but no style is written.
 *
 * @param DOMElement $node           Node.
 * @param array      $new_attributes Attributes array.
 * @param string     $layout         Layout.
 * @return array New attributes.
 */
public function filter_attachment_layout_attributes( $node, $new_attributes, $layout ) {
	$parent_style = null;

	if ( 'fixed-height' === $layout ) {
		// The width has to be unset / auto in case of fixed-height.
		if ( ! isset( $new_attributes['height'] ) ) {
			$new_attributes['height'] = self::FALLBACK_HEIGHT;
		}
		$new_attributes['width'] = 'auto';
		$parent_style            = 'height: ' . $new_attributes['height'] . 'px; width: auto;';
	} elseif ( 'fill' === $layout ) {
		// The parent element should have width/height set and position set in case of 'fill'.
		if ( ! isset( $new_attributes['height'] ) ) {
			$new_attributes['height'] = self::FALLBACK_HEIGHT;
		}
		$parent_style = 'position:relative; width: 100%; height: ' . $new_attributes['height'] . 'px;';
		unset( $new_attributes['width'], $new_attributes['height'] );
	} elseif ( 'responsive' === $layout ) {
		$parent_style = 'position:relative; width: 100%; height: auto';
	} elseif ( 'fixed' === $layout ) {
		if ( ! isset( $new_attributes['height'] ) ) {
			$new_attributes['height'] = self::FALLBACK_HEIGHT;
		}
	}

	// Only an element can carry a style attribute; a detached node has no parent at all, and
	// calling setAttribute() on null would be a fatal error.
	if ( null !== $parent_style && $node->parentNode instanceof DOMElement ) {
		$node->parentNode->setAttribute( 'style', $parent_style );
	}

	return $new_attributes;
}
|
|
|
|
/**
 * Add <amp-image-lightbox> element to body tag if it doesn't exist yet.
 *
 * Does nothing when the document already contains an element with the lightbox ID, or when it
 * has no body element. Otherwise a new amp-image-lightbox node is appended to the first body.
 */
public function maybe_add_amp_image_lightbox_node() {

	// An element with the lightbox ID is already present: nothing to add.
	if ( null !== $this->dom->getElementById( self::AMP_IMAGE_LIGHTBOX_ID ) ) {
		return;
	}

	// Without a body there is nowhere to append the lightbox.
	$body_elements = $this->dom->getElementsByTagName( 'body' );
	if ( ! $body_elements->length ) {
		return;
	}
	$body_element = $body_elements->item( 0 );

	$lightbox_attributes = array(
		'id'                           => self::AMP_IMAGE_LIGHTBOX_ID,
		'layout'                       => 'nodisplay',
		'data-close-button-aria-label' => __( 'Close', 'amp' ),
	);
	$lightbox_element    = AMP_DOM_Utils::create_node( $this->dom, 'amp-image-lightbox', $lightbox_attributes );

	$body_element->appendChild( $lightbox_element );
}
|
|
}
|