Skip to content

Commit

Permalink
Tag Processor: Decode/Encode HTML attributs in get_attribute/set_attr…
Browse files Browse the repository at this point in the history
…ibute to prevent XSS (#44447)

Co-authored-by: Dennis Snell <[email protected]>
  • Loading branch information
adamziel and dmsnell authored Sep 29, 2022
1 parent 5073ce2 commit b58ff34
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 8 deletions.
9 changes: 6 additions & 3 deletions lib/experimental/html/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,9 @@ public function get_attribute( $name ) {
return true;
}

return substr( $this->html, $attribute->value_starts_at, $attribute->value_length );
$raw_value = substr( $this->html, $attribute->value_starts_at, $attribute->value_length );

return html_entity_decode( $raw_value );
}

/**
Expand Down Expand Up @@ -945,6 +947,8 @@ public function get_tag() {
* - When `true` is passed as the value, then only the attribute name is added to the tag.
* - When `false` is passed, the attribute gets removed if it existed before.
*
* For string attributes, the value is escaped using the `esc_attr` function.
*
* @since 6.2.0
*
* @param string $name The attribute name to target.
Expand All @@ -968,8 +972,7 @@ public function set_attribute( $name, $value ) {
if ( true === $value ) {
$updated_attribute = $name;
} else {
// @TODO: What escaping and sanitization do we need here?
$escaped_new_value = str_replace( '"', '&quot;', $value );
$escaped_new_value = esc_attr( $value );
$updated_attribute = "{$name}=\"{$escaped_new_value}\"";
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
<?php
/**
* Unit tests covering WP_HTML_Tag_Processor functionality.
* This file takes about 100ms to run because it does not load
* any WordPress libraries:
*
* ```
* ./vendor/bin/phpunit --no-configuration ./phpunit/html/wp-html-tag-processor-test.php
* ```
*
* Put all new WP_HTML_Tag_Processor tests here, and only add new cases to
* wp-html-tag-processor-test-wp.php when they cannot run without WordPress.
*
* @package WordPress
* @subpackage HTML
*/

if ( ! function_exists( 'esc_attr' ) ) {
function esc_attr( $s ) {
return str_replace( '"', '&quot;', $s );
function esc_attr( $string ) {
return htmlspecialchars( $string, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'utf-8', false );
}
}

Expand All @@ -23,7 +32,7 @@ abstract class WP_UnitTestCase extends \PHPUnit\Framework\TestCase {}
*
* @coversDefaultClass WP_HTML_Tag_Processor
*/
class WP_HTML_Tag_Processor_Test extends WP_UnitTestCase {
class WP_HTML_Tag_Processor_Standalone_Test extends WP_UnitTestCase {
const HTML_SIMPLE = '<div id="first"><span id="second">Text</span></div>';
const HTML_WITH_CLASSES = '<div class="main with-border" id="first"><span class="not-main bold with-border" id="second">Text</span></div>';
const HTML_MALFORMED = '<div><span class="d-md-none" Notifications</span><span class="d-none d-md-inline">Back to notifications</span></div>';
Expand Down Expand Up @@ -134,6 +143,17 @@ public function test_get_attribute_returns_string_for_truthy_attributes() {
$this->assertSame( 'true', $p->get_attribute( 'hidden' ), 'Accessing a hidden="true" attribute value did not return "true"' );
}

/**
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_get_attribute_decodes_html_character_references() {
$p = new WP_HTML_Tag_Processor( '<div id="the &quot;grande&quot; is &lt; &#x033;&#50;oz&dagger;"></div>' );
$p->next_tag();
$this->assertSame( 'the "grande" is < 32oz†', $p->get_attribute( 'id' ), 'HTML Attribute value was returned without decoding character references' );
}

/**
* @ticket 56299
*
Expand Down Expand Up @@ -235,6 +255,68 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar
);
}

/**
* Passing a double quote inside of an attribute values could lead to an XSS attack as follows:
*
* <code>
* $p = new WP_HTML_Tag_Processor( '<div class="header"></div>' );
* $p->next_tag();
* $p->set_attribute('class', '" onclick="alert');
* echo $p;
* // <div class="" onclick="alert"></div>
* </code>
*
* To prevent it, `set_attribute` calls `esc_attr()` on its given values.
*
* <code>
* <div class="&quot; onclick=&quot;alert"></div>
* </code>
*
* @ticket 56299
*
* @dataProvider data_set_attribute_escapable_values
* @covers set_attribute
*/
public function test_set_attribute_prevents_xss( $attribute_value ) {
$p = new WP_HTML_Tag_Processor( '<div></div>' );
$p->next_tag();
$p->set_attribute( 'test', $attribute_value );

/*
* Testing the escaping is hard using tools that properly parse
* HTML because they might interpret the escaped values. It's hard
* with tools that don't understand HTML because they might get
* confused by improperly-escaped values.
*
* For this test, since we control the input HTML we're going to
* do what looks like the opposite of what we want to be doing with
* this library but are only doing so because we have full control
* over the content and because we want to look at the raw values.
*/
$match = null;
preg_match( '~^<div test=(.*)></div>$~', (string) $p, $match );
list( , $actual_value ) = $match;

$this->assertEquals( $actual_value, '"' . esc_attr( $attribute_value ) . '"' );
}

/**
* Data provider with HTML attribute values that might need escaping.
*/
public function data_set_attribute_escapable_values() {
return array(
array( '"' ),
array( '&quot;' ),
array( '&' ),
array( '&amp;' ),
array( '&euro;' ),
array( "'" ),
array( '<>' ),
array( '&quot";' ),
array( '" onclick="alert(\'1\');"><span onclick=""></span><script>alert("1")</script>' ),
);
}

/**
* @ticket 56299
*
Expand Down Expand Up @@ -999,12 +1081,12 @@ public function data_malformed_tag() {

$examples['HTML tag opening inside attribute value'] = array(
'<pre id="<code" class="wp-block-code <code is poetry&gt;"><code>This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
'<pre foo="bar" id="<code" class="wp-block-code <code is poetry&gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
'<pre foo="bar" id="<code" class="wp-block-code &lt;code is poetry&gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
);

$examples['HTML tag brackets in attribute values and data markup'] = array(
'<pre id="<code-&gt;-block-&gt;" class="wp-block-code <code is poetry&gt;"><code>This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
'<pre foo="bar" id="<code-&gt;-block-&gt;" class="wp-block-code <code is poetry&gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
'<pre foo="bar" id="<code-&gt;-block-&gt;" class="wp-block-code &lt;code is poetry&gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
);

$examples['Single and double quotes in attribute value'] = array(
Expand Down
91 changes: 91 additions & 0 deletions phpunit/html/wp-html-tag-processor-wp-test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<?php
/**
* Unit tests covering WP_HTML_Tag_Processor functionality.
* This file is only for tests that require WordPress libraries.
* Put all other tests in wp-html-tag-processor-test-standalone.php.
*
* Run these tests locally as follows:
*
* ```
* npx wp-env run phpunit "phpunit -c /var/www/html/wp-content/plugins/gutenberg/phpunit.xml.dist --filter WP_HTML_Tag_Processor_Test_WP"
* ```
*
* @package WordPress
* @subpackage HTML
*/

require_once __DIR__ . '/../../lib/experimental/html/index.php';

/**
* @group html
*
* @coversDefaultClass WP_HTML_Tag_Processor
*/
class WP_HTML_Tag_Processor_Test_WP extends WP_UnitTestCase {

/**
* Passing a double quote inside of an attribute values could lead to an XSS attack as follows:
*
* <code>
* $p = new WP_HTML_Tag_Processor( '<div class="header"></div>' );
* $p->next_tag();
* $p->set_attribute('class', '" onclick="alert');
* echo $p;
* // <div class="" onclick="alert"></div>
* </code>
*
* To prevent it, `set_attribute` calls `esc_attr()` on its given values.
*
* <code>
* <div class="&quot; onclick=&quot;alert"></div>
* </code>
*
* @ticket 56299
*
* @dataProvider data_set_attribute_escapable_values
* @covers set_attribute
*/
public function test_set_attribute_prevents_xss( $value_to_set, $expected_result ) {
$p = new WP_HTML_Tag_Processor( '<div></div>' );
$p->next_tag();
$p->set_attribute( 'test', $value_to_set );

/*
* Testing the escaping is hard using tools that properly parse
* HTML because they might interpret the escaped values. It's hard
* with tools that don't understand HTML because they might get
* confused by improperly-escaped values.
*
* For this test, since we control the input HTML we're going to
* do what looks like the opposite of what we want to be doing with
* this library but are only doing so because we have full control
* over the content and because we want to look at the raw values.
*/
$match = null;
preg_match( '~^<div test=(.*)></div>$~', (string) $p, $match );
list( , $actual_value ) = $match;

$this->assertEquals( $actual_value, '"' . $expected_result . '"' );
}

/**
* Data provider with HTML attribute values that might need escaping.
*/
public function data_set_attribute_escapable_values() {
return array(
array( '"', '&quot;' ),
array( '&quot;', '&quot;' ),
array( '&', '&amp;' ),
array( '&amp;', '&amp;' ),
array( '&euro;', '&euro;' ),
array( "'", '&#039;' ),
array( '<>', '&lt;&gt;' ),
array( '&quot";', '&amp;quot&quot;;' ),
array(
'" onclick="alert(\'1\');"><span onclick=""></span><script>alert("1")</script>',
'&quot; onclick=&quot;alert(&#039;1&#039;);&quot;&gt;&lt;span onclick=&quot;&quot;&gt;&lt;/span&gt;&lt;script&gt;alert(&quot;1&quot;)&lt;/script&gt;',
),
);
}

}

0 comments on commit b58ff34

Please sign in to comment.