1
0

Update HTMLPurifier to 4.2.0

This commit is contained in:
Bharat Mediratta 2011-01-08 18:39:08 -08:00
parent 2c9a045ad9
commit 0005833e8e
46 changed files with 599 additions and 114 deletions

View File

@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.0.0
* @version 4.2.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@ -176,6 +176,7 @@ require 'HTMLPurifier/Injector/DisplayLinkURI.php';
require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/RemoveEmpty.php';
require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
@ -195,9 +196,12 @@ require 'HTMLPurifier/Token/Start.php';
require 'HTMLPurifier/Token/Text.php';
require 'HTMLPurifier/URIFilter/DisableExternal.php';
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require 'HTMLPurifier/URIFilter/DisableResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php';

View File

@ -19,7 +19,7 @@
*/
/*
HTML Purifier 4.0.0 - Standards Compliant HTML Filtering
HTML Purifier 4.2.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@ -55,10 +55,10 @@ class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '4.0.0';
public $version = '4.2.0';
/** Constant with version of HTML Purifier */
const VERSION = '4.0.0';
const VERSION = '4.2.0';
/** Global configuration object */
public $config;

View File

@ -170,6 +170,7 @@ require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
@ -189,9 +190,12 @@ require_once $__dir . '/HTMLPurifier/Token/Start.php';
require_once $__dir . '/HTMLPurifier/Token/Text.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';

View File

@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
}
/**
* Parses a possibly escaped CSS string and returns the "pure"
* version of it.
*/
protected function expandCSSEscape($string) {
// flexibly parse it
$ret = '';
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
if ($string[$i] === '\\') {
$i++;
if ($i >= $c) {
$ret .= '\\';
break;
}
if (ctype_xdigit($string[$i])) {
$code = $string[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($string[$i])) break;
$code .= $string[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$ret .= $char;
if ($i < $c && trim($string[$i]) !== '') $i--;
continue;
}
if ($string[$i] === "\n") continue;
}
$ret .= $string[$i];
}
return $ret;
}
}
// vim: et sw=4 sts=4

View File

@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
$keywords = array();
$keywords['h'] = false; // left, right
$keywords['v'] = false; // top, bottom
$keywords['c'] = false; // center
$keywords['ch'] = false; // center (first word)
$keywords['cv'] = false; // center (second word)
$measures = array();
$i = 0;
@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
if (isset($lookup[$lbit])) {
$status = $lookup[$lbit];
if ($status == 'c') {
if ($i == 0) {
$status = 'ch';
} else {
$status = 'cv';
}
}
$keywords[$status] = $lbit;
$i++;
}
@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
if (!$i) return false; // no valid values were caught
$ret = array();
// first keyword
if ($keywords['h']) $ret[] = $keywords['h'];
elseif (count($measures)) $ret[] = array_shift($measures);
elseif ($keywords['c']) {
$ret[] = $keywords['c'];
$keywords['c'] = false; // prevent re-use: center = center center
elseif ($keywords['ch']) {
$ret[] = $keywords['ch'];
$keywords['cv'] = false; // prevent re-use: center = center center
}
elseif (count($measures)) $ret[] = array_shift($measures);
if ($keywords['v']) $ret[] = $keywords['v'];
elseif ($keywords['cv']) $ret[] = $keywords['cv'];
elseif (count($measures)) $ret[] = array_shift($measures);
elseif ($keywords['c']) $ret[] = $keywords['c'];
if (empty($ret)) return false;
return implode(' ', $ret);

View File

@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
$new_font = '';
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
if ($font[$i] === '\\') {
$i++;
if ($i >= $c) {
$new_font .= '\\';
break;
}
if (ctype_xdigit($font[$i])) {
$code = $font[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($font[$i])) break;
$code .= $font[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$new_font .= $char;
if ($i < $c && trim($font[$i]) !== '') $i--;
continue;
}
if ($font[$i] === "\n") continue;
}
$new_font .= $font[$i];
}
$font = $new_font;
}
$font = $this->expandCSSEscape($font);
// $font is a pure representation of the font name
if (ctype_alnum($font) && $font !== '') {
@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
continue;
}
// complicated font, requires quoting
// bugger out on whitespace. form feed (0C) really
// shouldn't show up regardless
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
// armor single quotes and new lines
$font = str_replace("\\", "\\\\", $font);
$font = str_replace("'", "\\'", $font);
$final .= "'$font', ";
// These ugly transforms don't pose a security
// risk (as \\ and \" might). We could try to be clever and
// use single-quote wrapping when there is a double quote
// present, but I have choosen not to implement that.
// (warning: this code relies on the selection of quotation
// mark below)
$font = str_replace('\\', '\\5C ', $font);
$font = str_replace('"', '\\22 ', $font);
// complicated font, requires quoting
$final .= "\"$font\", "; // note that this will later get turned into &quot;
}
$final = rtrim($final, ', ');
if ($final === '') return false;

View File

@ -34,20 +34,16 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
$uri = substr($uri, 1, $new_length - 1);
}
$keys = array( '(', ')', ',', ' ', '"', "'");
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
$uri = str_replace($values, $keys, $uri);
$uri = $this->expandCSSEscape($uri);
$result = parent::validate($uri, $config, $context);
if ($result === false) return false;
// escape necessary characters according to CSS spec
// except for the comma, none of these should appear in the
// URI at all
$result = str_replace($keys, $values, $result);
// extra sanity check; should have been done by URI
$result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
return "url($result)";
return "url(\"$result\")";
}

View File

@ -24,7 +24,8 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
if ($src) {
$alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) {
$attr['alt'] = basename($attr['src']);
// truncate if the alt is too long
$attr['alt'] = substr(basename($attr['src']),0,40);
} else {
$attr['alt'] = $alt;
}

View File

@ -33,12 +33,25 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
case 'allowNetworking':
$attr['value'] = 'internal';
break;
case 'allowFullScreen':
if ($config->get('HTML.FlashAllowFullScreen')) {
$attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
} else {
$attr['value'] = 'false';
}
break;
case 'wmode':
$attr['value'] = 'window';
break;
case 'movie':
case 'src':
$attr['name'] = "movie";
$attr['value'] = $this->uri->validate($attr['value'], $config, $context);
break;
case 'flashvars':
// we're going to allow arbitrary inputs to the SWF, on
// the reasoning that it could only hack the SWF, not us.
break;
// add other cases to support other param name/value pairs
default:
$attr['name'] = $attr['value'] = null;

View File

@ -272,20 +272,29 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
// setup allowed elements
$support = "(for information on implementing this, see the ".
"support forums) ";
$allowed_attributes = $config->get('CSS.AllowedProperties');
if ($allowed_attributes !== null) {
$allowed_properties = $config->get('CSS.AllowedProperties');
if ($allowed_properties !== null) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_attributes[$name])) unset($this->info[$name]);
unset($allowed_attributes[$name]);
if(!isset($allowed_properties[$name])) unset($this->info[$name]);
unset($allowed_properties[$name]);
}
// emit errors
foreach ($allowed_attributes as $name => $d) {
foreach ($allowed_properties as $name => $d) {
// :TODO: Is this htmlspecialchars() call really necessary?
$name = htmlspecialchars($name);
trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
}
}
$forbidden_properties = $config->get('CSS.ForbiddenProperties');
if ($forbidden_properties !== null) {
foreach ($this->info as $name => $d) {
if (isset($forbidden_properties[$name])) {
unset($this->info[$name]);
}
}
}
}
}

View File

@ -20,7 +20,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
public $version = '4.0.0';
public $version = '4.2.0';
/**
* Bool indicator whether or not to automatically finalize

View File

@ -0,0 +1,11 @@
AutoFormat.RemoveSpansWithoutAttributes
TYPE: bool
VERSION: 4.0.1
DEFAULT: false
--DESCRIPTION--
<p>
This directive causes <code>span</code> tags without any attributes
to be removed. It will also remove spans that had all attributes
removed during processing.
</p>
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,13 @@
CSS.ForbiddenProperties
TYPE: lookup
VERSION: 4.2.0
DEFAULT: array()
--DESCRIPTION--
<p>
This is the logical inverse of %CSS.AllowedProperties, and it will
override that directive or any other directive. If possible,
%CSS.AllowedProperties is recommended over this directive,
because it can sometimes be difficult to tell whether or not you've
forbidden all of the CSS properties you truly would like to disallow.
</p>
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,11 @@
Core.NormalizeNewlines
TYPE: bool
VERSION: 4.2.0
DEFAULT: true
--DESCRIPTION--
<p>
Whether or not to normalize newlines to the operating
system default. When <code>false</code>, HTML Purifier
will attempt to preserve mixed newline files.
</p>
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,11 @@
Core.RemoveProcessingInstructions
TYPE: bool
VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
Instead of escaping processing instructions in the form <code>&lt;? ...
?&gt;</code>, remove it out-right. This may be useful if the HTML
you are validating contains XML processing instruction gunk, however,
it can also be user-unfriendly for people attempting to post PHP
snippets.
--# vim: et sw=4 sts=4

View File

@ -3,6 +3,11 @@ TYPE: bool
VERSION: 3.1.0
DEFAULT: false
--DESCRIPTION--
<p>
<strong>Warning:</strong> Deprecated in favor of %HTML.SafeObject and
%Output.FlashCompat (turn both on to allow YouTube videos and other
Flash content).
</p>
<p>
This directive enables YouTube video embedding in HTML Purifier. Check
<a href="http://htmlpurifier.org/docs/enduser-youtube.html">this document

View File

@ -5,11 +5,14 @@ DEFAULT: NULL
--DESCRIPTION--
<p>
This is a convenience directive that rolls the functionality of
%HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
This is a preferred convenience directive that combines
%HTML.AllowedElements and %HTML.AllowedAttributes.
Specify elements and attributes that are allowed using:
<code>element1[attr1|attr2],element2...</code>. You can also use
newlines instead of commas to separate elements.
<code>element1[attr1|attr2],element2...</code>. For example,
if you would like to only allow paragraphs and links, specify
<code>a[href],p</code>. You can specify attributes that apply
to all elements using an asterisk, e.g. <code>*[lang]</code>.
You can also use newlines instead of commas to separate elements.
</p>
<p>
<strong>Warning</strong>:

View File

@ -4,12 +4,17 @@ VERSION: 1.3.0
DEFAULT: NULL
--DESCRIPTION--
<p>
If HTML Purifier's tag set is unsatisfactory for your needs, you
can overload it with your own list of tags to allow. Note that this
method is subtractive: it does its job by taking away from HTML Purifier
usual feature set, so you cannot add a tag that HTML Purifier never
supported in the first place (like embed, form or head). If you
change this, you probably also want to change %HTML.AllowedAttributes.
If HTML Purifier's tag set is unsatisfactory for your needs, you can
overload it with your own list of tags to allow. If you change
this, you probably also want to change %HTML.AllowedAttributes; see
also %HTML.Allowed which lets you set allowed elements and
attributes at the same time.
</p>
<p>
If you attempt to allow an element that HTML Purifier does not know
about, HTML Purifier will raise an error. You will need to manually
tell HTML Purifier about this element by using the
<a href="http://htmlpurifier.org/docs/enduser-customize.html">advanced customization features.</a>
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the

View File

@ -0,0 +1,11 @@
HTML.FlashAllowFullScreen
TYPE: bool
VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit embedded Flash content from
%HTML.SafeObject to expand to the full screen. Corresponds to
the <code>allowFullScreen</code> parameter.
</p>
--# vim: et sw=4 sts=4

View File

@ -7,8 +7,7 @@ DEFAULT: false
Whether or not to permit embed tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to embed tags. Embed is a proprietary
element and will cause your website to stop validating. You probably want
to enable this with %HTML.SafeObject.
<strong>Highly experimental.</strong>
</p>
element and will cause your website to stop validating; you should
see if you can use %Output.FlashCompat with %HTML.SafeObject instead
first.</p>
--# vim: et sw=4 sts=4

View File

@ -6,9 +6,8 @@ DEFAULT: false
<p>
Whether or not to permit object tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to object tags. You may also want to
enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer,
although embed tags will cause your website to stop validating.
<strong>Highly experimental.</strong>
what websites like MySpace do to object tags. You should also enable
%Output.FlashCompat in order to generate Internet Explorer
compatibility code for your object tags.
</p>
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,11 @@
Output.FlashCompat
TYPE: bool
VERSION: 4.1.0
DEFAULT: false
--DESCRIPTION--
<p>
If true, HTML Purifier will generate Internet Explorer compatibility
code for all object code. This is highly recommended if you enable
%HTML.SafeObject.
</p>
--# vim: et sw=4 sts=4

View File

@ -12,4 +12,6 @@ array (
--DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This
prevents XSS attacks from using pseudo-schemes like javascript or mocha.
There is also support for the <code>data</code> and <code>file</code>
URI schemes, but they are not enabled by default.
--# vim: et sw=4 sts=4

View File

@ -1,12 +1,15 @@
URI.DisableResources
TYPE: bool
VERSION: 1.3.0
VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
<p>
Disables embedding resources, essentially meaning no pictures. You can
still link to them though. See %URI.DisableExternalResources for why
this might be a good idea.
</p>
<p>
<em>Note:</em> While this directive has been available since 1.3.0,
it didn't actually start doing anything until 4.2.0.
</p>
--# vim: et sw=4 sts=4

View File

@ -97,6 +97,13 @@ class HTMLPurifier_ElementDef
*/
public $autoclose = array();
/**
* If a foreign element is found in this element, test if it is
* allowed by this sub-element; if it is, instead of closing the
* current element, place it inside this element.
*/
public $wrap;
/**
* Whether or not this is a formatting element affected by the
* "Active Formatting Elements" algorithm.

View File

@ -7,13 +7,13 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
public function preFilter($html, $config, $context) {
$pre_regex = '#<object[^>]+>.+?'.
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s';
$pre_replace = '<span class="youtube-embed">\1</span>';
return preg_replace($pre_regex, $pre_replace, $html);
}
public function postFilter($html, $config, $context) {
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
$post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#';
return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html);
}
@ -24,10 +24,10 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
protected function postFilterCallback($matches) {
$url = $this->armorUrl($matches[1]);
return '<object width="425" height="350" type="application/x-shockwave-flash" '.
'data="http://www.youtube.com/v/'.$url.'">'.
'<param name="movie" value="http://www.youtube.com/v/'.$url.'"></param>'.
'data="http://www.youtube.com/'.$url.'">'.
'<param name="movie" value="http://www.youtube.com/'.$url.'"></param>'.
'<!--[if IE]>'.
'<embed src="http://www.youtube.com/v/'.$url.'"'.
'<embed src="http://www.youtube.com/'.$url.'"'.
'type="application/x-shockwave-flash"'.
'wmode="transparent" width="425" height="350" />'.
'<![endif]-->'.

View File

@ -31,6 +31,17 @@ class HTMLPurifier_Generator
*/
private $_sortAttr;
/**
* Cache of %Output.FlashCompat
*/
private $_flashCompat;
/**
* Stack for keeping track of object information when outputting IE
* compatibility code.
*/
private $_flashStack = array();
/**
* Configuration for the generator
*/
@ -44,6 +55,7 @@ class HTMLPurifier_Generator
$this->config = $config;
$this->_scriptFix = $config->get('Output.CommentScriptContents');
$this->_sortAttr = $config->get('Output.SortAttr');
$this->_flashCompat = $config->get('Output.FlashCompat');
$this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml;
}
@ -86,9 +98,11 @@ class HTMLPurifier_Generator
}
// Normalize newlines to system defined value
$nl = $this->config->get('Output.Newline');
if ($nl === null) $nl = PHP_EOL;
if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
if ($this->config->get('Core.NormalizeNewlines')) {
$nl = $this->config->get('Output.Newline');
if ($nl === null) $nl = PHP_EOL;
if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
}
return $html;
}
@ -104,12 +118,41 @@ class HTMLPurifier_Generator
} elseif ($token instanceof HTMLPurifier_Token_Start) {
$attr = $this->generateAttributes($token->attr, $token->name);
if ($this->_flashCompat) {
if ($token->name == "object") {
$flash = new stdclass();
$flash->attr = $token->attr;
$flash->param = array();
$this->_flashStack[] = $flash;
}
}
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token instanceof HTMLPurifier_Token_End) {
return '</' . $token->name . '>';
$_extra = '';
if ($this->_flashCompat) {
if ($token->name == "object" && !empty($this->_flashStack)) {
$flash = array_pop($this->_flashStack);
$compat_token = new HTMLPurifier_Token_Empty("embed");
foreach ($flash->attr as $name => $val) {
if ($name == "classid") continue;
if ($name == "type") continue;
if ($name == "data") $name = "src";
$compat_token->attr[$name] = $val;
}
foreach ($flash->param as $name => $val) {
if ($name == "movie") $name = "src";
$compat_token->attr[$name] = $val;
}
$_extra = "<!--[if IE]>".$this->generateFromToken($compat_token)."<![endif]-->";
}
}
return $_extra . '</' . $token->name . '>';
} elseif ($token instanceof HTMLPurifier_Token_Empty) {
if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
$this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
}
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
@ -174,7 +217,10 @@ class HTMLPurifier_Generator
* permissible for non-attribute output.
* @return String escaped data.
*/
public function escape($string, $quote = ENT_COMPAT) {
public function escape($string, $quote = null) {
// Workaround for APC bug on Mac Leopard reported by sidepodcast
// http://htmlpurifier.org/phorum/read.php?3,4823,4846
if ($quote === null) $quote = ENT_COMPAT;
return htmlspecialchars($string, $quote, 'UTF-8');
}

View File

@ -300,7 +300,12 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
unset($allowed_attributes_mutable[$key]);
}
}
if ($delete) unset($this->info[$tag]->attr[$attr]);
if ($delete) {
if ($this->info[$tag]->attr[$attr]->required) {
trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
}
unset($this->info[$tag]->attr[$attr]);
}
}
}
// emit errors

View File

@ -20,8 +20,10 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List');
public function setup($config) {
$this->addElement('ol', 'List', 'Required: li', 'Common');
$this->addElement('ul', 'List', 'Required: li', 'Common');
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
$ol->wrap = "li";
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
$ul->wrap = "li";
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', false, 'Flow', 'Common');

View File

@ -20,6 +20,7 @@ class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
'height' => 'Pixels#' . $max,
'allowscriptaccess' => 'Enum#never',
'allownetworking' => 'Enum#internal',
'flashvars' => 'Text',
'wmode' => 'Enum#window',
'name' => 'ID',
)

View File

@ -28,7 +28,10 @@ class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
'type' => 'Enum#application/x-shockwave-flash',
'width' => 'Pixels#' . $max,
'height' => 'Pixels#' . $max,
'data' => 'URI#embedded'
'data' => 'URI#embedded',
'classid' => 'Enum#clsid:d27cdb6e-ae6d-11cf-96b8-444553540000',
'codebase' => new HTMLPurifier_AttrDef_Enum(array(
'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')),
)
);
$object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();

View File

@ -15,6 +15,7 @@ class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_T
$r['thead@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tbody@background'] = new HTMLPurifier_AttrTransform_Background();
$r['table@height'] = new HTMLPurifier_AttrTransform_Length('height');
return $r;
}

View File

@ -34,16 +34,21 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// ----
// This is a degenerate case
} else {
// State 1.2: PAR1
// ----
if (!$token->is_whitespace || $this->_isInline($current)) {
// State 1.2: PAR1
// ----
// State 1.3: PAR1\n\nPAR2
// ------------
// State 1.3: PAR1\n\nPAR2
// ------------
// State 1.4: <div>PAR1\n\nPAR2 (see State 2)
// ------------
$token = array($this->_pStart());
$this->_splitText($text, $token);
// State 1.4: <div>PAR1\n\nPAR2 (see State 2)
// ------------
$token = array($this->_pStart());
$this->_splitText($text, $token);
} else {
// State 1.5: \n<hr />
// --
}
}
} else {
// State 2: <div>PAR1... (similar to 1.4)

View File

@ -0,0 +1,60 @@
<?php defined("SYSPATH") or die("No direct script access.");
/**
* Injector that removes spans with no attributes
*/
class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
{
public $name = 'RemoveSpansWithoutAttributes';
public $needed = array('span');
private $attrValidator;
/**
* Used by AttrValidator
*/
private $config;
private $context;
public function prepare($config, $context) {
$this->attrValidator = new HTMLPurifier_AttrValidator();
$this->config = $config;
$this->context = $context;
return parent::prepare($config, $context);
}
public function handleElement(&$token) {
if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
return;
}
// We need to validate the attributes now since this doesn't normally
// happen until after MakeWellFormed. If all the attributes are removed
// the span needs to be removed too.
$this->attrValidator->validateToken($token, $this->config, $this->context);
$token->armor['ValidateAttributes'] = true;
if (!empty($token->attr)) {
return;
}
$nesting = 0;
$spanContentTokens = array();
while ($this->forwardUntilEndToken($i, $current, $nesting)) {}
if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
// Mark closing span tag for deletion
$current->markForDeletion = true;
// Delete open span tag
$token = false;
}
}
public function handleEnd(&$token) {
if ($token->markForDeletion) {
$token = false;
}
}
}
// vim: et sw=4 sts=4

View File

@ -20,6 +20,9 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
protected $allowedParam = array(
'wmode' => true,
'movie' => true,
'flashvars' => true,
'src' => true,
'allowFullScreen' => true, // if omitted, assume to be 'false'
);
public function prepare($config, $context) {
@ -47,7 +50,8 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
// We need this fix because YouTube doesn't supply a data
// attribute, which we need if a type is specified. This is
// *very* Flash specific.
if (!isset($this->objectStack[$i]->attr['data']) && $token->attr['name'] == 'movie') {
if (!isset($this->objectStack[$i]->attr['data']) &&
($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) {
$this->objectStack[$i]->attr['data'] = $token->attr['value'];
}
// Check if the parameter is the correct value but has not

View File

@ -23,6 +23,7 @@ $messages = array(
'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
'Lexer: Missing attribute key' => 'Attribute declaration has no key',
'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
'Lexer: Extracted body' => 'Removed document metadata tags',
'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized',
'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1',

View File

@ -230,6 +230,17 @@ class HTMLPurifier_Lexer
);
}
/**
* Special Internet Explorer conditional comments should be removed.
*/
protected static function removeIEConditional($string) {
return preg_replace(
'#<!--\[if [^>]+\]>.*<!\[endif\]-->#si', // probably should generalize for all strings
'',
$string
);
}
/**
* Callback function for escapeCDATA() that does the work.
*
@ -252,20 +263,32 @@ class HTMLPurifier_Lexer
public function normalize($html, $config, $context) {
// normalize newlines to \n
$html = str_replace("\r\n", "\n", $html);
$html = str_replace("\r", "\n", $html);
if ($config->get('Core.NormalizeNewlines')) {
$html = str_replace("\r\n", "\n", $html);
$html = str_replace("\r", "\n", $html);
}
if ($config->get('HTML.Trusted')) {
// escape convoluted CDATA
$html = $this->escapeCommentedCDATA($html);
}
$html = $this->removeIEConditional($html);
// escape CDATA
$html = $this->escapeCDATA($html);
// extract body from document if applicable
if ($config->get('Core.ConvertDocumentToFragment')) {
$html = $this->extractBody($html);
$e = false;
if ($config->get('Core.CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
$new_html = $this->extractBody($html);
if ($e && $new_html != $html) {
$e->send(E_WARNING, 'Lexer: Extracted body');
}
$html = $new_html;
}
// expand entities that aren't the big five
@ -276,6 +299,11 @@ class HTMLPurifier_Lexer
// represent non-SGML characters (horror, horror!)
$html = HTMLPurifier_Encoder::cleanUTF8($html);
// if processing instructions are to removed, remove them now
if ($config->get('Core.RemoveProcessingInstructions')) {
$html = preg_replace('#<\?.+?\?>#s', '', $html);
}
return $html;
}

View File

@ -384,7 +384,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
}
if ($value === false) $value = '';
return array($key => $value);
return array($key => $this->parseData($value));
}
// setup loop environment

View File

@ -26,13 +26,20 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
* Internal accumulator array for SAX parsers.
*/
protected $tokens = array();
protected $last_token_was_empty;
private $parent_handler;
private $stack = array();
public function tokenizeHTML($string, $config, $context) {
$this->tokens = array();
$this->last_token_was_empty = false;
$string = $this->normalize($string, $config, $context);
$this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));
$parser = new XML_HTMLSax3();
$parser->set_object($this);
$parser->set_element_handler('openHandler','closeHandler');
@ -44,6 +51,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
$parser->parse($string);
restore_error_handler();
return $this->tokens;
}
@ -58,9 +67,11 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
}
if ($closed) {
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
$this->last_token_was_empty = true;
} else {
$this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
}
$this->stack[] = $name;
return true;
}
@ -71,10 +82,12 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
// HTMLSax3 seems to always send empty tags an extra close tag
// check and ignore if you see it:
// [TESTME] to make sure it doesn't overreach
if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) {
if ($this->last_token_was_empty) {
$this->last_token_was_empty = false;
return true;
}
$this->tokens[] = new HTMLPurifier_Token_End($name);
if (!empty($this->stack)) array_pop($this->stack);
return true;
}
@ -82,6 +95,7 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
* Data event handler, interface is defined by PEAR package.
*/
public function dataHandler(&$parser, $data) {
$this->last_token_was_empty = false;
$this->tokens[] = new HTMLPurifier_Token_Text($data);
return true;
}
@ -91,7 +105,18 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
*/
public function escapeHandler(&$parser, $data) {
if (strpos($data, '--') === 0) {
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
// remove trailing and leading double-dashes
$data = substr($data, 2);
if (strlen($data) >= 2 && substr($data, -2) == "--") {
$data = substr($data, 0, -2);
}
if (isset($this->stack[sizeof($this->stack) - 1]) &&
$this->stack[sizeof($this->stack) - 1] == "style") {
$this->tokens[] = new HTMLPurifier_Token_Text($data);
} else {
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
}
$this->last_token_was_empty = false;
}
// CDATA is handled elsewhere, but if it was handled here:
//if (strpos($data, '[CDATA[') === 0) {
@ -101,6 +126,14 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
return true;
}
/**
* An error handler that mutes strict errors
*/
public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
if ($errno == E_STRICT) return;
return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
}
}
// vim: et sw=4 sts=4

View File

@ -125,8 +125,6 @@ class HTML5 {
const EOF = 5;
public function __construct($data) {
$data = str_replace("\r\n", "\n", $data);
$data = str_replace("\r", null, $data);
$this->data = $data;
$this->char = -1;
@ -3903,4 +3901,4 @@ class HTML5TreeConstructer {
return $this->dom;
}
}
?>

View File

@ -83,6 +83,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->injectors[] = $injector;
}
foreach ($custom_injectors as $injector) {
if (!$injector) continue;
if (is_string($injector)) {
$injector = "HTMLPurifier_Injector_$injector";
$injector = new $injector;
@ -164,6 +165,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$token = $tokens[$t];
//echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
//flush();
// quick-check: if it's not a tag, no need to process
if (empty($token->is_tag)) {
@ -219,6 +221,22 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$autoclose = false;
}
if ($autoclose && $definition->info[$token->name]->wrap) {
// Check if an element can be wrapped by another
// element to make it valid in a context (for
// example, <ul><ul> needs a <li> in between)
$wrapname = $definition->info[$token->name]->wrap;
$wrapdef = $definition->info[$wrapname];
$elements = $wrapdef->child->getAllowedElements($config);
$parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config);
if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
$newtoken = new HTMLPurifier_Token_Start($wrapname);
$this->insertBefore($newtoken);
$reprocess = true;
continue;
}
}
$carryover = false;
if ($autoclose && $definition->info[$parent->name]->formatting) {
$carryover = true;

View File

@ -0,0 +1,11 @@
<?php defined("SYSPATH") or die("No direct script access.");
class HTMLPurifier_URIFilter_DisableResources extends HTMLPurifier_URIFilter
{
public $name = 'DisableResources';
public function filter(&$uri, $config, $context) {
return !$context->get('EmbeddedURI', true);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,93 @@
<?php defined("SYSPATH") or die("No direct script access.");
/**
* Implements data: URI for base64 encoded images supported by GD.
*/
class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {
public $browsable = true;
public $allowed_types = array(
// you better write validation code for other types if you
// decide to allow them
'image/jpeg' => true,
'image/gif' => true,
'image/png' => true,
);
public function validate(&$uri, $config, $context) {
$result = explode(',', $uri->path, 2);
$is_base64 = false;
$charset = null;
$content_type = null;
if (count($result) == 2) {
list($metadata, $data) = $result;
// do some legwork on the metadata
$metas = explode(';', $metadata);
while(!empty($metas)) {
$cur = array_shift($metas);
if ($cur == 'base64') {
$is_base64 = true;
break;
}
if (substr($cur, 0, 8) == 'charset=') {
// doesn't match if there are arbitrary spaces, but
// whatever dude
if ($charset !== null) continue; // garbage
$charset = substr($cur, 8); // not used
} else {
if ($content_type !== null) continue; // garbage
$content_type = $cur;
}
}
} else {
$data = $result[0];
}
if ($content_type !== null && empty($this->allowed_types[$content_type])) {
return false;
}
if ($charset !== null) {
// error; we don't allow plaintext stuff
$charset = null;
}
$data = rawurldecode($data);
if ($is_base64) {
$raw_data = base64_decode($data);
} else {
$raw_data = $data;
}
// XXX probably want to refactor this into a general mechanism
// for filtering arbitrary content types
$file = tempnam("/tmp", "");
file_put_contents($file, $raw_data);
if (function_exists('exif_imagetype')) {
$image_code = exif_imagetype($file);
} elseif (function_exists('getimagesize')) {
set_error_handler(array($this, 'muteErrorHandler'));
$info = getimagesize($file);
restore_error_handler();
if ($info == false) return false;
$image_code = $info[2];
} else {
trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
}
$real_content_type = image_type_to_mime_type($image_code);
if ($real_content_type != $content_type) {
// we're nice guys; if the content type is something else we
// support, change it over
if (empty($this->allowed_types[$real_content_type])) return false;
$content_type = $real_content_type;
}
// ok, it's kosher, rewrite what we need
$uri->userinfo = null;
$uri->host = null;
$uri->port = null;
$uri->fragment = null;
$uri->query = null;
$uri->path = "$content_type;base64," . base64_encode($raw_data);
return true;
}
public function muteErrorHandler($errno, $errstr) {}
}

View File

@ -0,0 +1,26 @@
<?php defined("SYSPATH") or die("No direct script access.");
/**
* Validates file as defined by RFC 1630 and RFC 1738.
*/
class HTMLPurifier_URIScheme_file extends HTMLPurifier_URIScheme {
// Generally file:// URLs are not accessible from most
// machines, so placing them as an img src is incorrect.
public $browsable = false;
public function validate(&$uri, $config, $context) {
parent::validate($uri, $config, $context);
// Authentication method is not supported
$uri->userinfo = null;
// file:// makes no provisions for accessing the resource
$uri->port = null;
// While it seems to work on Firefox, the querystring has
// no possible effect and is thus stripped.
$uri->query = null;
return true;
}
}
// vim: et sw=4 sts=4

View File

@ -62,7 +62,7 @@ class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
foreach ($var as $keypair) {
$c = explode(':', $keypair, 2);
if (!isset($c[1])) continue;
$nvar[$c[0]] = $c[1];
$nvar[trim($c[0])] = trim($c[1]);
}
$var = $nvar;
}
@ -79,8 +79,15 @@ class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
return $new;
} else break;
}
if ($type === self::ALIST) {
trigger_error("Array list did not have consecutive integer indexes", E_USER_WARNING);
return array_values($var);
}
if ($type === self::LOOKUP) {
foreach ($var as $key => $value) {
if ($value !== true) {
trigger_error("Lookup array has non-true value at key '$key'; maybe your input array was not indexed numerically", E_USER_WARNING);
}
$var[$key] = true;
}
}