Skip to content
Snippets Groups Projects
Commit 1fe7a4e8 authored by Peter Rotich
Browse files

html: Decode html entities before sanitizing

Encoded entities can be used to bypass safety checks
Don't remove iframe when using xml_dom to balance tags
parent 9ebad3be
No related branches found
No related tags found
No related merge requests found
......@@ -136,7 +136,8 @@ class Format {
// Remove empty nodes
$xpath = new DOMXPath($doc);
static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1,
'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
'iframe' => 1, 'hr'=>1, 'img'=>1, 'input'=>1,
'isindex'=>1, 'param'=>1);
do {
$done = true;
$nodes = $xpath->query('//*[not(text()) and not(node())]');
......@@ -218,6 +219,17 @@ class Format {
static function __html_cleanup($el, $attributes=0) {
static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1,
'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
// We're dealing with closing tag
if ($attributes === 0)
return "</{$el}>";
// Remove iframe and embed without src (perhaps stripped by spec)
// It would be awesome to rickroll such entry :)
if (in_array($el, array('iframe', 'embed'))
&& (!isset($attributes['src']) || empty($attributes['src'])))
return '';
// Clean unexpected class values
if (isset($attributes['class'])) {
$classes = explode(' ', $attributes['class']);
......@@ -268,7 +280,20 @@ class Format {
}
}
function safe_html($html, $balance=1) {
function safe_html($html, $options=array()) {
$options = array_merge(array(
// Balance html tags
'balance' => 1,
// Decode special HTML characters such as &lt; and &gt;, which
// can otherwise be used to bypass cleaning
'decode' => true
),
$options);
if ($options['decode'])
$html = Format::htmldecode($html);
// Remove HEAD and STYLE sections
$html = preg_replace(
array(':<(head|style|script).+?</\1>:is', # <head> and <style> sections
......@@ -278,9 +303,11 @@ class Format {
),
array('', '', '', ''),
$html);
// HtmLawed specific config only
$config = array(
'safe' => 1, //Exclude applet, embed, iframe, object and script tags.
'balance' => $balance,
'balance' => $options['balance'],
'comment' => 1, //Remove html comments (OUTLOOK LOVE THEM)
'tidy' => -1,
'deny_attribute' => 'id',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment