From 1fe7a4e8085396617710b76d886f213f70a4a9bd Mon Sep 17 00:00:00 2001
From: Peter Rotich <peter@osticket.com>
Date: Fri, 26 Feb 2016 17:03:33 +0000
Subject: [PATCH] html: Decode html entities before sanitizing

Encoded entities can be used to bypass safety checks
Don't remove iframe when using xml_dom to balance tags
---
 include/class.format.php | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/include/class.format.php b/include/class.format.php
index be314d467..c1acf4e03 100644
--- a/include/class.format.php
+++ b/include/class.format.php
@@ -136,7 +136,8 @@ class Format {
             // Remove empty nodes
             $xpath = new DOMXPath($doc);
             static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1,
-                'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
+                    'iframe' => 1, 'hr'=>1, 'img'=>1, 'input'=>1,
+                    'isindex'=>1, 'param'=>1);
             do {
                 $done = true;
                 $nodes = $xpath->query('//*[not(text()) and not(node())]');
@@ -218,6 +219,17 @@ class Format {
     static function __html_cleanup($el, $attributes=0) {
         static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1,
             'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
+
+        // We're dealing with closing tag
+        if ($attributes === 0)
+            return "</{$el}>";
+
+        // Remove iframe and embed without src (perhaps striped by spec)
+        // It would be awesome to rickroll such entry :)
+        if (in_array($el, array('iframe', 'embed'))
+                && (!isset($attributes['src']) || empty($attributes['src'])))
+            return '';
+
         // Clean unexpected class values
         if (isset($attributes['class'])) {
             $classes = explode(' ', $attributes['class']);
@@ -268,7 +280,20 @@ class Format {
         }
     }
 
-    function safe_html($html, $balance=1) {
+    function safe_html($html, $options=array()) {
+
+        $options = array_merge(array(
+                    // Balance html tags
+                    'balance' => 1,
+                    // Decoding special html char like &lt; and &gt; which
+                    // can be used to skip cleaning
+                    'decode' => true
+                    ),
+                $options);
+
+        if ($options['decode'])
+            $html = Format::htmldecode($html);
+
         // Remove HEAD and STYLE sections
         $html = preg_replace(
             array(':<(head|style|script).+?</\1>:is', # <head> and <style> sections
@@ -278,9 +303,11 @@ class Format {
             ),
             array('', '', '', ''),
             $html);
+
+        // HtmLawed specific config only
         $config = array(
             'safe' => 1, //Exclude applet, embed, iframe, object and script tags.
-            'balance' => $balance,
+            'balance' => $options['balance'],
             'comment' => 1, //Remove html comments (OUTLOOK LOVE THEM)
             'tidy' => -1,
             'deny_attribute' => 'id',
-- 
GitLab