diff --git a/include/class.format.php b/include/class.format.php index 11f0b1040b7e5a8e36f3ba7b65b92cf326dfe21e..d1007a2ac0f5fb7890a242069647dfb03f45ef05 100644 --- a/include/class.format.php +++ b/include/class.format.php @@ -138,12 +138,39 @@ class Format { } function safe_html($html) { + // Remove HEAD and STYLE sections + $html = preg_replace(':<(head|style).+</\1>:is','', $html); $config = array( - 'safe' => 1, //Exclude applet, embed, iframe, object and script tags. - 'balance' => 1, //balance and close unclosed tags. - 'comment' => 1, //Remove html comments (OUTLOOK LOVE THEM) - 'schemes' => 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https; src: cid, http, https, data' - ); + 'safe' => 1, //Exclude applet, embed, iframe, object and script tags. + 'balance' => 1, //balance and close unclosed tags. + 'comment' => 1, //Remove html comments (OUTLOOK LOVE THEM) + 'deny_attribute' => 'id', + 'schemes' => 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https; src: cid, http, https, data', + 'hook_tag' => function ($el, $attributes=0) { + static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, + 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); + if (isset($attributes['class'])) { + $classes = explode(' ', $attributes['class']); + foreach ($classes as $i=>$a) + // Unset all unsupported style classes -- anything by M$ + if (strpos($a, 'Mso') !== 0) + unset($classes[$i]); + if ($classes) + $attributes['class'] = implode(' ', $classes); + else + unset($attributes['class']); + } + $at = ''; + if (is_array($attributes)) { + foreach ($attributes as $k=>$v) + $at .= " $k=\"$v\""; + return "<{$el}{$at}".(isset($eE[$el])?" /":"").">"; + } + else { + return "</{$el}>"; + } + } + ); if (!preg_match('/style="[^"]*white-space:\s*pre/i', $html) !== false) $config['tidy'] = -1; // Clean extra whitspace diff --git a/include/htmLawed.php b/include/htmLawed.php index 28d234824fb4765e4e60544774c64732ef0d4e67..6d25f1f98a714269c3b65d87ca6f3dcb5db96dcb 100644 --- a/include/htmLawed.php +++ b/include/htmLawed.php @@ -1,9 +1,9 @@ <?php /* -htmLawed 1.1.10, 22 October 2011 +htmLawed 1.1.16, 29 August 2013 Copyright Santosh Patnaik -LGPL v3 license +Dual licensed with LGPL 3 and GPL 2+ A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed See htmLawed_README.txt/htm @@ -68,7 +68,7 @@ $C['css_expression'] = empty($C['css_expression']) ? 0 : 1; $C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1; $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1; $C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0; -$C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0; +$C['hook_tag'] = (!empty($C['hook_tag']) && is_callable($C['hook_tag'])) ? $C['hook_tag'] : 0; $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6; $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1; $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1; @@ -194,7 +194,10 @@ for($i=-1, $ci=count($t); ++$i<$ci;){ echo '<', $s, $e, $a, '>'; } if(isset($x[0])){ - if($do < 3 or isset($ok['#pcdata'])){echo $x;} + if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){ + echo '<div>', $x, '</div>'; + } + elseif($do < 3 or isset($ok['#pcdata'])){echo $x;} elseif(strpos($x, "\x02\x04")){ foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){ echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '')); @@ -202,14 +205,14 @@ for($i=-1, $ci=count($t); ++$i<$ci;){ }elseif($do > 4){echo preg_replace('`\S`', '', $x);} } // get markup - if(!preg_match('`^(/?)([a-zA-Z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;} + if(!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;} $s = null; $e = null; $a = null; $x = null; list($all, $s, $e, $a, $x) = $r; // close tag if($s){ if(isset($cE[$e]) or !in_array($e, $q)){continue;} // Empty/unopen if($p == $e){array_pop($q); echo '</', $e, '>'; unset($e); continue;} // Last open $add = ''; // Nesting - close open tags that need to be - for($j=-1, $cj=count($q); ++$j<$cj;){ + for($j=-1, $cj=count($q); ++$j<$cj;){ if(($d = array_pop($q)) == $e){break;} else{$add .= "</{$d}>";} } @@ -333,7 +336,7 @@ $c = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*']; static $d = 'denied:'; if(isset($c['!']) && substr($p, 0, 7) != $d){$p = "$d$p";} if(isset($c['*']) or !strcspn($p, '#?;') or (substr($p, 0, 7) == $d)){return "{$b}{$p}{$a}";} // All ok, frag, query, param -if(preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])){ // Denied prot +if(preg_match('`^([^:?[@!$()*,=/\'\]]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])){ // Denied prot return "{$b}{$d}{$p}{$a}"; } if($C['abs_url']){ @@ -376,7 +379,7 @@ return $r; function hl_spec($t){ // final $spec $s = array(); -$t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace('/"(?>(`.|[^"])*)"/sme', 'substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", \'`"\'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), "$0"), 1, -1)', trim($t))); +$t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace('/"(?>(`.|[^"])*)"/sme', 'substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", \'`"\'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), "$0"), 1, -1)', trim($t))); for($i = count(($t = explode(';', $t))); --$i>=0;){ $w = $t[$i]; if(empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e+1)))){continue;} @@ -427,7 +430,7 @@ if($C['make_tag_strict'] && isset($eD[$e])){ // close tag static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele if(!empty($m[1])){ - return (!isset($eE[$e]) ? "</$e>" : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('<', '>'), $t) : '')); + return (!isset($eE[$e]) ? (empty($C['hook_tag']) ? "</$e>" : $C['hook_tag']($e)) : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('<', '>'), $t) : '')); } // open tag & attr @@ -470,8 +473,8 @@ while(strlen($a)){ $aA[$nm] = ''; } break; case 2: // Val - if(preg_match('`^"[^"]*"`', $a, $m) or preg_match("`^'[^']*'`", $a, $m) or preg_match("`^\s*[^\s\"']+`", $a, $m)){ - $m = $m[0]; $w = 1; $mode = 0; $a = ltrim(substr_replace($a, '', 0, strlen($m))); + if(preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)){ + $a = ltrim($m[2]); $m = $m[1]; $w = 1; $mode = 0; $aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m); } break; @@ -488,7 +491,7 @@ global $S; $rl = isset($S[$e]) ? $S[$e] : array(); $a = array(); $nfr = 0; foreach($aA as $k=>$v){ - if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) or isset($rl[$k])) && ((!isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e])))){ + if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e]))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])){ if(isset($aNE[$k])){$v = $k;} elseif(!empty($lcase) && (($e != 'button' or $e != 'input') or $k == 'type')){ // Rather loose but ?not cause issues $v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v; @@ -622,7 +625,7 @@ if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';} static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%'); if($e == 'font'){ $a2 = ''; - if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=\s*([^"])(\S+)`i', $a, $m)){ + if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=(\s*)(\S+)`i', $a, $m)){ $a2 .= ' font-family: '. str_replace('"', '\'', trim($m[2])). ';'; } if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){ @@ -641,41 +644,50 @@ return ''; function hl_tidy($t, $w, $p){ // Tidy/compact HTM if(strpos(' pre,script,textarea', "$p,")){return $t;} -$t = str_replace(' </', '</', preg_replace(array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t))); +$t = preg_replace('`\s+`', ' ', preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t)); if(($w = strtolower($w)) == -1){ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t); } $s = strpos(" $w", 't') ? "\t" : ' '; $s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2)); -$n = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0; +$N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0; $a = array('br'=>1); -$b = array('button'=>1, 'input'=>1, 'option'=>1); +$b = array('button'=>1, 'input'=>1, 'option'=>1, 'param'=>1); $c = array('caption'=>1, 'dd'=>1, 'dt'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1); -$d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); -ob_start(); -if(isset($d[$p])){echo str_repeat($s, ++$n);} -$t = explode('<', $t); -echo ltrim(array_shift($t)); -for($i=-1, $j=count($t); ++$i<$j;){ - $r = ''; list($e, $r) = explode('>', $t[$i]); - $x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1)); - $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0); - $e = "<$e>"; - if(isset($d[$y])){ - if(!$x){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);} - else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));} - echo ltrim($r); continue; +$d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); +$T = explode('<', $t); +$X = 1; +while($X){ + $n = $N; + $t = $T; + ob_start(); + if(isset($d[$p])){echo str_repeat($s, ++$n);} + echo ltrim(array_shift($t)); + for($i=-1, $j=count($t); ++$i<$j;){ + $r = ''; list($e, $r) = explode('>', $t[$i]); + $x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1)); + $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0); + $e = "<$e>"; + if(isset($d[$y])){ + if(!$x){ + if($n){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);} + else{++$N; ob_end_clean(); continue 2;} + } + else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));} + echo $r; continue; + } + $f = "\n". str_repeat($s, $n); + if(isset($c[$y])){ + if(!$x){echo $e, $f, $r;} + else{echo $f, $e, $r;} + }elseif(isset($b[$y])){echo $f, $e, $r; + }elseif(isset($a[$y])){echo $e, $f, $r; + }elseif(!$y){echo $f, $e, $f, $r; + }else{echo $e, $r;} } - $f = "\n". str_repeat($s, $n); - if(isset($c[$y])){ - if(!$x){echo $e, $f, ltrim($r);} - else{echo $f, $e, $r;} - }elseif(isset($b[$y])){echo $f, $e, $r; - }elseif(isset($a[$y])){echo $e, $f, ltrim($r); - }elseif(!$y){echo $f, $e, $f, ltrim($r); - }else{echo $e, $r;} -} -$t = preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents()); + $X = 0; +} +$t = str_replace(array("\n ", " \n"), "\n", preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents())); ob_end_clean(); if(($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)){ $t = str_replace("\n", $l, $t); @@ -686,7 +698,7 @@ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array( function hl_version(){ // rel -return '1.1.10'; +return '1.1.16'; // eof } @@ -708,4 +720,4 @@ function kses_hook($t, &$C, &$S){ // kses compat return $t; // eof -} \ No newline at end of file +}