Skip to content
Snippets Groups Projects
Commit 6c359046 authored by Jared Hancock's avatar Jared Hancock
Browse files

html: Balancing is important

Commit 9ae01bf2 turned off HTML balancing. However,
quoted response removal can create unbalanced HTML. That HTML would be
stored unbalanced and would require extra processing when displayed on the
ticket thread.

This patch adds balancing support using libxml (if it is enabled), which
balances HTML without corrupting it the way htmLawed does when its own
balancing is enabled.
parent 56cc7098
No related branches found
No related tags found
No related merge requests found
......@@ -115,11 +115,79 @@ class Format {
return $len ? wordwrap($text, $len, "\n", true) : $text;
}
function html($html, $config=array('balance'=>1)) {
/**
 * Balance (close / repair) the tags of an HTML fragment using libxml.
 *
 * The fragment is wrapped in a <div>, parsed with DOMDocument::loadHTML()
 * and serialized again; the wrapper is stripped from the result.
 *
 * @param string $html         HTML fragment to balance.
 * @param bool   $remove_empty When true, elements without any child node
 *                             are removed (repeatedly, so parents that
 *                             become empty are removed as well). Void
 *                             elements such as <br> and <img> are kept.
 * @return string The balanced HTML. The input is returned unchanged when
 *                the dom extension is missing, the input is blank, the
 *                parse fails, or no <body> remains after processing.
 */
function html_balance($html, $remove_empty=true) {
    if (!extension_loaded('dom'))
        return $html;
    if (!trim($html))
        return $html;

    $doc = new DomDocument();
    $xhtml = '<?xml encoding="utf-8"><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        // Wrap the content in a <div> because libxml would use a <p>
        . "<div>$html</div>";
    $doc->encoding = 'utf-8';
    // NOTE(review): the documented DOM property is spelled
    // "preserveWhiteSpace"; as written this assigns a differently-cased
    // name. Left untouched to keep the output unchanged -- confirm intent.
    $doc->preserveWhitespace = false;
    $doc->recover = true;

    // Parser warnings are silenced on purpose: the input is expected to
    // be malformed, and a failed load falls back to the original text.
    if (false === @$doc->loadHTML($xhtml))
        return $html;

    if ($remove_empty) {
        // Remove empty nodes
        $xpath = new DOMXPath($doc);
        // Void elements are legitimately childless and must survive
        static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1,
            'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
        do {
            // Removing a node can leave its parent empty, so keep
            // sweeping until a pass removes nothing.
            $done = true;
            $nodes = $xpath->query('//*[not(text()) and not(node())]');
            foreach ($nodes as $n) {
                if (isset($eE[$n->nodeName]))
                    continue;
                $n->parentNode->removeChild($n);
                $done = false;
            }
        } while (!$done);
    }

    $body = $doc->getElementsByTagName('body');
    if (!$body->length)
        return $html;

    // saveHTML() accepts a node argument as of PHP 5.3.6. Use
    // version_compare() rather than a plain string comparison, which
    // would order "10.0.0" before "5.3.6".
    if (version_compare(PHP_VERSION, '5.3.6', '>=')) {
        $html = $doc->saveHTML($body->item(0)->firstChild);
    }
    else {
        // Older PHP can only dump the whole document: strip the
        // document-level scaffolding afterwards.
        $html = $doc->saveHTML();
        $html = preg_replace('`^<!DOCTYPE.+?>|<\?xml .+?>|</?html>|</?body>|</?head>|<meta .+?/?>`', '', $html); # <?php
    }
    // Drop the wrapper <div> added before parsing
    return preg_replace('`^<div>|</div>$`', '', $html);
}
/**
 * Run HTML through htmLawed, balancing it with libxml beforehand when
 * balancing is requested and the dom extension is available.
 *
 * @param string $html   HTML to process.
 * @param array  $config htmLawed configuration. 'balance' defaults to 1;
 *                       an optional 'spec' entry is passed to htmLawed as
 *                       its third argument.
 * @return mixed Whatever htmLawed() returns for the processed HTML.
 */
function html($html, $config=array()) {
    require_once(INCLUDE_DIR.'htmLawed.php');

    // htmLawed default: balance unless the caller specified otherwise
    $config = $config + array(
        'balance' => 1,
    );

    $spec = isset($config['spec']) ? $config['spec'] : false;

    // Prefer libxml for balancing. htmLawed's own balancing also tries to
    // fix improper HTML and corrupts it in the process: for instance,
    // when an email client nests block elements inside inline elements,
    // htmLawed converts those blocks to inlines to make the HTML valid.
    $use_libxml = $config['balance'] && extension_loaded('dom');
    if ($use_libxml) {
        // libxml balances the markup, so htmLawed must not do it too
        $config['balance'] = 0;
        $html = self::html_balance($html);
    }

    return htmLawed($html, $config, $spec);
}
......@@ -200,7 +268,7 @@ class Format {
}
}
function safe_html($html) {
function safe_html($html, $balance=1) {
// Remove HEAD and STYLE sections
$html = preg_replace(
array(':<(head|style|script).+?</\1>:is', # <head> and <style> sections
......@@ -212,7 +280,7 @@ class Format {
$html);
$config = array(
'safe' => 1, //Exclude applet, embed, iframe, object and script tags.
'balance' => 0, // No balance — corrupts poorly formatted Outlook html
'balance' => $balance,
'comment' => 1, //Remove html comments (OUTLOOK LOVE THEM)
'tidy' => -1,
'deny_attribute' => 'id',
......@@ -295,6 +363,7 @@ class Format {
$text);
//make urls clickable.
$text = self::html_balance($text, false);
$text = Format::clickableurls($text);
if ($inline_images)
......@@ -366,6 +435,7 @@ class Format {
},
'schemes' => 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https; src: cid, http, https, data',
'elements' => '*+iframe',
'balance' => 0,
'spec' => 'span=data-src,width,height',
);
return Format::html($text, $config);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment