diff --git a/include/class.format.php b/include/class.format.php
index f3e78fc8b08884cbf559a2ccdb4fbf11372506f5..d8463ec51bb622aca6c8784a326ab0998f174099 100644
--- a/include/class.format.php
+++ b/include/class.format.php
@@ -128,8 +128,12 @@ class Format {
         return is_array($var)?array_map(array('Format','strip_slashes'),$var):stripslashes($var);
     }
 
-    function wrap($text,$len=75) {
-        return wordwrap($text,$len,"\n",true);
+    /**
+     * Hard-wrap text at $len characters, breaking lines with "\n".
+     * A falsy $len (e.g. 0) disables wrapping and returns $text unchanged.
+     */
+    function wrap($text, $len=75) {
+        return $len ? wordwrap($text, $len, "\n", true) : $text;
     }
 
     function html($html, $config=array('balance'=>1)) {
@@ -137,6 +141,45 @@ class Format {
         return htmLawed($html, $config);
     }
 
+    /**
+     * Convert HTML to plain text.
+     *
+     * Prefers convert_html_to_text() when that function is defined and the
+     * xml extension is loaded. Otherwise falls back to a simple conversion:
+     * <br> becomes a newline, newlines are inserted after </div> and </p>,
+     * tags are stripped, and the result is html-decoded and wrapped.
+     *
+     * @param string $html  Markup to convert
+     * @param int    $width Width handed to convert_html_to_text() or
+     *                      Format::wrap()
+     * @param bool   $tidy  When true (default), run the input through
+     *                      Format::htmldecode() and Format::html() with
+     *                      the 'balance' option before converting
+     * @return string Text rendering of $html
+     */
+    function html2text($html, $width=74, $tidy=true) {
+
+        # Tidy html: decode, balance, sanitize tags
+        if($tidy)
+            $html = Format::html(Format::htmldecode($html), array('balance' => 1));
+
+        # See if advanced html2text is available (requires xml extension)
+        if (function_exists('convert_html_to_text')
+                && extension_loaded('xml'))
+            return convert_html_to_text($html, $width);
+
+        # Try simple html2text - insert line breaks after new line tags.
+        $html = preg_replace(
+                array(':<br ?/?\>:i', ':(</div>)\s*:i', ':(</p>)\s*:i'),
+                array("\n", "$1\n", "$1\n\n"),
+                $html);
+
+        # Strip tags, decode html chars and wrap resulting text.
+        return Format::wrap(
+                Format::htmldecode( Format::striptags($html, false)),
+                $width);
+    }
+
     function safe_html($html) {
         // Remove HEAD and STYLE sections
         $html = preg_replace(':<(head|style).+</\1>:is','', $html);
diff --git a/include/html2text.php b/include/html2text.php
index 72ca01f217471480e9c15358a893d9e33ba3e181..713b449274a062328f64d45f52da8b91412002c0 100644
--- a/include/html2text.php
+++ b/include/html2text.php
@@ -25,16 +25,8 @@
  * @return the HTML converted, as best as possible, to text
  */
 function convert_html_to_text($html, $width=74) {
-    $html = fix_newlines($html);
-
-    if (!extension_loaded('xml')) {
-        $html = preg_replace(
-            array(':<br ?/?>|</div>:i', ':</p>:i'),
-            array("\n", "\n\n"),
-            $html);
-        return Format::striptags($html);
-    }
 
+    $html = fix_newlines($html);
     $doc = new DOMDocument('1.0', 'utf-8');
     if (!@$doc->loadHTML($html))
         return $html;