Newer
Older
// XXX: This does not wrap Chinese characters well
// @see http://xml.ascc.net/en/utf-8/faq/zhl10n-faq-xsl.html#qb1
// for some more rules concerning Chinese chars
/**
 * UTF-8 aware word wrapping implemented with a single preg_replace() pass.
 *
 * Both patterns use the /u modifier, so $width is counted in characters
 * rather than bytes. In $cut mode a base character followed by any number
 * of combining marks (\p{M}) is counted as one unit.
 *
 * Caveats that follow directly from the patterns below:
 *  - Without $cut, a match may only start at whitespace or at an opening
 *    bracket (\p{Ps}), so the text preceding the first such character is
 *    never measured against $width, and words longer than $width are left
 *    intact.
 *  - $break is spliced into the replacement string, so "$n" / "\n"-style
 *    backreference sequences inside it would be interpreted by PCRE.
 *  - The trailing rtrim() treats $break as a character list.
 *
 * @param string $string Text to wrap (expected to be valid UTF-8)
 * @param int    $width  Maximum number of characters per matched run
 * @param string $break  String inserted at each wrapping point
 * @param bool   $cut    When true, runs with no break opportunity are
 *                       split after exactly $width characters
 * @return string Wrapped text with trailing $break characters removed
 */
function mb_wordwrap($string, $width=75, $break="\n", $cut=false) {
    if ($cut) {
        // Match anything 1 to $width chars long followed by whitespace or EOS,
        // otherwise match anything $width chars long
        $search = '/((?>[^\n\p{M}]\p{M}*){1,'.$width.'})(?:[ \n]|$|(\p{Ps}))|((?>[^\n\p{M}]\p{M}*){'
            .$width.'})/uS'; # <?php
        // $1 = run ending at a break opportunity, $3 = hard-cut run,
        // $2 = opening bracket that must start the next line
        $replace = '$1$3'.$break.'$2';
    } else {
        // Anchor the beginning of the pattern with a lookahead
        // to avoid crazy backtracking when words are longer than $width
        //
        // This must be assigned to $search (the variable preg_replace()
        // reads below); otherwise the default, non-cut mode has no pattern.
        $search = '/(?=[\s\p{Ps}])(.{1,'.$width.'})(?:\s|$|(\p{Ps}))/uS';
        $replace = '$1'.$break.'$2';
    }
    return rtrim(preg_replace($search, $replace, $string), $break);
}
// Thanks http://www.php.net/manual/en/ref.mbstring.php#90611
//
// Pads $input so that its display width (as reported by mb_strwidth(),
// with combining marks not counted) reaches $pad_length.
//
// str_pad() works on bytes, so the requested length is converted into a
// byte target: the byte length of the input, minus its display width, plus
// the number of combining marks (\p{M}), plus the requested width.
//
// $input      - string to pad (expected to be valid UTF-8)
// $pad_length - desired display width of the result
// $pad_string - padding string, handed to str_pad() unchanged
// $pad_style  - STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
// Returns the padded string.
//
// PHP 8.3 introduced a native mb_str_pad() in the mbstring extension;
// declaring a function with the same name there is a fatal error, hence the
// function_exists() guard. NOTE(review): the native function pads to a
// length in characters rather than to a display width, so results can
// differ for wide (e.g. CJK) characters and combining marks -- confirm this
// is acceptable for callers, or rename this helper.
if (!function_exists('mb_str_pad')) {
    function mb_str_pad($input, $pad_length, $pad_string=" ",
            $pad_style=STR_PAD_RIGHT) {
        $marks = preg_match_all('/\p{M}/u', $input, $match);
        return str_pad($input,
            strlen($input)-mb_strwidth($input)+$marks+$pad_length, $pad_string,
            $pad_style);
    }
}
// Enable use of html2text from command line
// The syntax is the following: php html2text.php file.html [width]
//
// The output width defaults to 74 columns. An explicit second argument
// takes precedence; otherwise a positive COLUMNS environment variable is
// used when one is set.
//
// The do { ... } while (0) wrapper only exists so that each guard can
// `break` out early without terminating a script that merely includes
// this file.
do {
    // Only act when run by the CLI SAPI, with at least one argument, and
    // when the script being executed is html2text.php itself
    if (PHP_SAPI != 'cli') break;
    if (empty ($_SERVER['argc']) || $_SERVER['argc'] < 2) break;
    if (empty ($_SERVER['PHP_SELF']) || FALSE === strpos ($_SERVER['PHP_SELF'], 'html2text.php') ) break;
    $file = $argv[1];
    $width = 74;
    if (isset($argv[2]))
        $width = (int) $argv[2];
    // PHP has no $ENV variable; read the environment through getenv().
    // getenv() yields false when COLUMNS is unset, which casts to 0 and
    // therefore keeps the default width.
    elseif (($columns = (int) getenv('COLUMNS')) > 0)
        $width = $columns;
    require_once(dirname(__file__).'/../bootstrap.php');
    Bootstrap::i18n_prep();
    echo convert_html_to_text (file_get_contents ($file), $width);
} while (0);