-
Peter Rotich authored
Lint test rulez!
Peter Rotich authoredLint test rulez!
class.format.php 26.62 KiB
<?php
/*********************************************************************
class.format.php
Collection of helper functions used for formatting
Peter Rotich <peter@osticket.com>
Copyright (c) 2006-2013 osTicket
http://www.osticket.com
Released under the GNU General Public License WITHOUT ANY WARRANTY.
See LICENSE.TXT for details.
vim: expandtab sw=4 ts=4 sts=4:
**********************************************************************/
class Format {
/**
 * Render a byte count as a short, human-readable size.
 *
 * Non-numeric input is handed back untouched. Values below 1024 are
 * reported in bytes, values below 900 KiB in kb and everything else in
 * mb (rounded to one decimal place).
 */
function file_size($bytes) {
    if (!is_numeric($bytes)) {
        return $bytes;
    }
    $kib = 1024;
    if ($bytes < $kib) {
        return $bytes.' bytes';
    }
    // Switch to megabytes once the value reaches 900 KiB
    if ($bytes < 900 * $kib) {
        $unit = 'kb';
        $divisor = $kib;
    } else {
        $unit = 'mb';
        $divisor = $kib * $kib;
    }
    return round($bytes / $divisor, 1).' '.$unit;
}
/**
 * Convert a shorthand size such as "10M", "512k" or "1G" into bytes.
 *
 * The last character selects the multiplier (K/M/G, case-insensitive).
 * Input without a recognized suffix is returned unchanged.
 *
 * @param string|int $size Shorthand size
 * @return int|string Byte count, or the input when no suffix matched
 */
function filesize2bytes($size) {
    // Cast before shifting. The previous `(int)$size <<= 20` parsed as
    // `(int)($size <<= 20)`, i.e. it shifted the raw "10M" string and
    // relied on implicit coercion (which raises a warning).
    $number = (int) $size;
    switch (substr($size, -1)) {
        case 'K': case 'k': return $number << 10;
        case 'M': case 'm': return $number << 20;
        case 'G': case 'g': return $number << 30;
    }
    return $size;
}
/**
 * Encode text into the desired encoding, honoring the source charset
 * when one is supplied (or can be detected).
 *
 * Conversion is attempted with iconv, then mb_convert_encoding, then
 * utf8_encode (latin-1 to utf-8 only). When the conversion yields an
 * empty/false result for non-empty input, the input is returned as-is.
 */
function encode($text, $charset=null, $encoding='utf-8') {
    // No charset given -- try auto-detecting it
    if (!$charset && function_exists('mb_detect_encoding')) {
        $charset = mb_detect_encoding($text);
    }

    // Incorrect, bogus, or ambiguous charsets (and an empty charset) are
    // treated as ISO-8859-1
    $bogus = array('default','x-user-defined','iso','us-ascii');
    if (!$charset || in_array(strtolower(trim($charset)), $bogus)) {
        $charset = 'ISO-8859-1';
    }

    $converted = $text;
    if (function_exists('iconv')) {
        $converted = iconv($charset, $encoding.'//IGNORE', $text);
    } elseif (function_exists('mb_convert_encoding')) {
        $converted = mb_convert_encoding($text, $encoding, $charset);
    } elseif (!strcasecmp($encoding, 'utf-8')
            && function_exists('utf8_encode')
            && !strcasecmp($charset, 'ISO-8859-1')) {
        $converted = utf8_encode($text);
    }

    // A false/empty result for non-empty input means the charset was
    // (likely) invalid: fall back to the untouched input and assume
    // 8-bit (latin-1 / iso-8859-1) text
    if (!$converted && $text) {
        return $text;
    }
    return $converted;
}
/**
 * Convenience wrapper around Format::encode() with utf-8 as the target
 * encoding.
 */
function utf8encode($text, $charset=null) {
    $target = 'utf-8';
    return Format::encode($text, $charset, $target);
}
/**
 * Decode a MIME encoded-word header value (e.g. "=?UTF-8?B?...?=") into
 * the requested encoding.
 *
 * Uses imap_mime_header_decode when available, then iconv_mime_decode
 * for text starting with '=', then imap_utf8 when utf-8 is requested.
 * Text that cannot be decoded is returned unchanged.
 *
 * @param string $text Raw header value
 * @param string $encoding Target encoding
 * @return string Decoded text
 */
function mimedecode($text, $encoding='UTF-8') {
    if (function_exists('imap_mime_header_decode')
            && ($parts = imap_mime_header_decode($text))) {
        $str = '';
        foreach ($parts as $part)
            $str .= Format::encode($part->text, $part->charset, $encoding);
        $text = $str;
    // strncmp() instead of $text[0] so an empty string does not raise
    // an "uninitialized string offset" warning
    } elseif (strncmp((string) $text, '=', 1) === 0
            && function_exists('iconv_mime_decode')) {
        $decoded = iconv_mime_decode($text, 0, $encoding);
        // iconv_mime_decode() returns false on malformed input; keep the
        // original text rather than handing false to the caller
        if ($decoded !== false)
            $text = $decoded;
    } elseif (!strcasecmp($encoding, 'utf-8')
            && function_exists('imap_utf8')) {
        $text = imap_utf8($text);
    }
    return $text;
}
/**
 * Decode a filename from a content-disposition header encoded per
 * RFC5987, e.g. filename*=utf-8''filename.png. The value is made of a
 * charset, an optional language tag (RFC5646) and the URL-encoded
 * filename (in the given charset). Input that does not follow that
 * layout is returned unchanged.
 */
function decodeRfc5987($filename) {
    $parts = array();
    $matched = preg_match("/([\w!#$%&+^_`{}~-]+)'([\w-]*)'(.*)$/",
        $filename, $parts);
    if (!$matched) {
        return $filename;
    }
    // XXX: The language component ($parts[2]) is currently ignored; the
    //      charset hint is sufficient.
    $charset = $parts[1];
    $encoded = $parts[3];
    return self::utf8encode(urldecode($encoded), $charset);
}
/**
 * JSON encoder: loads class.json.php on demand and delegates to
 * JsonDataEncoder::encode().
 */
function json_encode($what) {
    require_once INCLUDE_DIR.'class.json.php';
    $encoded = JsonDataEncoder::encode($what);
    return $encoded;
}
/**
 * Pretty-print a phone number.
 *
 * Numbers with exactly 7 digits become "NNN-NNNN", numbers with exactly
 * 10 digits become "(NNN) NNN-NNNN"; anything else is returned as given.
 */
function phone($phone) {
    // Keep the digits only
    $digits = preg_replace("/[^0-9]/", "", $phone);
    switch (strlen($digits)) {
        case 7:
            return preg_replace("/([0-9]{3})([0-9]{4})/", "$1-$2", $digits);
        case 10:
            return preg_replace("/([0-9]{3})([0-9]{3})([0-9]{4})/",
                "($1) $2-$3", $digits);
    }
    return $phone;
}
/**
 * Shorten a string to at most $len bytes.
 *
 * @param string $string Text to shorten
 * @param int $len Maximum length; a falsy value disables truncation
 * @param bool $hard When true cut exactly at $len; otherwise cut back to
 *      the last space within the limit and append ' ...'
 * @return string The (possibly) shortened text
 */
function truncate($string,$len,$hard=false) {
    // A string that already fits -- including one of exactly $len
    // bytes -- is returned untouched
    if (!$len || $len >= strlen($string))
        return $string;

    $string = substr($string, 0, $len);
    if ($hard)
        return $string;

    // Prefer breaking at the last space. Without a usable space (none at
    // all, or only at position 0) keep the hard cut instead of dropping
    // the entire text.
    $cut = strrpos($string, ' ');
    if ($cut)
        $string = substr($string, 0, $cut);

    return $string.' ...';
}
/**
 * Remove backslash escapes from a value; arrays are processed
 * recursively, element by element.
 */
function strip_slashes($var) {
    if (!is_array($var)) {
        return stripslashes($var);
    }
    return array_map(array('Format','strip_slashes'), $var);
}
/**
 * Hard-wrap text at $len characters using "\n" as the line break; a
 * falsy $len disables wrapping.
 */
function wrap($text, $len=75) {
    if (!$len) {
        return $text;
    }
    return wordwrap($text, $len, "\n", true);
}
/**
 * Run HTML through htmLawed with the given configuration. An optional
 * 'spec' entry in $config is passed along as htmLawed's spec argument.
 */
function html($html, $config=array('balance'=>1)) {
    require_once(INCLUDE_DIR.'htmLawed.php');
    $spec = isset($config['spec']) ? $config['spec'] : false;
    return htmLawed($html, $config, $spec);
}
/**
 * Convert HTML to plain text wrapped at $width characters.
 *
 * With $tidy the markup is first decoded and balanced. The advanced
 * converter (convert_html_to_text, requires the dom extension) is used
 * when available; otherwise line breaks are inserted after <br>, </div>
 * and </p>, the tags are stripped and the entities decoded.
 */
function html2text($html, $width=74, $tidy=true) {
    # Tidy html: decode, balance, sanitize tags
    if ($tidy) {
        $decoded = Format::htmldecode($html);
        $html = Format::html($decoded, array('balance' => 1));
    }

    # Prefer the advanced html2text when it can be used
    if (function_exists('convert_html_to_text')
            && extension_loaded('dom')) {
        return convert_html_to_text($html, $width);
    }

    # Simple html2text - insert line breaks after new line tags.
    $patterns = array(':<br ?/?\>:i', ':(</div>)\s*:i', ':(</p>)\s*:i');
    $replacements = array("\n", "$1\n", "$1\n\n");
    $html = preg_replace($patterns, $replacements, $html);

    # Strip tags, decode html chars and wrap resulting text.
    $plain = Format::htmldecode(Format::striptags($html, false));
    return Format::wrap($plain, $width);
}
/**
 * Tag hook (wired in as htmLawed's 'hook_tag' by safe_html()) which
 * rebuilds a single tag after filtering its class and style attributes.
 *
 * Parameters:
 * $el - (string) element name
 * $attributes - (array|int) attribute name => value pairs for an opening
 *      tag; any non-array value is treated as a closing tag
 *
 * Returns:
 * (string) the rebuilt opening tag (with " /" appended for the void
 * elements listed in $eE) or the closing tag "</el>"
 */
static function __html_cleanup($el, $attributes=0) {
// Elements rendered as self-closing tags
static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1,
'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
// Clean unexpected class values
if (isset($attributes['class'])) {
$classes = explode(' ', $attributes['class']);
foreach ($classes as $i=>$a)
// Unset all unsupported style classes -- anything but M$
// (only class names beginning with "Mso" are kept)
if (strpos($a, 'Mso') !== 0)
unset($classes[$i]);
if ($classes)
$attributes['class'] = implode(' ', $classes);
else
unset($attributes['class']);
}
// Clean browser-specific style attributes
if (isset($attributes['style'])) {
// Entities are decoded first, then the declarations are split on ';'
$styles = preg_split('/;\s*/S', html_entity_decode($attributes['style']));
$props = array();
foreach ($styles as $i=>&$s) {
// NOTE(review): explode() has no limit here, so a value which itself
// contains ':' (e.g. url(http://...)) appears to be cut at its first
// colon -- confirm whether that is intended
@list($prop, $val) = explode(':', $s);
// Only the first declaration of a property is kept
if (isset($props[$prop])) {
unset($styles[$i]);
continue;
}
$props[$prop] = true;
// Remove unset or browser-specific style rules
if (!$val || !$prop || $prop[0] == '-' || substr($prop, 0, 4) == 'mso-')
unset($styles[$i]);
// Remove quotes of properties without enclosed space
if (!strpos($val, ' '))
$val = str_replace('"','', $val);
else
$val = str_replace('"',"'", $val);
// $s is a reference into $styles, so this rewrites the declaration
$s = "$prop:".trim($val);
}
// Break the reference left behind by the by-reference foreach
unset($s);
if ($styles)
$attributes['style'] = Format::htmlencode(implode(';', $styles));
else
unset($attributes['style']);
}
$at = '';
if (is_array($attributes)) {
// Opening tag: re-serialize the remaining attributes
foreach ($attributes as $k=>$v)
$at .= " $k=\"$v\"";
return "<{$el}{$at}".(isset($eE[$el])?" /":"").">";
}
else {
// Not an attribute array: emit the closing tag
return "</{$el}>";
}
}
/**
 * Sanitize untrusted HTML: drop head/style/script sections, conditional
 * markers, doctype and processing instructions, then run the remainder
 * through htmLawed with a restrictive configuration.
 */
function safe_html($html) {
    // Sections and markers removed before htmLawed sees the document
    $junk = array(
        ':<(head|style|script).+?</\1>:is', # <head>, <style> and <script> sections
        ':<!\[[^]<]+\]>:',            # <![if !mso]> and friends
        ':<!DOCTYPE[^>]+>:',          # <!DOCTYPE ... >
        ':<\?[^>]+>:',                # <?xml version="1.0" ... >
    );
    $html = preg_replace($junk, array('', '', '', ''), $html);

    // Every tag is passed through __html_cleanup()
    $cleanup = function($e, $a=0) { return Format::__html_cleanup($e, $a); };

    $config = array(
        'safe' => 1, //Exclude applet, embed, iframe, object and script tags.
        'balance' => 1, //balance and close unclosed tags.
        'comment' => 1, //Remove html comments (OUTLOOK LOVE THEM)
        'tidy' => -1,
        'deny_attribute' => 'id',
        'schemes' => 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https; src: cid, http, https, data',
        'hook_tag' => $cleanup,
        'elements' => '*+iframe',
        'spec' => 'iframe=-*,height,width,type,src(match="`^(https?:)?//(www\.)?(youtube|dailymotion|vimeo)\.com/`i"),frameborder; div=data-mid',
    );
    return Format::html($html, $config);
}
/**
 * Turn image.php?h=... urls back into content-id (cid:) references. The
 * first 32 characters after "h=" become the content id; the 32 word
 * characters that follow are dropped.
 */
function localizeInlineImages($text) {
    $pattern = '/image\\.php\\?h=([\\w.-]{32})\\w{32}/';
    $replacement = 'cid:$1';
    return preg_replace($pattern, $replacement, $text);
}
/**
 * Sanitize HTML: balance and neutralize unsafe tags, map image urls back
 * to content-ids and, when requested, strip all tags (entities are left
 * encoded).
 */
function sanitize($text, $striptags=false) {
    $clean = Format::safe_html($text);
    $clean = self::localizeInlineImages($clean);
    if (!$striptags) {
        return $clean;
    }
    // Strip tags with decoding disabled
    return Format::striptags($clean, false);
}
/**
 * Alias of Format::htmlencode().
 */
function htmlchars($var) {
    $encoded = Format::htmlencode($var);
    return $encoded;
}
/**
 * Convert special characters to HTML entities (UTF-8, existing entities
 * are not double-encoded). Arrays are encoded recursively.
 *
 * @param mixed $var Scalar or array of values
 * @return mixed Encoded string, or array of encoded strings
 */
function htmlencode($var) {
    static $flags = null;

    if (is_array($var))
        return array_map(array('Format', 'htmlencode'), $var);

    if ($flags === null) {
        $flags = ENT_COMPAT;
        // ENT_HTML401 only exists as of PHP 5.4. version_compare() is
        // used because comparing version strings with >= is
        // lexicographic ('5.10.0' >= '5.4.0' is false).
        if (version_compare(PHP_VERSION, '5.4.0', '>='))
            $flags |= ENT_HTML401;
    }

    try {
        return htmlentities((string) $var, $flags, 'UTF-8', false);
    } catch (Exception $e) {
        return $var;
    }
}
/**
 * Convert HTML entities back to their characters (UTF-8). Arrays are
 * decoded recursively.
 *
 * @param mixed $var Scalar or array of values
 * @return mixed Decoded string, or array of decoded strings
 */
function htmldecode($var) {
    if (is_array($var))
        return array_map(array('Format','htmldecode'), $var);

    $flags = ENT_COMPAT;
    // ENT_HTML401 only exists as of PHP 5.4. version_compare() is used
    // because comparing version strings with >= is lexicographic
    // ('5.10.0' >= '5.4.0' is false).
    if (version_compare(PHP_VERSION, '5.4.0', '>='))
        $flags |= ENT_HTML401;

    // Cast like htmlencode() does so null/numeric input is accepted
    return html_entity_decode((string) $var, $flags, 'UTF-8');
}
/**
 * Encode a value for use in a form input; alias of Format::htmlencode().
 */
function input($var) {
    $encoded = Format::htmlencode($var);
    return $encoded;
}
/**
 * Format text for display: <img> tags with an http(s) source are swapped
 * for placeholder <span> elements (so showing offsite images is
 * optional), urls are made clickable and, unless disabled, inline (cid:)
 * images are made viewable.
 */
function display($text, $inline_images=true) {
    $hideOffsite = function($match) {
        // Drop embedded classes -- they don't refer to ours
        $match = preg_replace('/class="[^"]*"/', '', $match);
        return sprintf('<span %s class="non-local-image" data-%s %s></span>',
            $match[1], $match[2], $match[3]);
    };
    $text = preg_replace_callback(
        '/<img ([^>]*)(src="http[^"]+")([^>]*)\/>/', $hideOffsite, $text);

    //make urls clickable.
    $text = Format::clickableurls($text);

    return $inline_images ? self::viewableImages($text) : $text;
}
/**
 * Strip HTML tags from a value, optionally decoding entities first.
 * Arrays are processed element by element with the same $decode flag.
 */
function striptags($var, $decode=true) {
    if (is_array($var)) {
        $flags = array_fill(0, count($var), $decode);
        return array_map(array('Format','striptags'), $var, $flags);
    }
    if ($decode) {
        $var = Format::htmldecode($var);
    }
    return strip_tags($var);
}
/**
 * Make urls and email addresses clickable. Mainly for display.
 *
 * Text found between tags is scanned for urls and email addresses which
 * are wrapped in <a> tags. Afterwards existing links, placeholder spans
 * (data-src) and iframes are rewritten to route through l.php.
 *
 * @param string $text HTML text
 * @param bool $trampoline Route generated links through l.php (with the
 *      link token) instead of linking directly
 * @return string HTML with clickable links
 */
function clickableurls($text, $trampoline=true) {
    global $ost;

    $token = $ost->getLinkToken();
    // Find all text between tags
    $text = preg_replace_callback(':^[^<]+|>[^<]+:',
        function($match) use ($token, $trampoline) {
            // Scan for things that look like URLs
            return preg_replace_callback(
                '`(?<!>)(((f|ht)tp(s?)://|(?<!//)www\.)([-+~%/.\w]+)(?:[-?#+=&;%@.\w]*)?)'
               .'|(\b[_\.0-9a-z-]+@([0-9a-z][0-9a-z-]+\.)+[a-z]{2,4})`',
                function ($match) use ($token, $trampoline) {
                    if ($match[1]) {
                        $url = $match[1];
                        // Trailing punctuation is moved out of the link
                        // and re-appended after it. A local variable is
                        // used for this; $match[9] is not a capture
                        // group of the expression above and reading it
                        // raised an undefined-index warning.
                        $trailer = '';
                        while (in_array(substr($url, -1),
                                array('.','?','-',':',';'))) {
                            $trailer = substr($url, -1) . $trailer;
                            $url = substr($url, 0, strlen($url)-1);
                        }
                        // "www." matches carry no scheme
                        if (strpos($match[2], '//') === false) {
                            $url = 'http://' . $url;
                        }
                        if ($trampoline)
                            return '<a href="l.php?url='.urlencode($url)
                                .sprintf('&auth=%s" target="_blank">', $token)
                                .$url.'</a>'.$trailer;
                        else
                            return sprintf('<a href="%s">%s</a>%s',
                                $url, $url, $trailer);
                    } elseif ($match[6]) {
                        return sprintf('<a href="mailto:%1$s" target="_blank">%1$s</a>',
                            $match[6]);
                    }
                    // Neither alternative captured anything: leave the
                    // matched text alone
                    return $match[0];
                },
                $match[0]);
        },
        $text);

    // Now change @href and @src attributes to come back through our
    // system as well
    $config = array(
        'hook_tag' => function($e, $a=0) use ($token) {
            static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1,
                'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
            if ($e == 'a' && $a) {
                if (isset($a['href'])
                        && strpos($a['href'], 'mailto:') !== 0
                        && strpos($a['href'], 'l.php?') === false)
                    $a['href'] = 'l.php?url='.urlencode($a['href'])
                        .'&auth='.$token;
                // ALL link targets open in a new tab
                $a['target'] = '_blank';
                $a['class'] = 'no-pjax';
            }
            // Images which are external are rewritten to <div
            // data-src='url...'/>
            elseif ($e == 'span' && $a && isset($a['data-src']))
                $a['data-src'] = 'l.php?url='.urlencode($a['data-src'])
                    .'&auth='.$token;
            // URLs for videos need to route too
            elseif ($e == 'iframe' && $a && isset($a['src']))
                $a['src'] = 'l.php?url='.urlencode($a['src'])
                    .'&auth='.$token;
            $at = '';
            if (is_array($a)) {
                foreach ($a as $k=>$v)
                    $at .= " $k=\"$v\"";
                return "<{$e}{$at}".(isset($eE[$e])?" /":"").">";
            }
            else {
                return "</{$e}>";
            }
        },
        'schemes' => 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https; src: cid, http, https, data',
        'elements' => '*+iframe',
        'spec' => 'span=data-src,width,height',
    );
    return Format::html($text, $config);
}
/**
 * Trim the string and collapse runs of three or more newlines into a
 * single blank line.
 */
function stripEmptyLines($string) {
    $trimmed = trim($string);
    return preg_replace("/\n{3,}/", "\n\n", $trimmed);
}
/**
 * Replace quoted "cid:<32-char key>" references with a url served by
 * $script (plus a data-cid attribute) for every key AttachmentFile can
 * look up; unknown keys are left untouched.
 */
function viewableImages($html, $script='image.php') {
    $rewrite = function($match) use ($script) {
        $key = $match[1];
        $file = AttachmentFile::lookup($key);
        if (!$file) {
            return $match[0];
        }
        return sprintf('"%s?h=%s" data-cid="%s"',
            $script, $file->getDownloadHash(), $key);
    };
    return preg_replace_callback('/"cid:([\w._-]{32})"/', $rewrite, $html);
}
/**
 * Thanks, http://us2.php.net/manual/en/function.implode.php
 * Implode an associative array into a string of key/value pairs.
 * Nested array values are flattened with a comma first. Non-array input
 * is returned unchanged.
 * @param string $glue The glue between key and value
 * @param string $separator Separator between pairs
 * @param array $array The array to implode
 * @return string The imploded array
 */
function array_implode( $glue, $separator, $array ) {
    if (!is_array($array)) {
        return $array;
    }
    $pairs = array();
    foreach ($array as $key => $val) {
        $flat = is_array($val) ? implode(',', $val) : $val;
        $pairs[] = $key.$glue.$flat;
    }
    return implode($separator, $pairs);
}
/**
 * Format a number of seconds as elapsed time, e.g. "1d,1h,1m".
 *
 * Days and hours are only shown when greater than zero; minutes are
 * always shown (rounded to the nearest minute).
 *
 * @param int|string $sec Elapsed seconds
 * @return string Formatted time, or "" for empty / non-numeric input
 */
function elapsedTime($sec) {
    if (!$sec || !is_numeric($sec))
        return "";

    // Plain integer arithmetic: no dependency on the optional bcmath
    // extension (bcmod)
    $sec = (int) $sec;
    $days = floor($sec / 86400);
    $rest = $sec % 86400;
    $hrs = floor($rest / 3600);
    $mins = round(($rest % 3600) / 60);

    // Start from an empty string so nothing is read before assignment
    // when the day (and hour) parts are omitted
    $tstring = '';
    if ($days > 0)
        $tstring .= $days . 'd,';
    if ($hrs > 0)
        $tstring .= $hrs . 'h,';

    return $tstring . $mins . 'm';
}
/**
 * Shared date/time formatter behind time(), date(), datetime() and
 * daydatetime().
 *
 * A timestamp flagged as $fromDb is converted with Misc::db2gmtime(); an
 * empty timestamp means "now". IntlDateFormatter is used when the class
 * exists, otherwise the pattern is translated for strftime().
 */
function __formatDate($timestamp, $format, $fromDb, $dayType, $timeType,
        $strftimeFallback, $timezone) {
    global $cfg;

    if (!$timestamp) {
        $now = new DateTime();
        $timestamp = $now->getTimestamp();
    }
    elseif ($fromDb) {
        $timestamp = Misc::db2gmtime($timestamp);
    }

    if (class_exists('IntlDateFormatter')) {
        $locale = Internationalization::getCurrentLocale();
        $formatter = new IntlDateFormatter($locale, $dayType, $timeType,
            $timezone, IntlDateFormatter::GREGORIAN, $format ?: null);
        if ($cfg->isForce24HourTime()) {
            // Drop the am/pm marker and switch the hour field to 24h
            $format = str_replace(array('a', 'h'), array('', 'H'),
                $formatter->getPattern());
            $formatter->setPattern($format);
        }
        return $formatter->format($timestamp);
    }

    // Fallback using strftime
    $format = self::getStrftimeFormat($format);
    // TODO: Properly convert to local time
    $utc = new DateTimeZone('UTC');
    $time = DateTime::createFromFormat('U', $timestamp, $utc);
    $time->setTimeZone(new DateTimeZone($cfg->getTimezone()));
    $timestamp = $time->getTimestamp();
    return strftime($format ?: $strftimeFallback, $timestamp);
}
/**
 * Parse a date string. IntlDateFormatter (current locale, optional
 * pattern) is used when the class exists; otherwise strtotime() is used.
 */
function parseDate($date, $format=false) {
    global $cfg;

    // Fallback using strtotime
    if (!class_exists('IntlDateFormatter')) {
        return strtotime($date);
    }

    $locale = Internationalization::getCurrentLocale();
    $formatter = new IntlDateFormatter($locale, null, null, null,
        IntlDateFormatter::GREGORIAN, $format ?: null);
    if ($cfg->isForce24HourTime()) {
        // Drop the am/pm marker and switch the hour field to 24h
        $format = str_replace(array('a', 'h'), array('', 'H'),
            $formatter->getPattern());
        $formatter->setPattern($format);
    }
    return $formatter->parse($date);
}
/**
 * Format the time portion of a timestamp.
 *
 * @param mixed $timestamp Timestamp; falsy means "now"
 * @param bool $fromDb Whether the timestamp comes from the database
 * @param string|bool $format Pattern; defaults to the configured time format
 * @param string|bool $timezone Timezone; defaults to the configured one
 * @return string Formatted time
 */
function time($timestamp, $fromDb=true, $format=false, $timezone=false) {
    global $cfg;

    // strftime fallback: %X is the locale's time representation (%x,
    // used previously, is the date representation)
    return self::__formatDate($timestamp,
        $format ?: $cfg->getTimeFormat(), $fromDb,
        IntlDateFormatter::NONE, IntlDateFormatter::SHORT,
        '%X', $timezone ?: $cfg->getTimezone());
}
/**
 * Format the date portion of a timestamp.
 *
 * @param mixed $timestamp Timestamp; falsy means "now"
 * @param bool $fromDb Whether the timestamp comes from the database
 * @param string|bool $format Pattern; defaults to the configured date format
 * @param string|bool $timezone Timezone; defaults to the configured one
 * @return string Formatted date
 */
function date($timestamp, $fromDb=true, $format=false, $timezone=false) {
    global $cfg;

    // strftime fallback: %x is the locale's date representation (%X,
    // used previously, is the time representation)
    return self::__formatDate($timestamp,
        $format ?: $cfg->getDateFormat(), $fromDb,
        IntlDateFormatter::SHORT, IntlDateFormatter::NONE,
        '%x', $timezone ?: $cfg->getTimezone());
}
/**
 * Format a timestamp as date and time using the configured date-time
 * format.
 *
 * @param mixed $timestamp Timestamp; falsy means "now"
 * @param bool $fromDb Whether the timestamp comes from the database
 * @param string|bool $timezone Timezone; defaults to the configured one
 * @return string Formatted date and time
 */
function datetime($timestamp, $fromDb=true, $timezone=false) {
    global $cfg;

    // strftime fallback: date (%x) followed by time (%X); the previous
    // '%X %x' had the two specifiers swapped
    return self::__formatDate($timestamp,
        $cfg->getDateTimeFormat(), $fromDb,
        IntlDateFormatter::SHORT, IntlDateFormatter::SHORT,
        '%x %X', $timezone ?: $cfg->getTimezone());
}
/**
 * Format a timestamp as full date (including the day) and time using
 * the configured day-date-time format.
 *
 * @param mixed $timestamp Timestamp; falsy means "now"
 * @param bool $fromDb Whether the timestamp comes from the database
 * @param string|bool $timezone Timezone; defaults to the configured one
 * @return string Formatted day, date and time
 */
function daydatetime($timestamp, $fromDb=true, $timezone=false) {
    global $cfg;

    // strftime fallback: date (%x) followed by time (%X); the previous
    // '%X %x' had the two specifiers swapped
    return self::__formatDate($timestamp,
        $cfg->getDayDateTimeFormat(), $fromDb,
        IntlDateFormatter::FULL, IntlDateFormatter::SHORT,
        '%x %X', $timezone ?: $cfg->getTimezone());
}
/**
 * Translate an ICU date pattern (as used by IntlDateFormatter) into the
 * equivalent strftime() format string.
 *
 * Example: 'dd/MM/y' becomes '%d/%m/%Y'.
 *
 * @param string $format ICU date pattern
 * @return string strftime() format
 */
function getStrftimeFormat($format) {
    // strftime code => ICU pattern. Entries which were shadowed by a
    // later duplicate key in the previous table ('%w' => 'e',
    // '%z' => 'D', '%g' => 'Y') never took effect and are omitted.
    static $dateToStrftime = array(
        '%d' => 'dd',
        '%a' => 'EEE',
        '%e' => 'd',
        '%A' => 'EEEE',
        '%w' => 'c',
        '%V' => 'w',
        '%B' => 'MMMM',
        '%m' => 'MM',
        '%b' => 'MMM',
        '%G' => 'Y',
        '%Y' => 'y',
        '%y' => 'yy',
        '%P' => 'a',
        '%l' => 'h',
        '%k' => 'H',
        '%I' => 'hh',
        '%H' => 'HH',
        '%M' => 'mm',
        '%S' => 'ss',
        '%z' => 'ZZZ',
        '%Z' => 'z',
    );

    // ICU pattern => strftime code, sorted descending so that longer
    // patterns ('MMMM') are replaced before their prefixes ('MM')
    $flipped = array_flip($dateToStrftime);
    krsort($flipped, SORT_STRING);
    $patterns = array_keys($flipped);
    $codes = array_values($flipped);

    // First pass: swap every ICU pattern for a control character (its
    // index) so the letters of an inserted strftime code cannot be
    // mistaken for another ICU pattern
    $format = (string) $format;
    foreach ($patterns as $id => $icu) {
        $format = str_replace($icu, chr($id), $format);
    }

    // Second pass: swap the placeholders for the strftime codes
    return preg_replace_callback('`[\x00-\x1f]`',
        function($m) use ($codes) {
            $id = ord($m[0]);
            return isset($codes[$id]) ? $codes[$id] : $m[0];
        },
        $format
    );
}
// Thanks, http://stackoverflow.com/a/2955878/1025836
/* static */
/**
 * Build a URL-friendly slug: runs of characters that are neither letters
 * nor digits become a single '-', leading/trailing dashes are trimmed
 * and the result is lowercased.
 *
 * @param string $text Text to slugify
 * @return string The slug, or 'n-a' when nothing usable remains
 */
function slugify($text) {
    // replace non letter or digits by -
    // (preg_replace yields null for invalid UTF-8, hence the cast)
    $text = (string) preg_replace('~[^\p{L}\p{N}]+~u', '-', $text);
    // trim and lowercase
    $text = strtolower(trim($text, '-'));
    // Compare against '' explicitly: empty('0') is true, which turned
    // the valid slug "0" into 'n-a'
    return ($text === '') ? 'n-a' : $text;
}
/**
 * Parse RFC 2397 formatted data strings. Format according to the RFC
 * should look something like:
 *
 * data:[type/subtype][;charset=utf-8][;base64],data
 *
 * Parameters:
 * $data - (string) RFC2397 formatted data string
 * $output_encoding - (string:optional) Character set the input data
 *      should be encoded to.
 * $always_convert - (bool|default:true) If the input data string does
 *      not specify an input encoding, assume iso-8859-1. If this flag is
 *      set, the output will always be transcoded to the declared
 *      output_encoding, if set.
 *
 * Returns:
 * array (data=>parsed and transcoded data string, type=>MIME type
 * declared in the data string or text/plain otherwise). Input which does
 * not start with "data:" is returned as-is with type text/plain.
 *
 * References:
 * http://www.ietf.org/rfc/rfc2397.txt
 */
function parseRfc2397($data, $output_encoding=false, $always_convert=true) {
    if (substr($data, 0, 5) != "data:")
        return array('data'=>$data, 'type'=>'text/plain');

    $data = substr($data, 5);
    // array_pad() keeps the destructuring safe when a separator is
    // missing from the input
    list($meta, $contents) = array_pad(explode(",", $data, 2), 2, '');
    list($type, $extra) = array_pad(explode(";", $meta, 2), 2, '');
    if (!$type)
        $type = 'text/plain';

    # Handle 'charset' hint in $extra, such as
    # data:text/plain;charset=iso-8859-1,Blah
    # Convert to utf-8 since it's the encoding scheme for the database.
    $charset = ($always_convert) ? 'iso-8859-1' : false;
    foreach (explode(";", $extra) as $p) {
        // Inspect the individual parameter. Splitting $extra here (as
        // was done before) re-read the whole parameter list on every
        // pass, so "charset=...;base64" was never base64-decoded.
        list($param, $value) = array_pad(explode('=', $p, 2), 2, '');
        $param = strtolower(trim($param));
        if ($param == 'charset')
            $charset = trim($value);
        elseif ($param == 'base64')
            $contents = base64_decode($contents);
    }

    if ($output_encoding && $charset)
        $contents = Format::encode($contents, $charset, $output_encoding);

    return array(
        'data' => $contents,
        'type' => $type
    );
}
// Performs Unicode normalization (where possible) and splits words at
// difficult word boundaries (for far eastern languages)
//
// Parameters:
// $text - (string) text to prepare for searching
// $lang - (string|false) locale handed to the word break iterator; when
//      not given the primary language from $cfg is used, or 'en_US'
//      when $cfg is not set
//
// Returns:
// (string) the normalized text
function searchable($text, $lang=false) {
global $cfg;
if (function_exists('normalizer_normalize')) {
// Normalize text input to canonical composed form (FORM_C); remaining
// combining marks are dropped further down
$text = normalizer_normalize($text, Normalizer::FORM_C);
}
else {
// As a lightweight compatiblity, use a lightweight C
// normalizer with diacritic removal, thanks
// http://ahinea.com/en/tech/accented-translate.html
$tr = array(
"ä" => "a", "ñ" => "n", "ö" => "o", "ü" => "u", "ÿ" => "y"
);
$text = strtr($text, $tr);
}
// Decompose compatible versions of characters (ä => ae)
$tr = array(
"ß" => "ss", "Æ" => "AE", "æ" => "ae", "IJ" => "IJ",
"ij" => "ij", "Œ" => "OE", "œ" => "oe", "Ð" => "D",
"Đ" => "D", "ð" => "d", "đ" => "d", "Ħ" => "H", "ħ" => "h",
"ı" => "i", "ĸ" => "k", "Ŀ" => "L", "Ł" => "L", "ŀ" => "l",
"ł" => "l", "Ŋ" => "N", "ʼn" => "n", "ŋ" => "n", "Ø" => "O",
"ø" => "o", "ſ" => "s", "Þ" => "T", "Ŧ" => "T", "þ" => "t",
"ŧ" => "t", "ä" => "ae", "ö" => "oe", "ü" => "ue",
"Ä" => "AE", "Ö" => "OE", "Ü" => "UE",
);
$text = strtr($text, $tr);
// Drop separated diacritics
$text = preg_replace('/\p{M}/u', '', $text);
// Drop extraneous whitespace (a run collapses to its first character)
$text = preg_replace('/(\s)\s+/u', '$1', $text);
// Drop leading and trailing whitespace
$text = trim($text);
if (class_exists('IntlBreakIterator')) {
// Split by word boundaries
if ($tokenizer = IntlBreakIterator::createWordInstance(
$lang ?: ($cfg ? $cfg->getPrimaryLanguage() : 'en_US'))
) {
$tokenizer->setText($text);
$tokens = array();
// NOTE(review): iterating an IntlBreakIterator directly appears to
// yield boundary offsets rather than the text segments -- confirm;
// getPartsIterator() may be what is intended here
foreach ($tokenizer as $token)
$tokens[] = $token;
$text = implode(' ', $tokens);
}
}
else {
// Approximate word boundaries from Unicode chart at
// http://www.unicode.org/reports/tr29/#Word_Boundaries
// Punt for now
}
return $text;
}
}
?>