<?php
/*********************************************************************
class.format.php
Collection of helper function used for formatting
http://www.osticket.com
Released under the GNU General Public License WITHOUT ANY WARRANTY.
See LICENSE.TXT for details.
vim: expandtab sw=4 ts=4 sts=4:
**********************************************************************/
class Format {
/**
 * Render a byte count as a short human-readable size.
 *
 * Non-numeric input is returned untouched. Values below 1 KiB are shown
 * in bytes, values below 100 KiB in kb, and everything else in mb, each
 * rounded to one decimal place.
 *
 * @param mixed $bytes Size in bytes (number or numeric string)
 * @return mixed Formatted size string, or $bytes itself when not numeric
 */
function file_size($bytes) {
    if (!is_numeric($bytes))
        return $bytes;

    if ($bytes < 1024)
        return $bytes.' bytes';

    // Without this tier small files were reported as "0 mb".
    if ($bytes < 102400)
        return round(($bytes/1024), 1).' kb';

    return round(($bytes/1048576), 1).' mb';
}
/**
 * Convert a php.ini style shorthand size ("8M", "512k", "1G") to bytes.
 *
 * @param mixed $size Size with an optional K/M/G suffix (case-insensitive)
 * @return mixed Integer byte count when a suffix is present; otherwise the
 *               input is returned unchanged.
 */
function filesize2bytes($size) {
    // Cast first so the suffix letter is dropped before shifting; shifting
    // the raw "8M" string triggers a non-numeric value warning.
    $number = (int) $size;

    switch (substr($size, -1)) {
    case 'M': case 'm': return $number << 20;
    case 'K': case 'k': return $number << 10;
    case 'G': case 'g': return $number << 30;
    }

    return $size;
}
/**
 * Encode text into the desired encoding, taking the source charset into
 * account when it is available.
 *
 * @param string      $text     Text to convert
 * @param string|null $charset  Source charset; auto-detected via mbstring
 *                              when empty, ISO-8859-1 when still unknown
 * @param string      $encoding Target encoding
 * @return mixed Converted text, or the original text when conversion
 *               produced an empty/false result
 */
function encode($text, $charset=null, $encoding='utf-8') {
    // Keep the input around: it is the fallback when conversion fails.
    $original = $text;

    //Try auto-detecting charset/encoding
    if (!$charset && function_exists('mb_detect_encoding'))
        $charset = mb_detect_encoding($text);

    // Cleanup - incorrect, bogus, or ambiguous charsets
    // ISO-8859-1 is assumed for empty charset.
    if (!$charset || in_array(strtolower(trim($charset)),
            array('default','x-user-defined','iso','us-ascii')))
        $charset = 'ISO-8859-1';

    if (function_exists('iconv'))
        $text = iconv($charset, $encoding.'//IGNORE', $text);
    elseif (function_exists('mb_convert_encoding'))
        $text = mb_convert_encoding($text, $encoding, $charset);
    elseif (!strcasecmp($encoding, 'utf-8')
            && function_exists('utf8_encode')
            && !strcasecmp($charset, 'ISO-8859-1'))
        $text = utf8_encode($text);

    // If $text is false, then we have a (likely) invalid charset, use
    // the original text and assume 8-bit (latin-1 / iso-8859-1)
    // encoding
    return (!$text && $original) ? $original : $text;
}
//Wrapper for utf-8 encoding.
/**
 * Convenience wrapper around Format::encode() that always targets utf-8.
 *
 * @param string      $text    Text to convert
 * @param string|null $charset Source charset hint, if known
 * @return mixed Whatever Format::encode() returns
 */
function utf8encode($text, $charset=null) {
    return Format::encode($text, $charset, 'utf-8');
}
/**
 * Decode a MIME encoded-word header value into the requested encoding.
 *
 * Tries, in order: imap_mime_header_decode() (each part re-encoded via
 * Format::encode()), iconv_mime_decode() for values starting with "=",
 * and imap_utf8() when the target is utf-8. If none applies the text is
 * returned as given.
 *
 * @param string $text     Raw header value
 * @param string $encoding Target encoding
 * @return string Decoded text
 */
function mimedecode($text, $encoding='UTF-8') {
    if (function_exists('imap_mime_header_decode')
            && ($parts = imap_mime_header_decode($text))) {
        $str = '';
        foreach ($parts as $part)
            $str .= Format::encode($part->text, $part->charset, $encoding);

        $text = $str;
    // strncmp() is safe on an empty string, unlike $text[0] which raises
    // an uninitialized-offset warning.
    } elseif (strncmp((string) $text, '=', 1) === 0
            && function_exists('iconv_mime_decode')) {
        $text = iconv_mime_decode($text, 0, $encoding);
    } elseif (!strcasecmp($encoding, 'utf-8')
            && function_exists('imap_utf8')) {
        $text = imap_utf8($text);
    }

    return $text;
}
/**
* Decodes filenames given in the content-disposition header according
* to RFC5987, such as filename*=utf-8''filename.png. Note that the
* language sub-component is defined in RFC5646, and that the filename
* is URL encoded (in the charset specified)
*/
function decodeRfc5987($filename) {
    $pieces = array();
    $found = preg_match("/([\w!#$%&+^_`{}~-]+)'([\w-]*)'(.*)$/",
        $filename, $pieces);

    // Not in charset'language'value form: hand the name back untouched.
    if (!$found)
        return $filename;

    // XXX: The language component ($pieces[2]) is ignored for now; the
    // charset hint alone is enough to transcode the value.
    $charset = $pieces[1];
    $encoded = $pieces[3];

    return self::utf8encode(urldecode($encoded), $charset);
}
/**
* Json Encoder
*
*/
function json_encode($what) {
    // The encoder class is loaded on demand from the include directory.
    require_once (INCLUDE_DIR.'class.json.php');

    $encoded = JsonDataEncoder::encode($what);

    return $encoded;
}
/**
 * Pretty-print 7 and 10 digit phone numbers; anything else is returned
 * exactly as it was given.
 */
function phone($phone) {
    // Only the digits decide which layout applies.
    $digits = preg_replace("/[^0-9]/", "", $phone);

    switch (strlen($digits)) {
    case 7:
        // Local number: 555-1234
        return preg_replace("/([0-9]{3})([0-9]{4})/", "$1-$2",$digits);
    case 10:
        // Area code + number: (555) 123-4567
        return preg_replace("/([0-9]{3})([0-9]{3})([0-9]{4})/", "($1) $2-$3",$digits);
    }

    return $phone;
}
/**
 * Shorten a string to at most $len bytes.
 *
 * With $hard the string is simply cut at $len. Otherwise the cut is moved
 * back to the last space inside the first $len bytes and ' ...' is
 * appended. Lengths are byte-based (strlen/substr).
 *
 * @param string $string Text to shorten
 * @param int    $len    Maximum length; 0/empty disables truncation
 * @param bool   $hard   Cut exactly at $len without an ellipsis
 * @return string
 */
function truncate($string,$len,$hard=false) {
    // A string that already fits (including exactly $len long) is left
    // alone.
    if (!$len || $len >= strlen($string))
        return $string;

    $string = substr($string, 0, $len);
    if ($hard)
        return $string;

    // Back up to a word boundary when there is one; a single long word
    // keeps its hard cut instead of collapsing to an empty string.
    $cut = strrpos($string, ' ');
    if ($cut)
        $string = substr($string, 0, $cut);

    return $string.' ...';
}
/**
 * Remove backslash escaping from a string, or from every element of a
 * (possibly nested) array.
 */
function strip_slashes($var) {
    if (!is_array($var))
        return stripslashes($var);

    // Recurse through the class callback for each array element.
    return array_map(array('Format','strip_slashes'), $var);
}
/**
 * Hard-wrap text at $len columns using "\n"; a falsy $len disables
 * wrapping.
 */
function wrap($text, $len=75) {
    if (!$len)
        return $text;

    // Fourth argument true: words longer than $len are split.
    return wordwrap($text, $len, "\n", true);
}
/**
 * Run HTML through htmLawed.
 *
 * @param string $html   Markup to process
 * @param array  $config htmLawed configuration; an optional 'spec' entry
 *                       is also passed as htmLawed's third argument
 * @return mixed Result of htmLawed()
 */
function html($html, $config=array('balance'=>1)) {
    require_once(INCLUDE_DIR.'htmLawed.php');

    $spec = false;
    if (isset($config['spec']))
        $spec = $config['spec'];

    return htmLawed($html, $config, $spec);
}
/**
 * Convert HTML to wrapped plain text.
 *
 * Uses convert_html_to_text() when that function and the dom extension
 * are present; otherwise falls back to a regex + strip-tags approach.
 */
function html2text($html, $width=74, $tidy=true) {
    # Tidy html: decode, balance, sanitize tags
    if ($tidy) {
        $decoded = Format::htmldecode($html);
        $html = Format::html($decoded, array('balance' => 1));
    }

    # Prefer the advanced converter (requires xml extension)
    $advanced = function_exists('convert_html_to_text')
        && extension_loaded('dom');
    if ($advanced)
        return convert_html_to_text($html, $width);

    # Simple fallback - insert line breaks after new line tags.
    $breakTags = array(':<br ?/?\>:i', ':(</div>)\s*:i', ':(</p>)\s*:i');
    $lineBreaks = array("\n", "$1\n", "$1\n\n");
    $html = preg_replace($breakTags, $lineBreaks, $html);

    # Strip tags, decode html chars and wrap resulting text.
    $plain = Format::htmldecode(Format::striptags($html, false));

    return Format::wrap($plain, $width);
}
/**
 * Tag clean-up callback: rebuilds a single tag as a string after
 * filtering its class and style attributes.
 *
 * When $attributes is an array an opening tag is returned (self-closed
 * for the void elements listed in $eE); otherwise a closing tag for $el
 * is returned.
 *
 * NOTE(review): the style-handling section below appears to have lost
 * lines (an `if` with no matching body, an `else` without a preceding
 * block, unbalanced braces) -- restore from version control before
 * relying on it.
 *
 * @param string    $el         Element name
 * @param array|int $attributes Attribute map for an opening tag, or a
 *                              non-array value for a closing tag
 * @return string Serialized tag
 */
static function __html_cleanup($el, $attributes=0) {
// Elements serialized with a trailing " /" (no closing tag)
static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1,
'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
// Clean unexpected class values
if (isset($attributes['class'])) {
$classes = explode(' ', $attributes['class']);
foreach ($classes as $i=>$a)
// Unset all unsupported style classes -- anything but M$
// (only class names beginning with "Mso" are kept)
if (strpos($a, 'Mso') !== 0)
unset($classes[$i]);
if ($classes)
$attributes['class'] = implode(' ', $classes);
else
unset($attributes['class']);
}
// Clean browser-specific style attributes
if (isset($attributes['style'])) {
// Split the declaration list on ";" after decoding HTML entities
$styles = preg_split('/;\s*/S', html_entity_decode($attributes['style']));
foreach ($styles as $i=>&$s) {
@list($prop, $val) = explode(':', $s);
// Drop repeated properties; the first occurrence wins
if (isset($props[$prop])) {
unset($styles[$i]);
continue;
}
$props[$prop] = true;
// Remove unset or browser-specific style rules
if (!$val || !$prop || $prop[0] == '-' || substr($prop, 0, 4) == 'mso-')
// Remove quotes of properties without enclosed space
$val = str_replace('"','', $val);
else
$val = str_replace('"',"'", $val);
$s = "$prop:".trim($val);
$attributes['style'] = Format::htmlchars(implode(';', $styles));
else
unset($attributes['style']);
}
// Serialize: array => opening tag with attributes, otherwise closing tag
$at = '';
if (is_array($attributes)) {
foreach ($attributes as $k=>$v)
$at .= " $k=\"$v\"";
return "<{$el}{$at}".(isset($eE[$el])?" /":"").">";
}
else {
return "</{$el}>";
}
}
array(':<(head|style|script).+?</\1>:is', # <head> and <style> sections
':<!\[[^]<]+\]>:', # <![if !mso]> and friends
':<!DOCTYPE[^>]+>:', # <!DOCTYPE ... >
':<\?[^>]+>:', # <?xml version="1.0" ... >
array('', '', '', ''),
$config = array(
'safe' => 1, //Exclude applet, embed, iframe, object and script tags.
'balance' => 1, //balance and close unclosed tags.
'comment' => 1, //Remove html comments (OUTLOOK LOVE THEM)
'deny_attribute' => 'id',
'schemes' => 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https; src: cid, http, https, data',
'hook_tag' => function($e, $a=0) { return Format::__html_cleanup($e, $a); },
'spec' => 'iframe=-*,height,width,type,src(match="`^(https?:)?//(www\.)?(youtube|dailymotion|vimeo)\.com/`i"),frameborder; div=data-mid',
return Format::html($html, $config);
/**
 * Rewrite file.php image URLs in src attributes back to "cid:<key>"
 * content-id references.
 *
 * @param string $text HTML containing src="...file.php?...key=<key>..."
 * @return string HTML with those src values replaced by cid:<key>
 */
function localizeInlineImages($text) {
    // Change file.php urls back to content-id's.
    // Parameter values and the key are not allowed to contain a double
    // quote, so a match can never run past the end of the src attribute
    // and swallow the markup that follows it.
    return preg_replace(
        '/src="(?:\/[^"]+?)?\/file\\.php\\?(?:\w+=[^&"]+&(?:amp;)?)*?key=([^&"]+)[^"]*/',
        'src="cid:$1', $text);
}
/**
 * Make user-supplied HTML safe for storage/display.
 *
 * @param string $text      HTML to clean
 * @param bool   $striptags When true, tags are stripped from the cleaned
 *                          result (without entity decoding)
 * @return mixed Cleaned HTML or tag-stripped text
 */
function sanitize($text, $striptags=false) {
    //balance and neutralize unsafe tags.
    $text = Format::safe_html($text);

    // Point inline image URLs back at their content-ids
    $text = self::localizeInlineImages($text);

    //If requested - strip tags with decoding disabled.
    return $striptags?Format::striptags($text, false):$text;
}
/**
 * Escape HTML special characters (UTF-8, existing entities are not
 * double-encoded). Arrays are processed element by element.
 *
 * @param mixed $var      String or array of strings
 * @param bool  $sanitize Run Format::sanitize() on the value first
 * @return mixed Escaped string/array; the input itself if escaping threw
 */
function htmlchars($var, $sanitize = false) {
    // Only arrays are mapped; returning array_map() unconditionally
    // would break every scalar call.
    if (is_array($var))
        return array_map(array('Format', 'htmlchars'), $var);

    if ($sanitize)
        $var = Format::sanitize($var);

    // ENT_HTML401 only exists on PHP 5.4+; test for the constant rather
    // than comparing version strings.
    $flags = ENT_COMPAT;
    if (defined('ENT_HTML401'))
        $flags |= ENT_HTML401;

    try {
        return htmlspecialchars( (string) $var, $flags, 'UTF-8', false);
    } catch(Exception $e) {
        return $var;
    }
}
/**
 * Decode HTML special-character entities (&amp;lt; &amp;gt; &amp;amp;
 * &amp;quot;). Arrays are processed element by element.
 *
 * @param mixed $var String or array of strings
 * @return mixed Decoded string/array
 */
function htmldecode($var) {
    if (is_array($var))
        return array_map(array('Format','htmldecode'), $var);

    // ENT_HTML401 only exists on PHP 5.4+. Testing for the constant is
    // reliable, whereas comparing version strings lexically misorders
    // versions such as "10.0.0" and "5.4.0".
    $flags = ENT_COMPAT;
    if (defined('ENT_HTML401'))
        $flags |= ENT_HTML401;

    return htmlspecialchars_decode($var, $flags);
}
/**
 * Prepare stored HTML for display.
 *
 * Off-site <img> tags (src="http...") are replaced by placeholder spans
 * carrying the original src as a data- attribute, URLs are made
 * clickable, and optionally cid: image references are turned into
 * viewable URLs.
 *
 * @param string $text          HTML to display
 * @param bool   $inline_images Resolve cid: references via
 *                              viewableImages()
 * @return mixed Display-ready HTML
 */
function display($text, $inline_images=true) {
    // Make showing offsite images optional
    $text = preg_replace_callback('/<img ([^>]*)(src="http[^"]+")([^>]*)\/>/',
        function($match) {
            // Drop embedded classes -- they don't refer to ours
            $match = preg_replace('/class="[^"]*"/', '', $match);
            // Groups: 1 = attributes before src, 2 = src="...",
            // 3 = attributes after src. "data-%s" turns the src
            // attribute into data-src.
            return sprintf('<span %s class="non-local-image" data-%s %s></span>',
                $match[1], $match[2], $match[3]);
        },
        $text);

    $text = Format::clickableurls($text);

    if ($inline_images)
        return self::viewableImages($text);

    return $text;
}
/**
 * Strip HTML tags from a string, or from every element of an array.
 *
 * @param mixed $var    String or array of strings
 * @param bool  $decode Decode HTML entities (Format::htmldecode) before
 *                      stripping
 * @return mixed Tag-free string/array
 */
function striptags($var, $decode=true) {
    if (is_array($var)) {
        // Walk the array directly so keys are preserved and an empty
        // array needs no array_fill() helper list.
        $stripped = array();
        foreach ($var as $key => $item)
            $stripped[$key] = Format::striptags($item, $decode);

        return $stripped;
    }

    return strip_tags($decode?Format::htmldecode($var):$var);
}
//make urls clickable. Mainly for display
global $ost;
// Find all text between tags
$text = preg_replace_callback(':^[^<]+|>[^<]+:',
// Scan for things that look like URLs
return preg_replace_callback(
'`(?<!>)(((f|ht)tp(s?)://|(?<!//)www\.)([-+~%/.\w]+)(?:[-?#+=&;%@.\w]*)?)'
.'|(\b[_\.0-9a-z-]+@([0-9a-z][0-9a-z-]+\.)+[a-z]{2,4})`',
while (in_array(substr($match[1], -1),
array('.','?','-',':',';'))) {
$match[9] = substr($match[1], -1) . $match[9];
$match[1] = substr($match[1], 0, strlen($match[1])-1);
}
if (strpos($match[2], '//') === false) {
$match[1] = 'http://' . $match[1];
}
return sprintf('<a href="%s">%s</a>%s',
$match[1], $match[1], $match[9]);
} elseif ($match[6]) {
return sprintf('<a href="mailto:%1$s" target="_blank">%1$s</a>',
$match[6]);
}
},
$match[0]);
},
$text);
// Now change @href and @src attributes to come back through our
// system as well
$config = array(
static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1,
'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
if ($e == 'a' && $a) {
$at = '';
if (is_array($a)) {
foreach ($a as $k=>$v)
$at .= " $k=\"$v\"";
return "<{$e}{$at}".(isset($eE[$e])?" /":"").">";
return "</{$e}>";
}
},
'schemes' => 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https; src: cid, http, https, data',
'elements' => '*+iframe',
'spec' => 'span=data-src,width,height',
);
return Format::html($text, $config);
return preg_replace("/\n{3,}/", "\n\n", trim($string));
/**
 * Replace quoted "cid:<32-char key>" references with the download URL of
 * the matching attachment, adding a data-cid attribute that keeps the
 * key. References with no matching file are left untouched.
 */
function viewableImages($html, $script=false) {
    $pattern = '/"cid:([\w._-]{32})"/';

    $resolve = function($found) use ($script) {
        $key = $found[1];
        $file = AttachmentFile::lookup($key);

        // Unknown key: keep the original "cid:..." text
        if (!$file)
            return $found[0];

        $url = $file->getDownloadUrl(false, 'inline', $script);

        return sprintf('"%s" data-cid="%s"', $url, $key);
    };

    return preg_replace_callback($pattern, $resolve, $html);
}
/**
* Thanks, http://us2.php.net/manual/en/function.implode.php
* Implode an array with the key and value pair giving
* a glue, a separator between pairs and the array
* to implode.
* @param string $glue The glue between key and value
* @param string $separator Separator between pairs
* @param array $array The array to implode
* @return string The imploded array
*/
// Joins each key/value pair with $glue and the pairs with $separator.
// Nested array values are flattened with ','. A non-array $array is
// returned unchanged.
function array_implode( $glue, $separator, $array ) {
    if ( !is_array( $array ) ) return $array;

    $string = array();
    foreach ( $array as $key => $val ) {
        if ( is_array( $val ) )
            $val = implode( ',', $val );

        // Collect every pair; returning inside the loop would stop after
        // the first element with nothing collected.
        $string[] = "{$key}{$glue}{$val}";
    }

    return implode( $separator, $string );
}
/**
 * Format a duration in seconds as "<d>d,<h>h,<m>m".
 *
 * Day and hour parts are omitted when zero; minutes are always shown and
 * are rounded to the nearest minute.
 *
 * NOTE(review): the function header was missing from this copy of the
 * file; the name elapsedTime is restored from the upstream project --
 * confirm against callers.
 *
 * @param mixed $sec Elapsed seconds (number or numeric string)
 * @return string Formatted duration, or "" for empty/non-numeric input
 */
function elapsedTime($sec) {
    if(!$sec || !is_numeric($sec)) return "";

    // Plain integer arithmetic: no dependency on the bcmath extension.
    $sec = (int) $sec;
    $days = (int) floor($sec / 86400);
    $remainder = $sec % 86400;
    $hrs = (int) floor($remainder / 3600);
    $mins = round(($remainder % 3600) / 60);

    // Start from an empty string so the appends below never read an
    // undefined variable.
    $tstring = '';
    if($days > 0) $tstring = $days . 'd,';
    if($hrs > 0) $tstring = $tstring . $hrs . 'h,';
    $tstring = $tstring . $mins . 'm';

    return $tstring;
}
/**
 * Shared implementation behind the date/time formatting helpers.
 *
 * Uses IntlDateFormatter when the intl extension is loaded, otherwise
 * falls back to strftime() with a pattern translated by
 * getStrftimeFormat().
 *
 * @param mixed  $timestamp        Time to format; "now" when empty
 * @param mixed  $format           ICU pattern, or falsy for the
 *                                 formatter's default
 * @param bool   $fromDb           Convert $timestamp with
 *                                 Misc::db2gmtime() first
 * @param int    $dayType          IntlDateFormatter date style
 * @param int    $timeType         IntlDateFormatter time style
 * @param string $strftimeFallback strftime pattern used when $format is
 *                                 empty in the fallback path
 * @param mixed  $timezone         Timezone handed to IntlDateFormatter
 * @return mixed Formatted date string
 */
function __formatDate($timestamp, $format, $fromDb, $dayType, $timeType,
        $strftimeFallback, $timezone) {
    // $cfg is read below for the 24-hour setting and the timezone.
    global $cfg;

    if ($timestamp && $fromDb) {
        $timestamp = Misc::db2gmtime($timestamp);
    }
    elseif (!$timestamp) {
        $D = new DateTime();
        $timestamp = $D->getTimestamp();
    }

    if (class_exists('IntlDateFormatter')) {
        $formatter = new IntlDateFormatter(
            Internationalization::getCurrentLocale(),
            $dayType,
            $timeType,
            $timezone,
            IntlDateFormatter::GREGORIAN,
            $format ?: null
        );
        if ($cfg->isForce24HourTime()) {
            // Drop the am/pm marker and switch 12-hour to 24-hour fields
            $format = str_replace(array('a', 'h'), array('', 'H'),
                $formatter->getPattern());
            $formatter->setPattern($format);
        }
        return $formatter->format($timestamp);
    }

    // Fallback using strftime
    $format = self::getStrftimeFormat($format);
    // TODO: Properly convert to local time
    $time = DateTime::createFromFormat('U', $timestamp, new DateTimeZone('UTC'));
    $time->setTimeZone(new DateTimeZone($cfg->getTimezone() ?: date_default_timezone_get()));
    $timestamp = $time->getTimestamp();

    return strftime($format ?: $strftimeFallback, $timestamp);
}
/**
 * Parse a localized date string.
 *
 * Uses IntlDateFormatter::parse() with the current locale when the intl
 * extension is loaded, otherwise strtotime().
 *
 * @param string $date   Date text to parse
 * @param mixed  $format ICU pattern, or falsy for the formatter default
 * @return mixed Result of IntlDateFormatter::parse() or strtotime()
 */
function parseDate($date, $format=false) {
    // $cfg is read below for the 24-hour setting.
    global $cfg;

    if (class_exists('IntlDateFormatter')) {
        $formatter = new IntlDateFormatter(
            Internationalization::getCurrentLocale(),
            null,
            null,
            null,
            IntlDateFormatter::GREGORIAN,
            $format ?: null
        );
        if ($cfg->isForce24HourTime()) {
            // Drop the am/pm marker and switch 12-hour to 24-hour fields
            $format = str_replace(array('a', 'h'), array('', 'H'),
                $formatter->getPattern());
            $formatter->setPattern($format);
        }
        return $formatter->parse($date);
    }

    // Fallback using strtotime
    return strtotime($date);
}
function time($timestamp, $fromDb=true, $format=false, $timezone=false) {
return self::__formatDate($timestamp,
$format ?: $cfg->getTimeFormat(), $fromDb,
function date($timestamp, $fromDb=true, $format=false, $timezone=false) {
global $cfg;
return self::__formatDate($timestamp,
$format ?: $cfg->getDateFormat(), $fromDb,
function datetime($timestamp, $fromDb=true, $timezone=false) {
global $cfg;
return self::__formatDate($timestamp,
function daydatetime($timestamp, $fromDb=true, $timezone=false) {
global $cfg;
return self::__formatDate($timestamp,
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
}
/**
 * Translate an ICU date pattern (as used by IntlDateFormatter) into the
 * equivalent strftime() pattern.
 *
 * Only the tokens listed below are translated; every other character is
 * copied through. Quoted ICU literals are not recognised, so letters
 * inside them are translated as well.
 *
 * @param mixed $format ICU pattern; falsy values are returned unchanged
 * @return mixed strftime pattern
 */
function getStrftimeFormat($format) {
    // ICU token => strftime code. This is the effective content of the
    // previous strftime => ICU table once its duplicate keys collapse.
    static $icuToStrftime = array(
        'dd'   => '%d',
        'EEE'  => '%a',
        'd'    => '%e',
        'EEEE' => '%A',
        'c'    => '%w',
        'w'    => '%V',
        'MMMM' => '%B',
        'MM'   => '%m',
        'MMM'  => '%b',
        'Y'    => '%G',
        'y'    => '%Y',
        'yy'   => '%y',
        'a'    => '%P',
        'h'    => '%l',
        'H'    => '%k',
        'hh'   => '%I',
        'HH'   => '%H',
        'mm'   => '%M',
        'ss'   => '%S',
        'ZZZ'  => '%z',
        'z'    => '%Z',
    );

    if (!$format)
        return $format;

    // strtr() with an array tries the longest key first and never
    // rescans text it has already replaced, so "MMMM" wins over "MM" and
    // the letters in the inserted "%x" codes are not translated again.
    return strtr($format, $icuToStrftime);
}
// Thanks, http://stackoverflow.com/a/2955878/1025836
/* static */
/**
 * Build a URL-friendly slug: runs of anything that is not a Unicode
 * letter or digit become a single '-', leading and trailing dashes are
 * trimmed, and the result is lower-cased.
 *
 * @param string $text Text to slugify
 * @return string The slug, or 'n-a' when nothing usable remains
 */
function slugify($text) {
    // replace non letter or digits by -
    $text = preg_replace('~[^\p{L}\p{N}]+~u', '-', $text);

    // trim (preg_replace yields null on invalid UTF-8, hence the cast)
    $text = trim((string) $text, '-');

    // lowercase -- multibyte aware when mbstring is available, since the
    // pattern above keeps non-ASCII letters
    $text = function_exists('mb_strtolower')
        ? mb_strtolower($text, 'UTF-8')
        : strtolower($text);

    // Compare against '' explicitly: empty() would also reject the
    // perfectly valid slug "0".
    return ($text === '') ? 'n-a' : $text;
}
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
/**
 * Parse RFC 2397 formatted data strings. Format according to the RFC
 * should look something like:
 *
 * data:[type/subtype][;charset=utf-8][;base64],data
 *
 * Parameters:
 * $data - (string) RFC2397 formatted data string
 * $output_encoding - (string:optional) Character set the input data
 *      should be encoded to.
 * $always_convert - (bool|default:true) If the input data string does
 *      not specify an input encoding, assume iso-8859-1. If this flag is
 *      set, the output will always be transcoded to the declared
 *      output_encoding, if set.
 *
 * Returns:
 * array (data=>parsed and transcoded data string, type=>MIME type
 *      declared in the data string or text/plain otherwise). Input that
 *      does not start with "data:" is returned as-is with type
 *      text/plain.
 *
 * References:
 * http://www.ietf.org/rfc/rfc2397.txt
 */
function parseRfc2397($data, $output_encoding=false, $always_convert=true) {
    if (substr($data, 0, 5) != "data:")
        return array('data'=>$data, 'type'=>'text/plain');

    $data = substr($data, 5);

    // Split "<meta>,<contents>"; tolerate a missing comma
    $pieces = explode(",", $data, 2);
    $meta = $pieces[0];
    $contents = isset($pieces[1]) ? $pieces[1] : '';

    // Split "<type>;<parameters>"; both halves are optional
    $type = '';
    $extra = '';
    if ($meta) {
        $metaParts = explode(";", $meta, 2);
        $type = $metaParts[0];
        if (isset($metaParts[1]))
            $extra = $metaParts[1];
    }
    if (!$type)
        $type = 'text/plain';

    # Handle 'charset' hint in $extra, such as
    # data:text/plain;charset=iso-8859-1,Blah
    # Convert to utf-8 since it's the encoding scheme for the database.
    $charset = ($always_convert) ? 'iso-8859-1' : false;
    foreach (explode(";", $extra) as $p) {
        // Inspect each parameter on its own. Exploding the whole $extra
        // string here meant "charset=x;base64" was never base64-decoded.
        $pair = explode('=', $p, 2);
        $param = strtolower(trim($pair[0]));
        $value = isset($pair[1]) ? $pair[1] : '';

        if ($param == 'charset')
            $charset = $value;
        elseif ($param == 'base64')
            $contents = base64_decode($contents);
    }

    if ($output_encoding && $charset)
        $contents = Format::encode($contents, $charset, $output_encoding);

    return array(
        'data' => $contents,
        'type' => $type
    );
}
// Performs Unicode normalization (where possible) and splits words at
// difficult word boundaries (for far eastern languages)
function searchable($text, $lang=false) {
global $cfg;
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
if (function_exists('normalizer_normalize')) {
// Normalize text input :: remove diacritics and such
$text = normalizer_normalize($text, Normalizer::FORM_C);
}
else {
// As a lightweight compatiblity, use a lightweight C
// normalizer with diacritic removal, thanks
// http://ahinea.com/en/tech/accented-translate.html
$tr = array(
"ä" => "a", "ñ" => "n", "ö" => "o", "ü" => "u", "ÿ" => "y"
);
$text = strtr($text, $tr);
}
// Decompose compatible versions of characters (ä => ae)
$tr = array(
"ß" => "ss", "Æ" => "AE", "æ" => "ae", "IJ" => "IJ",
"ij" => "ij", "Œ" => "OE", "œ" => "oe", "Ð" => "D",
"Đ" => "D", "ð" => "d", "đ" => "d", "Ħ" => "H", "ħ" => "h",
"ı" => "i", "ĸ" => "k", "Ŀ" => "L", "Ł" => "L", "ŀ" => "l",
"ł" => "l", "Ŋ" => "N", "ʼn" => "n", "ŋ" => "n", "Ø" => "O",
"ø" => "o", "ſ" => "s", "Þ" => "T", "Ŧ" => "T", "þ" => "t",
"ŧ" => "t", "ä" => "ae", "ö" => "oe", "ü" => "ue",
"Ä" => "AE", "Ö" => "OE", "Ü" => "UE",
);
$text = strtr($text, $tr);
// Drop separated diacritics
$text = preg_replace('/\p{M}/u', '', $text);
// Drop extraneous whitespace
$text = preg_replace('/(\s)\s+/u', '$1', $text);
// Drop leading and trailing whitespace
$text = trim($text);
if (class_exists('IntlBreakIterator')) {
// Split by word boundaries
if ($tokenizer = IntlBreakIterator::createWordInstance(
$lang ?: ($cfg ? $cfg->getPrimaryLanguage() : 'en_US'))
$tokens = array();
foreach ($tokenizer as $token)
$tokens[] = $token;
$text = implode(' ', $tokens);
}
}
else {
// Approximate word boundaries from Unicode chart at
// http://www.unicode.org/reports/tr29/#Word_Boundaries
// Punt for now
}
return $text;
}
// Date/time style constants: mirror IntlDateFormatter's values when the
// intl extension is loaded, otherwise use local stand-in numbers.
define('IDF_NONE',
    class_exists('IntlDateFormatter') ? IntlDateFormatter::NONE : 0);
define('IDF_SHORT',
    class_exists('IntlDateFormatter') ? IntlDateFormatter::SHORT : 1);
define('IDF_FULL',
    class_exists('IntlDateFormatter') ? IntlDateFormatter::FULL : 2);