WSCOMUN  2.1.2
Web Services Comunes para PHP/GVHidra
WSCMime.php
1 <?php
2 namespace WSCMIME;
3 
11 require_once 'WSCMimeParser.php';
12 require_once 'WSCCharset.php';
13 require_once 'WSCMessagePart.php';
14 
15 class WSCMime
16 {
17  const WSC_CHARSET = 'UTF-8';
18  private static $default_charset;
19 
20 
/**
 * Creates the helper and stores the class-wide default charset.
 *
 * The charset lives in a static property, so the value set here is the one
 * every static method of the class reads afterwards.
 *
 * @param string|null $default_charset Charset to use as default; any empty
 *                                     value selects self::WSC_CHARSET.
 */
function __construct($default_charset = null)
{
    self::$default_charset = empty($default_charset)
        ? self::WSC_CHARSET
        : $default_charset;
}
33 
34 
/**
 * Returns the charset this class currently treats as its default.
 *
 * @return string The charset stored by the constructor, or self::WSC_CHARSET
 *                when nothing (or a falsy value) has been stored.
 */
public static function get_charset()
{
    return self::$default_charset ?: self::WSC_CHARSET;
}
48 
49 
/**
 * Decodes a raw MIME message with WSCMimeParser.
 *
 * The parser is configured to include and decode bodies, to leave headers
 * undecoded, and to use the class default charset.
 *
 * @param string $raw_body Raw message source
 *
 * @return mixed Whatever WSCMimeParser::decode() returns for the message
 */
public static function parse_message($raw_body)
{
    $parser = new WSCMimeParser(array(
        'include_bodies'  => true,
        'decode_bodies'   => true,
        'decode_headers'  => false,
        'default_charset' => self::get_charset(),
    ));

    return $parser->decode($raw_body);
}
71 
/**
 * Splits an address-list header value into its individual addresses.
 *
 * @param string      $input    Address list as found in a message header
 * @param int|null    $max      Stop after this many entries (falsy = no limit)
 * @param bool        $decode   Passed to parse_address_list(): decode the names
 * @param string|null $fallback Passed to parse_address_list(): fallback charset
 * @param bool        $addronly Return plain address strings instead of arrays
 *
 * @return array Entries indexed from 1. Each entry is the address string when
 *               $addronly is set, otherwise an array with the keys 'name',
 *               'mailto' and 'string' (the display form).
 */
public static function decode_address_list($input, $max = null, $decode = true, $fallback = null, $addronly = false)
{
    $out    = array();
    $parsed = self::parse_address_list($input, $decode, $fallback);

    if (!is_array($parsed)) {
        return $out;
    }

    // Special chars as defined by RFC 822 need to be in a quoted string (or escaped).
    $special_chars = '[\(\)<>\\\.\[\]@,;:"]';
    $j = 0;

    foreach ($parsed as $val) {
        $j++;
        $address = trim($val['address']);

        if ($addronly) {
            $out[$j] = $address;
        }
        else {
            $name = trim($val['name']);
            // Reset on every entry: previously this variable could be undefined
            // or still hold the previous entry's value when neither branch matched.
            $string = '';

            if ($name && $address && $name != $address) {
                $display = preg_match("/$special_chars/", $name)
                    ? '"' . addcslashes($name, '"') . '"'
                    : $name;
                $string = sprintf('%s <%s>', $display, $address);
            }
            else if ($address) {
                $string = $address;
            }
            else if ($name) {
                $string = $name;
            }

            $out[$j] = array('name' => $name, 'mailto' => $address, 'string' => $string);
        }

        if ($max && $j == $max) {
            break;
        }
    }

    return $out;
}
114 
/**
 * Decodes a message header value.
 *
 * Casts the input to string and delegates to decode_mime_string().
 *
 * @param mixed       $input    Header value
 * @param string|null $fallback Fallback charset handed to decode_mime_string()
 *
 * @return string Decoded header value
 */
public static function decode_header($input, $fallback = null)
{
    return self::decode_mime_string((string) $input, $fallback);
}
/**
 * Decodes a string that may contain RFC 2047 encoded-words.
 *
 * Text outside encoded-words is converted with the fallback charset; each
 * encoded-word (or run of adjacent ones) is converted with its own charset.
 *
 * @param string      $input    String to decode
 * @param string|null $fallback Charset for unencoded text; the class default
 *                              charset is used when this is falsy
 *
 * @return string Result of the WSCCharset::convert() calls, concatenated
 */
public static function decode_mime_string($input, $fallback = null)
{
    $default_charset = $fallback ?: self::get_charset();

    // rfc: all line breaks or other characters not found
    // in the Base64 Alphabet must be ignored by decoding software.
    // Delete all blanks between MIME lines, otherwise we can
    // receive unnecessary blanks and broken utf-8 symbols.
    $input = preg_replace("/\?=\s+=\?/", '?==?', $input);

    // encoded-word regexp
    $re      = '/=\?([^?]+)\?([BbQq])\?([^\n]*?)\?=/';
    $matches = array();

    // No encoded-words at all: convert the whole input with the fallback charset
    if (!preg_match_all($re, $input, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        return WSCCharset::convert($input, $default_charset);
    }

    $out   = '';
    $start = 0;       // byte offset of the first not-yet-consumed input byte
    $tmp   = array(); // payloads of the current run of adjacent encoded-words

    foreach ($matches as $idx => $m) {
        $pos      = $m[0][1];
        $charset  = $m[1][0];
        $encoding = $m[2][0];
        $text     = $m[3][0];

        // Append everything that is before the text to be decoded
        if ($start != $pos) {
            $out  .= WSCCharset::convert(substr($input, $start, $pos - $start), $default_charset);
            $start = $pos;
        }
        $start += strlen($m[0][0]);

        // Per RFC2047, each string part "MUST represent an integral number
        // of characters. A multi-octet character may not be split across
        // adjacent encoded-words." However, some mailers break this, so we
        // aggregate sequential encoded parts with the same character set and
        // encoding and decode the aggregation as a whole.
        $tmp[] = $text;

        // isset() guard: the last match has no successor, and reading the
        // missing index raised an undefined-offset notice/warning.
        if (isset($matches[$idx + 1])) {
            $next_match = $matches[$idx + 1];
            if ($next_match[0][1] == $start
                && $next_match[1][0] == $charset
                && $next_match[2][0] == $encoding
            ) {
                continue;
            }
        }

        // Decode and join the collected chunks
        $text = '';
        if ($encoding == 'B' || $encoding == 'b') {
            // base64 must be decoded a segment at a time
            foreach ($tmp as $chunk) {
                $text .= base64_decode($chunk);
            }
        }
        else {
            // quoted-printable can be combined and processed at once
            $text = quoted_printable_decode(str_replace('_', ' ', implode('', $tmp)));
        }

        $out .= WSCCharset::convert($text, $charset);
        $tmp  = array();
    }

    // add the last part of the input string
    if ($start != strlen($input)) {
        $out .= WSCCharset::convert(substr($input, $start), $default_charset);
    }

    return $out;
}
213 
/**
 * Decodes a body according to its content transfer encoding.
 *
 * The encoding name is matched case-insensitively. 'quoted-printable',
 * 'base64' and the uuencode aliases are decoded; '7bit' and any other
 * value return the input unchanged.
 *
 * @param string $input    Encoded body
 * @param string $encoding Transfer encoding name
 *
 * @return string Decoded body
 */
public static function decode($input, $encoding = '7bit')
{
    $encoding = strtolower($encoding);

    if ($encoding === 'quoted-printable') {
        return quoted_printable_decode($input);
    }

    if ($encoding === 'base64') {
        return base64_decode($input);
    }

    $uu_aliases = array('x-uuencode', 'x-uue', 'uue', 'uuencode');
    if (in_array($encoding, $uu_aliases, true)) {
        return convert_uudecode($input);
    }

    // '7bit' and everything unrecognised pass through untouched
    return $input;
}
239 
/**
 * Parses a raw header block into an associative array.
 *
 * Folded lines (a line break followed by tabs or spaces) are joined first.
 * A line is taken as a header only when it contains ': ' after at least one
 * character; field names are lower-cased and values are trimmed. When a
 * field occurs more than once the last value wins.
 *
 * @param string $headers Raw header block
 *
 * @return array Map of lower-cased field name => trimmed value
 */
public static function parse_headers($headers)
{
    $a_headers = array();

    // Unfold continuation lines
    $headers = preg_replace('/\r?\n(\t| )+/', ' ', $headers);

    foreach (explode("\n", $headers) as $line) {
        $p = strpos($line, ': ');
        // false (no separator) and 0 (empty field name) are both rejected
        if (!$p) {
            continue;
        }

        $field = strtolower(substr($line, 0, $p));
        $value = trim(substr($line, $p + 1));

        // Compare against '' explicitly: empty() also treats "0" as empty,
        // which silently dropped headers such as "X-Count: 0".
        if ($value !== '') {
            $a_headers[$field] = $value;
        }
    }

    return $a_headers;
}
260 
/**
 * Parses an address-list header value into name/address pairs.
 *
 * @param string      $str      Address list
 * @param bool        $decode   Decode names with decode_header()
 * @param string|null $fallback Fallback charset handed to decode_header()
 *
 * @return array Entries with the keys 'name' and 'address', keyed by the
 *               item's position in the exploded list. Items that yield no
 *               address are left out.
 */
private static function parse_address_list($str, $decode = true, $fallback = null)
{
    $m = array();

    // remove any newlines and carriage returns before
    $str = preg_replace('/\r?\n(\s|\t)?/', ' ', $str);

    // extract list items, remove comments
    $items  = self::explode_header_string(',;', $str, true);
    $result = array();

    // simplified regexp, supporting quoted local part
    $email_rx = '(\S+|("\s*(?:[^"\f\n\r\t\v\b\s]+\s*)+"))@\S+';

    foreach ($items as $key => $val) {
        $name    = '';
        $address = '';
        $val     = trim($val);

        if (preg_match('/(.*)<('.$email_rx.')>$/', $val, $m)) {
            $address = $m[2];
            $name    = trim($m[1]);
        }
        else if (preg_match('/^('.$email_rx.')$/', $val, $m)) {
            $address = $m[1];
            $name    = '';
        }
        // special case (#1489092)
        else if (preg_match('/(\s*<MAILER-DAEMON>)$/', $val, $m)) {
            $address = 'MAILER-DAEMON';
            $name    = substr($val, 0, -strlen($m[1]));
        }
        else if (preg_match('/('.$email_rx.')/', $val, $m)) {
            $name = $m[1];
        }
        else {
            $name = $val;
        }

        // dequote and/or decode name
        if ($name) {
            if ($name[0] == '"' && substr($name, -1) == '"') {
                $name = stripslashes((string) substr($name, 1, -1));
            }

            if ($decode) {
                $name = self::decode_header($name, $fallback);

                // some clients encode addressee name with quotes around it.
                // Decoding may produce an empty string, so check before
                // reading offset 0 (that raised "Uninitialized string offset").
                if (is_string($name) && $name !== ''
                    && $name[0] == '"' && substr($name, -1) == '"'
                ) {
                    $name = (string) substr($name, 1, -1);
                }
            }
        }

        // An item with only a "name" is treated as a bare address
        if (!$address && $name) {
            $address = $name;
            $name    = '';
        }

        if ($address) {
            $address      = self::fix_email($address);
            $result[$key] = array('name' => $name, 'address' => $address);
        }
    }

    return $result;
}
322 
/**
 * Splits a header string on separator characters, honouring quoted strings
 * and (optionally) removing parenthesised comments.
 *
 * Separators inside a double-quoted string or inside a comment do not split.
 * Quotes and backslash escapes are kept in the output; comment text is
 * dropped. A segment still open inside an unterminated comment at the end
 * of the input is discarded.
 *
 * @param string $separator       Characters that each act as a separator
 * @param string $str             Header string
 * @param bool   $remove_comments Treat "(...)" as comments and remove them
 *
 * @return array List of non-empty segments, untrimmed
 */
public static function explode_header_string($separator, $str, $remove_comments = false)
{
    $length  = strlen($str);
    $result  = array();
    $quoted  = false;
    $comment = 0;   // nesting depth of the comment we are in
    $out     = '';

    for ($i = 0; $i < $length; $i++) {
        // we're inside a quoted string
        if ($quoted) {
            if ($str[$i] == '"') {
                $quoted = false;
            }
            else if ($str[$i] == "\\") {
                if ($comment <= 0) {
                    $out .= "\\";
                }
                $i++;
                // A trailing backslash has no escaped character to copy;
                // stop instead of reading past the end of the string.
                if ($i >= $length) {
                    break;
                }
            }
        }
        // we are inside a comment string
        else if ($comment > 0) {
            if ($str[$i] == ')') {
                $comment--;
            }
            else if ($str[$i] == '(') {
                $comment++;
            }
            else if ($str[$i] == "\\") {
                $i++;
            }
            continue;
        }
        // separator, add to result array
        else if (strpos($separator, $str[$i]) !== false) {
            // Compare with '' so that a segment consisting of "0" is kept
            if ($out !== '') {
                $result[] = $out;
            }
            $out = '';
            continue;
        }
        // start of quoted string
        else if ($str[$i] == '"') {
            $quoted = true;
        }
        // start of comment
        else if ($remove_comments && $str[$i] == '(') {
            $comment++;
        }

        if ($comment <= 0) {
            $out .= $str[$i];
        }
    }

    if ($out !== '' && $comment <= 0) {
        $result[] = $out;
    }

    return $result;
}
392 
/**
 * Joins the soft-wrapped lines of format=flowed text.
 *
 * A line is joined onto the previous kept line when that line ends with a
 * space and, for quoted text, has the same quote depth. The signature
 * separator "-- " is never joined with its neighbours.
 *
 * @param string      $text Flowed text
 * @param string|null $mark When truthy, prepended to every line that had
 *                          at least one following line joined onto it
 *
 * @return string Unfolded text, lines separated by CRLF
 */
public static function unfold_flowed($text, $mark = null)
{
    $text    = preg_split('/\r?\n/', $text);
    $last    = -1;      // index of the line later lines may be joined onto
    $q_level = 0;       // quote depth of the previous line
    $marks   = array();

    foreach ($text as $idx => $line) {
        $q = strspn($line, '>');

        if ($q) {
            // remove quote chars
            $line = (string) substr($line, $q);
            // remove (optional) space-stuffing; isset() avoids reading
            // offset 0 of an empty string
            if (isset($line[0]) && $line[0] === ' ') {
                $line = (string) substr($line, 1);
            }

            // The same paragraph (we join current line with the previous one) when:
            // - the same level of quoting
            // - previous line was flowed
            // - previous line contains more than only one single space (and quote char(s))
            if ($q == $q_level
                && isset($text[$last]) && substr($text[$last], -1) === ' '
                && !preg_match('/^>+ {0,1}$/', $text[$last])
            ) {
                $text[$last] .= $line;
                unset($text[$idx]);

                if ($mark) {
                    $marks[$last] = true;
                }
            }
            else {
                $last = $idx;
            }
        }
        else if ($line == '-- ') {
            $last = $idx;
        }
        else {
            // remove space-stuffing
            if (isset($line[0]) && $line[0] === ' ') {
                $line = (string) substr($line, 1);
            }

            if (isset($text[$last]) && $line && !$q_level
                && $text[$last] != '-- '
                && substr($text[$last], -1) === ' '
            ) {
                $text[$last] .= $line;
                unset($text[$idx]);

                if ($mark) {
                    $marks[$last] = true;
                }
            }
            else {
                $text[$idx] = $line;
                $last = $idx;
            }
        }

        $q_level = $q;
    }

    foreach (array_keys($marks) as $mk) {
        $text[$mk] = $mark . $text[$mk];
    }

    return implode("\r\n", $text);
}
463 
/**
 * Wraps text into format=flowed lines.
 *
 * Quoted lines keep their '>' prefix on every wrapped line; unquoted lines
 * that begin with "From", a space or '>' are space-stuffed. The signature
 * separator "-- " is left untouched.
 *
 * @param string      $text    Text to format
 * @param int         $length  Maximum line length
 * @param string|null $charset Charset handed to wordwrap()
 *
 * @return string Wrapped text, lines separated by CRLF
 */
public static function format_flowed($text, $length = 72, $charset = null)
{
    $text = preg_split('/\r?\n/', $text);

    foreach ($text as $idx => $line) {
        if ($line == '-- ') {
            continue;
        }

        $level = strspn($line, '>');

        if ($level) {
            // remove quote chars
            $line = (string) substr($line, $level);
            // remove (optional) space-stuffing and spaces before the line end
            $line = rtrim($line, ' ');
            // isset() guard: a line such as ">" or "> " is empty at this
            // point and reading offset 0 raised a warning
            if (isset($line[0]) && $line[0] === ' ') {
                $line = (string) substr($line, 1);
            }

            $prefix = str_repeat('>', $level) . ' ';
            $line   = $prefix . self::wordwrap($line, $length - $level - 2, " \r\n$prefix", false, $charset);
        }
        else if ($line) {
            $line = self::wordwrap(rtrim($line), $length - 2, " \r\n", false, $charset);
            // space-stuffing
            $line = preg_replace('/(^|\r\n)(From| |>)/', '\\1 \\2', $line);
        }

        $text[$idx] = $line;
    }

    return implode("\r\n", $text);
}
497 
/**
 * Multibyte-aware word wrap built on the mbstring functions.
 *
 * CRLF in the input is normalised to LF before wrapping and the resulting
 * lines are joined with $break.
 *
 * @param string      $string      Text to wrap
 * @param int         $width       Maximum line width, in characters
 * @param string      $break       String placed between the resulting lines
 * @param bool        $cut         When true, a word longer than $width is cut;
 *                                 when false it is kept whole
 * @param string|null $charset     Charset of $string. When set and different
 *                                 from self::WSC_CHARSET, the mbstring internal
 *                                 encoding is switched to it while wrapping and
 *                                 set to self::WSC_CHARSET afterwards
 * @param bool        $wrap_quoted When true, lines starting with '>' are
 *                                 copied through unwrapped
 *
 * @return string Wrapped text
 */
public static function wordwrap($string, $width = 75, $break = "\n", $cut = false, $charset = null, $wrap_quoted = true)
{
    // Note: Never try to use iconv instead of mbstring functions here
    // Iconv's substr/strlen are 100x slower (#1489113)

    // The class constant must be referenced through self::. The unqualified
    // name WSC_CHARSET is a namespace/global constant lookup, and no such
    // constant is defined in this file.
    $switch_encoding = $charset && $charset != self::WSC_CHARSET;
    if ($switch_encoding) {
        mb_internal_encoding($charset);
    }

    // Convert \r\n to \n, this is our line-separator
    $string    = str_replace("\r\n", "\n", $string);
    $separator = "\n"; // must be 1 character length
    $result    = array();

    while (($stringLength = mb_strlen($string)) > 0) {
        $breakPos = mb_strpos($string, $separator, 0);

        // quoted line (do not wrap)
        if ($wrap_quoted && $string[0] == '>') {
            if ($breakPos === $stringLength - 1 || $breakPos === false) {
                $subString = $string;
                $cutLength = null;
            }
            else {
                $subString = mb_substr($string, 0, $breakPos);
                $cutLength = $breakPos + 1;
            }
        }
        // next line found and current line is shorter than the limit
        else if ($breakPos !== false && $breakPos < $width) {
            if ($breakPos === $stringLength - 1) {
                $subString = $string;
                $cutLength = null;
            }
            else {
                $subString = mb_substr($string, 0, $breakPos);
                $cutLength = $breakPos + 1;
            }
        }
        else {
            $subString = mb_substr($string, 0, $width);

            // last line
            if ($breakPos === false && $subString === $string) {
                $cutLength = null;
            }
            else {
                $nextChar = mb_substr($string, $width, 1);

                if ($nextChar === ' ' || $nextChar === $separator) {
                    // The limit falls exactly on a word boundary
                    $afterNextChar = mb_substr($string, $width + 1, 1);

                    // Note: mb_substr() does never return False
                    if ($afterNextChar === false || $afterNextChar === '') {
                        $subString .= $nextChar;
                    }

                    $cutLength = mb_strlen($subString) + 1;
                }
                else {
                    // Break at the last space inside the window, if any
                    $spacePos = mb_strrpos($subString, ' ', 0);

                    if ($spacePos !== false) {
                        $subString = mb_substr($subString, 0, $spacePos);
                        $cutLength = $spacePos + 1;
                    }
                    else if ($cut === false) {
                        // Over-long word, not allowed to cut: extend to the
                        // next space or line break, whichever comes first
                        $spacePos = mb_strpos($string, ' ', 0);

                        if ($spacePos !== false && ($breakPos === false || $spacePos < $breakPos)) {
                            $subString = mb_substr($string, 0, $spacePos);
                            $cutLength = $spacePos + 1;
                        }
                        else if ($breakPos === false) {
                            $subString = $string;
                            $cutLength = null;
                        }
                        else {
                            $subString = mb_substr($string, 0, $breakPos);
                            $cutLength = $breakPos + 1;
                        }
                    }
                    else {
                        $cutLength = $width;
                    }
                }
            }
        }

        $result[] = $subString;

        // null means the remainder has been consumed completely
        if ($cutLength === null) {
            break;
        }

        $string = mb_substr($string, $cutLength, ($stringLength - $cutLength));
    }

    if ($switch_encoding) {
        mb_internal_encoding(self::WSC_CHARSET);
    }

    return implode($break, $result);
}
602 
/**
 * Determines the MIME type of a file.
 *
 * When the path resolves to an existing file and the Fileinfo functions are
 * available, the type reported by Fileinfo is returned. Otherwise (or when
 * Fileinfo fails) the type is looked up from the file extension, and
 * $failover is returned for unknown extensions.
 *
 * @param string $path        Path of the file
 * @param string $name        Not used by this implementation
 * @param string $failover    Type returned when nothing else matches
 * @param bool   $is_stream   Not used by this implementation
 * @param bool   $skip_suffix Not used by this implementation
 *
 * @return string MIME type
 */
public static function file_content_type($path, $name, $failover = 'application/octet-stream', $is_stream = false, $skip_suffix = false)
{
    $realpath = realpath($path);

    if (
        $realpath
        && function_exists('finfo_file')
        && function_exists('finfo_open')
        && defined('FILEINFO_MIME_TYPE')
    ) {
        // Use the Fileinfo extension (PHP >= 5.3)
        $finfo = finfo_open(FILEINFO_MIME_TYPE);
        if ($finfo) {
            $detected = finfo_file($finfo, $realpath);
            // finfo_file() returns false on failure; only trust a real
            // answer and otherwise fall through to the extension lookup
            // instead of handing false back to the caller.
            if (is_string($detected) && $detected !== '') {
                return $detected;
            }
        }
    }

    // Derive the type from the extension; lower-cased so that "photo.JPG"
    // matches the same entry as "photo.jpg".
    $fileExtension = strtolower(pathinfo($path, PATHINFO_EXTENSION));

    // Table of MIME types by extension
    $mime_types = array
    (
        // text
        'txt' => 'text/plain',
        'htm' => 'text/html',
        'html' => 'text/html',
        'php' => 'text/html',
        'css' => 'text/css',
        'js' => 'application/javascript',
        'json' => 'application/json',
        'xml' => 'application/xml',
        'csv' => 'text/csv',

        // images
        'png' => 'image/png',
        'jpe' => 'image/jpeg',
        'jpeg' => 'image/jpeg',
        'jpg' => 'image/jpeg',
        'gif' => 'image/gif',
        'bmp' => 'image/bmp',
        'ico' => 'image/vnd.microsoft.icon',
        'tiff' => 'image/tiff',
        'tif' => 'image/tiff',
        'svg' => 'image/svg+xml',
        'svgz' => 'image/svg+xml',

        // archives
        'zip' => 'application/zip',
        'rar' => 'application/x-rar-compressed',
        'exe' => 'application/x-msdownload',
        'msi' => 'application/x-msdownload',
        'cab' => 'application/vnd.ms-cab-compressed',
        'tgz' => 'application/tar+gzip',
        // NOTE(review): pathinfo() yields only the last suffix ("gz"), so
        // this key looks unreachable - confirm before removing.
        'tar.gz' => 'application/tar+gzip',
        'tar' => 'application/tar',
        'gz' => 'application/gzip',
        '7z' => 'application/x-7z-compressed',
        's7z' => 'application/x-7z-compressed',

        // audio/video
        'mp3' => 'audio/mpeg',
        'qt' => 'video/quicktime',
        'mov' => 'video/quicktime',
        'mpeg' => 'video/mpeg',
        'avi' => 'video/x-msvideo',
        'swf' => 'application/x-shockwave-flash',
        'flv' => 'video/x-flv',

        // adobe
        'pdf' => 'application/pdf',
        'psd' => 'image/vnd.adobe.photoshop',
        'ai' => 'application/postscript',
        'eps' => 'application/postscript',
        'ps' => 'application/postscript',

        // MSOffice
        'doc' => 'application/msword',
        'dot' => 'application/msword',
        'docx' => 'application/msword',
        'rtf' => 'application/rtf',

        'xls' => 'application/vnd.ms-excel',
        'xlsx' => 'application/vnd.ms-excel',
        'xlm' => 'application/vnd.ms-excel',
        'xla' => 'application/vnd.ms-excel',
        'xlc' => 'application/vnd.ms-excel',
        'xlt' => 'application/vnd.ms-excel',
        'xlw' => 'application/vnd.ms-excel',

        'ppt' => 'application/vnd.ms-powerpoint',
        'pptx' => 'application/vnd.ms-powerpoint',
        'pps' => 'application/vnd.ms-powerpoint',
        'pot' => 'application/vnd.ms-powerpoint',

        // libreOffice
        'odc' => 'application/vnd.oasis.opendocument.chart',
        'otc' => 'application/vnd.oasis.opendocument.chart-template',
        'odf' => 'application/vnd.oasis.opendocument.formula',
        'otf' => 'application/vnd.oasis.opendocument.formula-template',
        'odg' => 'application/vnd.oasis.opendocument.graphics',
        'otg' => 'application/vnd.oasis.opendocument.graphics-template',
        'odi' => 'application/vnd.oasis.opendocument.image',
        'oti' => 'application/vnd.oasis.opendocument.image-template',
        'odp' => 'application/vnd.oasis.opendocument.presentation',
        'otp' => 'application/vnd.oasis.opendocument.presentation-template',
        'ods' => 'application/vnd.oasis.opendocument.spreadsheet',
        'ots' => 'application/vnd.oasis.opendocument.spreadsheet-template',
        'odt' => 'application/vnd.oasis.opendocument.text',
        'otm' => 'application/vnd.oasis.opendocument.text-master',
        'ott' => 'application/vnd.oasis.opendocument.text-template',
        'oth' => 'application/vnd.oasis.opendocument.text-web',

        // vCards...
        'vcf' => 'text/vcard',
        'ics' => 'text/calendar',
    );

    // Known extension: use the table entry, otherwise the caller's failover
    if (array_key_exists($fileExtension, $mime_types)) {
        return $mime_types[$fileExtension];
    }

    return $failover;
}
742 
743 
744 
/**
 * Guesses an image MIME type from the first bytes of the image data.
 *
 * The data is checked against three leading byte signatures (png, gif,
 * ico); anything that matches none of them is reported as jpeg.
 *
 * @param string $data Image data
 *
 * @return string 'image/' followed by png, gif, ico or jpeg
 */
public static function image_content_type($data)
{
    // Leading byte signature => subtype
    $signatures = array(
        '/^\x89\x50\x4E\x47/' => 'png',
        '/^\x47\x49\x46\x38/' => 'gif',
        '/^\x00\x00\x01\x00/' => 'ico',
    );

    foreach ($signatures as $pattern => $subtype) {
        if (preg_match($pattern, $data)) {
            return 'image/' . $subtype;
        }
    }

    // No signature matched: jpeg is the default
    return 'image/' . 'jpeg';
}
761 
762 
/**
 * Removes redundant quoting from the parts of an e-mail address.
 *
 * The address is split on every '@' that is not inside double quotes. A
 * part that is a quoted string made only of the characters
 * a-z A-Z 0-9 . _ + = - loses its quotes; all other parts are kept as they
 * are. The parts are then joined with '@' again.
 *
 * @param string $email E-mail address
 *
 * @return string Address without redundant quoting
 */
public static function fix_email($email)
{
    $parts      = array();
    $length     = strlen($email);
    $in_quotes  = false;
    $part_start = 0;

    for ($i = 0; $i < $length; $i++) {
        $char = $email[$i];

        // A quote toggles the quoted state unless it is escaped. The first
        // character has no predecessor: reading offset -1 would return the
        // LAST character on PHP >= 7.1 and misread a leading quote.
        if ($char === '"' && ($i === 0 || $email[$i - 1] !== "\\")) {
            $in_quotes = !$in_quotes;
        }
        else if (!$in_quotes && $char === '@') {
            $parts[]    = (string) substr($email, $part_start, $i - $part_start);
            $part_start = $i + 1;
        }
    }
    $parts[] = (string) substr($email, $part_start);

    $m = array();
    foreach ($parts as $idx => $part) {
        // remove redundant quoting (#1490040); the pattern itself requires
        // the leading quote, so empty parts need no separate offset check
        if (preg_match('/^"([a-zA-Z0-9._+=-]+)"$/', $part, $m)) {
            $parts[$idx] = $m[1];
        }
    }

    return implode('@', $parts);
}
795 }
static fix_email($email)
Definition: WSCMime.php:766
static image_content_type($data)
Definition: WSCMime.php:752
static parse_headers($headers)
Definition: WSCMime.php:243
static decode_mime_string($input, $fallback=null)
Definition: WSCMime.php:137
static format_flowed($text, $length=72, $charset=null)
Definition: WSCMime.php:473
static decode_address_list($input, $max=null, $decode=true, $fallback=null, $addronly=false)
Definition: WSCMime.php:83
static unfold_flowed($text, $mark=null)
Definition: WSCMime.php:401
static get_charset()
Definition: WSCMime.php:40
static decode($input, $encoding='7bit')
Definition: WSCMime.php:222
__construct($default_charset=null)
Definition: WSCMime.php:25
static decode_header($input, $fallback=null)
Definition: WSCMime.php:123
static file_content_type($path, $name, $failover='application/octet-stream', $is_stream=false, $skip_suffix=false)
Definition: WSCMime.php:617
static parse_message($raw_body)
Definition: WSCMime.php:60
static wordwrap($string, $width=75, $break="\n", $cut=false, $charset=null, $wrap_quoted=true)
Definition: WSCMime.php:511
static explode_header_string($separator, $str, $remove_comments=false)
Definition: WSCMime.php:334