<?php
/**
* @version	$Id: mime_decode_helper.php 16513 2017-01-20 14:10:53Z alex $
* @package	In-Portal
* @copyright	Copyright (C) 1997 - 2009 Intechnic. All rights reserved.
* @license      GNU/GPL
* In-Portal is Open Source software.
* This means that this software may have been modified pursuant
* the GNU General Public License, and as distributed it includes
* or is derivative of works licensed under the GNU General Public License
* or other free or open source software licenses.
* See http://www.in-portal.org/license for copyright notices and details.
*/

	// Refuse to run when the file is requested directly instead of being loaded by the application.
	if ( !defined('FULL_PATH') ) {
		die('restricted access!');
	}

	/**
	 * The MIME decoding class
	 *
	 */
	class MimeDecodeHelper extends kHelper {

		/**
		 * Header section of the message given to InitHelper
		 *
		 * @var string
		 */
		public $_headerPart;

		/**
		 * Body section of the message given to InitHelper
		 *
		 * @var string
		 */
		public $_bodyPart;

		/**
		 * Description of the most recent parsing failure (empty string when there was none)
		 *
		 * @var string
		 */
		public $_lastErrorMessage = '';

		/**
		 * Whether header values get decoded while parsing; overwritten on every decode() call
		 *
		 * @var bool
		 */
		public $_decodeHeaders = false;

		/**
		 * Whether bodies are put into the decoded result; overwritten on every decode() call
		 *
		 * @var bool
		 */
		public $_includeBodies = true;

		/**
		 * Whether included bodies are decoded according to their transfer encoding; overwritten on every decode() call
		 *
		 * @var bool
		 */
		public $_decodeBodies = false;

		/**
		 * Reports a parsing problem as a PHP user-level warning
		 *
		 * @param string $str Description of the problem
		 */
		function raiseError($str)
		{
			$warning = 'Error during email parsing: ' . $str;

			trigger_error($warning, E_USER_WARNING);
		}

		/**
		 * Stores the header and body sections of the given raw email message for later decoding
		 *
		 * Does nothing when no message is passed.
		 *
		 * @param string $message Raw email message
		 */
		function InitHelper($message = null)
		{
			if ($message !== null) {
				list ($header_section, $body_section) = $this->_splitBodyHeader($message);

				$this->_headerPart = $header_section;
				$this->_bodyPart = $body_section;
			}
		}

		/**
		 * Decodes the message, that was previously given to the InitHelper method
		 *
		 * The three flags are remembered on the helper and used while every part is decoded.
		 * When decoding fails, a warning with the last error message is raised.
		 *
		 * @param bool $decode_headers Decode header values
		 * @param bool $include_bodies Put bodies into the result
		 * @param bool $decode_bodies Decode bodies that are put into the result
		 * @return stdClass|bool Decoded structure, or false when decoding failed
		 */
		function decode($decode_headers = false, $include_bodies = false, $decode_bodies = false)
		{
			$this->_decodeHeaders = $decode_headers;
			$this->_includeBodies = $include_bodies;
			$this->_decodeBodies = $decode_bodies;

			$decoded = $this->decodePart($this->_headerPart, $this->_bodyPart);

			if ($decoded !== false) {
				return $decoded;
			}

			$this->raiseError($this->_lastErrorMessage);

			return false;
		}

		/**
		 * Decodes one MIME entity (the whole message or one of its parts)
		 *
		 * The result object always has "headers" (lower-cased header name => value, or array
		 * of values for headers that occur more than once and are not in the single-value
		 * list). Depending on the headers found it also gets "ctype_primary",
		 * "ctype_secondary", "ctype_parameters", "disposition", "d_parameters", "body"
		 * and "parts".
		 *
		 * @param string $headers Header section of the entity
		 * @param string $body Body section of the entity
		 * @param string $default_ctype Content type to assume when the entity has no "Content-Type" header
		 * @param bool $only_headers Decode and return only the headers
		 * @return stdClass|Array|bool Decoded entity; headers array when $only_headers is set; false when a multipart entity has no boundary
		 */
		function decodePart($headers, $body, $default_ctype = 'text/plain', $only_headers = false)
		{
			$return = new stdClass;

			// process headers
			$return->headers = Array ();
			$headers = $this->_parseHeaders($headers, $this->_decodeHeaders);
			$single_headers = Array ('subject', 'from', 'to', 'cc', 'reply-to', 'date');

			foreach ($headers as $value) {
				$header_name = strtolower($value['name']);
				$header_value = $only_headers ? $this->_decodeHeader($value['value']) : $value['value'];

				$repeated = array_key_exists($header_name, $return->headers) && !in_array($header_name, $single_headers);

				if (!$repeated) {
					// first occurrence, or a single-value header (its last occurrence wins)
					$return->headers[$header_name] = $header_value;
					continue;
				}

				if (!is_array($return->headers[$header_name])) {
					// this is not a single header, so convert it to array, when 2nd value is found
					$return->headers[$header_name] = Array ( $return->headers[$header_name] );
				}

				$return->headers[$header_name][] = $header_value;
			}

			if ($only_headers) {
				return $return->headers;
			}

			foreach ($headers as $value) {
				$header_name = strtolower($value['name']);
				$header_value = $value['value'];

				switch ($header_name) {
					case 'content-type':
						$content_type = $this->_parseHeaderValue($header_value);

						if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
							// "text/plain", "text/html", etc.
							$return->ctype_primary   = $regs[1];
							$return->ctype_secondary = $regs[2];
						}

						if (array_key_exists('other', $content_type)) {
							// "charset", etc.
							foreach ($content_type['other'] as $p_name => $p_value) {
								$return->ctype_parameters["$p_name"] = $p_value;
							}
						}
						break;

					case 'content-disposition':
						$content_disposition = $this->_parseHeaderValue($header_value);
						$return->disposition = $content_disposition['value'];

						if (array_key_exists('other', $content_disposition)) {
							// "filename", etc.
							foreach ($content_disposition['other'] as $p_name => $p_value) {
								$return->d_parameters["$p_name"] = $p_value;
							}
						}
						break;

					case 'content-transfer-encoding':
						$content_transfer_encoding = $this->_parseHeaderValue($header_value);
						break;
				}
			}

			// the transfer encoding applies to the body no matter if a content type was given or not
			$encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';

			// process message body
			if (!isset($content_type)) {
				$ctype = explode('/', $default_ctype);
				$return->ctype_primary = $ctype[0];
				$return->ctype_secondary = $ctype[1];

				if ($this->_includeBodies) {
					$return->body = $this->_decodeBodies ? $this->_decodeBody($body, $encoding) : $body;
				}

				return $return;
			}

			switch ( strtolower($content_type['value']) ) {
				case 'multipart/parallel':
				case 'multipart/report': // RFC1892
				case 'multipart/signed': // PGP
				case 'multipart/digest':
				case 'multipart/alternative':
				case 'multipart/appledouble':
				case 'multipart/related':
				case 'multipart/mixed':
					if (!isset($content_type['other']['boundary'])) {
						$this->_lastErrorMessage = 'No boundary found for ' . $content_type['value'] . ' part';
						return false;
					}

					$default_ctype = (strtolower($content_type['value']) === 'multipart/digest') ? 'message/rfc822' : 'text/plain';

					$parts = $this->_boundarySplit($body, $content_type['other']['boundary']);

					if (!is_array($parts)) {
						// body contains no delimited parts at all
						$parts = Array ();
					}

					foreach ($parts as $raw_part) {
						list ($part_header, $part_body) = $this->_splitBodyHeader($raw_part);
						$part = $this->decodePart($part_header, $part_body, $default_ctype);

						if ($part === false) {
							// part is broken; it is still added (as false), so part positions stay intact
							$this->raiseError($this->_lastErrorMessage);
						}

						$return->parts[] = $part;
					}
					break;

				case 'message/rfc822':
				case 'message/disposition-notification':
					// create another instance, not to interfere with main parser
					/** @var MimeDecodeHelper $mime_decode_helper */
					$mime_decode_helper = $this->Application->makeClass('MimeDecodeHelper');

					$mime_decode_helper->InitHelper($body);

					$return->parts[] = $mime_decode_helper->decode(true, $this->_includeBodies, $this->_decodeBodies);
					unset($mime_decode_helper);
					break;

				default:
					// "text/plain", "text/html" and any other non-container type
					if ($this->_includeBodies) {
						$return->body = $this->_decodeBodies ? $this->_decodeBody($body, $encoding) : $body;
					}
					break;
			}

			return $return;
		}

		/**
		 * Divides message into header and body parts
		 *
		 * The split happens at the first empty line; both CRLF and bare LF line endings are
		 * recognized. A message without an empty line is treated as headers with an empty body.
		 *
		 * @param string $input
		 * @return Array|bool Array (headers, body), or false when the input can't be processed
		 */
		function _splitBodyHeader($input)
		{
			$pieces = preg_split("/\r?\n\r?\n/", $input, 2);

			if ($pieces === false) {
				// regular expression engine failure
				$this->_lastErrorMessage = 'Could not split header and body';

				return false;
			}

			if (count($pieces) < 2) {
				// no empty line found: headers only
				return Array ($input, '');
			}

			return Array ($pieces[0], $pieces[1]);
		}

		/**
		 * Parses headers string into array and optionally decode them
		 *
		 * Folded (multi-line) headers are joined into one line first. Lines without ":" are
		 * not headers and are skipped. One space after the ":" is removed from the value.
		 *
		 * @param string $input
		 * @param bool $decode
		 * @return Array List of Array ('name' => ..., 'value' => ...) records in order of appearance
		 */
		function _parseHeaders($input, $decode = false)
		{
			if (!$input) {
				return Array ();
			}

			$ret = Array ();

			// Unfold the input
			$input   = str_replace("\r\n", "\n", $input);
			$input   = preg_replace("/\n(\t| )+/", ' ', $input);
			$headers = explode("\n", trim($input));

			foreach ($headers as $value) {
				$pos = strpos($value, ':');

				if ($pos === false) {
					// not a "name: value" line
					continue;
				}

				$hdr_name = substr($value, 0, $pos);

				// cast, because older PHP versions return false for an empty remainder
				$hdr_value = (string)substr($value, $pos + 1);

				if ($hdr_value !== '' && $hdr_value[0] == ' ') {
					$hdr_value = (string)substr($hdr_value, 1);
				}

				$ret[] = Array (
					'name'  => $hdr_name,
					'value' => $decode ? $this->_decodeHeader($hdr_value) : $hdr_value
				);
			}

			return $ret;
		}

		/**
		 * Parses header value in following format (without quotes): "multipart/alternative; boundary=001636c9274051e332048498d8cc"
		 *
		 * Text before the first ";" is returned under the "value" key. Parameters (if any) are
		 * returned under the "other" key, each one both under its original and lower-cased name.
		 * Empty parameters (e.g. caused by a trailing ";") are ignored; a parameter without "="
		 * gets an empty value.
		 *
		 * @param string $input
		 * @return Array
		 */
		function _parseHeaderValue($input)
		{
			$ret = Array ();
			$pos = strpos($input, ';');

			if ($pos === false) {
				$ret['value'] = trim($input);

				return $ret;
			}

			// get text until first ";"
			$ret['value'] = trim(substr($input, 0, $pos));
			$input = trim((string)substr($input, $pos + 1));

			if ($input === '') {
				return $ret;
			}

			// This splits on a semi-colon, if there's no preceding backslash.
			// A semi-colon inside of double quotes is not supported and splits the value as well.
			$parameters = preg_split('/\s*(?<!\\\\);\s*/', $input);

			foreach ($parameters as $parameter) {
				if ($parameter === '') {
					continue;
				}

				$pos = strpos($parameter, '=');

				if ($pos === false) {
					$param_name = $parameter;
					$param_value = '';
				}
				else {
					$param_name = substr($parameter, 0, $pos);
					$param_value = (string)substr($parameter, $pos + 1);
				}

				if ($param_value !== '' && $param_value[0] == '"') {
					$param_value = (string)substr($param_value, 1);

					// remove closing quote only when it's really there
					if (substr($param_value, -1) == '"') {
						$param_value = (string)substr($param_value, 0, -1);
					}
				}

				$ret['other']["$param_name"] = $param_value;
				$ret['other'][ strtolower($param_name) ] = $param_value;
			}

			return $ret;
		}

		/**
		 * Splits input body using given boundary
		 *
		 * Text before the first delimiter and text after the last delimiter are not parts
		 * and are dropped. An empty array is returned, when no part is found.
		 *
		 * @param string $input
		 * @param string $boundary
		 * @return Array
		 */
		function _boundarySplit($input, $boundary)
		{
			$chunks = explode('--' . $boundary, $input);

			// everything between the first and the last chunk is a part
			return array_slice($chunks, 1, -1);
		}

		/**
		 * Decode message header value
		 *
		 * Replaces every encoded-word ("=?charset?Q?text?=" or "=?charset?B?text?=") with
		 * its decoded text, converted from the charset named in that encoded-word. Text outside of
		 * encoded-words is left untouched.
		 *
		 * @param string $input
		 * @return string
		 */
		function _decodeHeader($input)
		{
			// Remove white space between encoded-words (http://www.ietf.org/rfc/rfc2047.txt)
			$regexp = '/(=\?[^?]+\?(Q|B)\?[^?]*\?=)(\s)+=\?/i';

			while (preg_match($regexp, $input)) {
				// process each word separately
				$input = preg_replace($regexp, '\1=?', $input);
			}

			// For each encoded-word...
			while (preg_match('/(=\?([^?]+)\?(Q|B)\?([^?]*)\?=)/i', $input, $matches)) {
				$encoded = $matches[1];
				$charset = $matches[2];
				$encoding = $matches[3];
				$text = $matches[4];

				switch (strtoupper($encoding)) {
					case 'B':
						$text = base64_decode($text);
						break;

					case 'Q':
						$text = str_replace('_', ' ', $text);

						if (preg_match_all('/=[a-f0-9]{2}/i', $text, $hex_matches)) {
							$replacements = Array ();

							foreach (array_unique($hex_matches[0]) as $hex_code) {
								$replacements[$hex_code] = chr(hexdec(substr($hex_code, 1)));
							}

							// strtr never re-scans text it has already replaced, so a decoded "=" can't start a new code
							$text = strtr($text, $replacements);
						}
						break;
				}

				// convert only the decoded word: the rest of the header is either plain text or already converted
				$input = str_replace($encoded, $this->convertEncoding($charset, $text), $input);
			}

			return $input;
		}

		/**
		 * Converts encoding to one, that site uses
		 *
		 * The text is returned unchanged when the mbstring extension is missing or when
		 * the source encoding is not known to it (the encoding name comes from the email).
		 *
		 * @param string $from_encoding
		 * @param string $text
		 * @return string
		 * @author Alex
		 */
		function convertEncoding($from_encoding, $text)
		{
			if ( !function_exists('mb_convert_encoding') ) {
				// if mbstring extension not installed
				return $text;
			}

			try {
				// unknown encoding name: warning + false before PHP 8, ValueError since PHP 8
				$converted = @mb_convert_encoding($text, CHARSET, $from_encoding);
			}
			catch (ValueError $e) {
				$converted = false;
			}

			return is_string($converted) ? $converted : $text;
		}

		/**
		 * Decodes message body according to its transfer encoding
		 *
		 * Only "quoted-printable" and "base64" (in any letter case) are decoded,
		 * any other encoding ("7bit", "8bit", etc.) leaves the body as it is.
		 *
		 * @param string $input
		 * @param string $encoding
		 * @return string
		 */
		function _decodeBody($input, $encoding = '7bit')
		{
			$normalized = strtolower($encoding);

			if ($normalized === 'quoted-printable') {
				return $this->_quotedPrintableDecode($input);
			}

			if ($normalized === 'base64') {
				return base64_decode($input);
			}

			return $input;
		}

		/**
		 * Decodes "quoted-printable" encoding
		 *
		 * Soft line breaks ("=" at the end of a line) are removed and every "=XX"
		 * hexadecimal code is replaced with the corresponding byte in a single pass.
		 *
		 * @param string $string
		 * @return string
		 */
		function _quotedPrintableDecode($string)
		{
			// Remove soft line breaks
			$string = preg_replace("/=\r?\n/", '', $string);

			// Replace encoded characters
			if (preg_match_all('/=[a-f0-9]{2}/i', $string, $matches)) {
				$replacements = Array ();

				foreach (array_unique($matches[0]) as $hex_code) {
					$replacements[$hex_code] = chr(hexdec(substr($hex_code, 1)));
				}

				// strtr never re-scans text it has already replaced, so a decoded "=" can't start a new code
				$string = strtr($string, $replacements);
			}

			return $string;
		}
	}