#native_company# #native_desc#
#native_cta#

MS Outlook express DBX file reader

By Antony Raijekov
on September 27, 2002

Version: 1.0a

Type: Class

Category: Other

License: GNU General Public License

Description: PHP Microsoft DBX File Reader reads files in the mailbox format used by Outlook Express.

<?php
/*********************************************************************************************
 Name:			ms_dbx_reader.class.inc.php
 Author:		Antony Raijekov a.k.a Zeos ([email protected]) Bulgaria/Sofia
 Version:		1.0
 Description:	Microsoft DBX file reader /such as Outlook Express Mailbox database files/
 Date:			9/27/2002 2:53 GMT +2
 License:		GPL 

 Note:			If you use this class, please send me an e-mail, just for my information. Thanks!
				I tested this class with 15 folders (~10 MB, 319 mails); it processed them in 0.6 sec :)
*********************************************************************************************/

//								CLASS IMPLEMENTATION

/********************************************************************************************
 * @description		MS DBX file reader /e.g. Outlook Express DBX file reader/
 * @author			Zeos <[email protected]>
 * @version			1.0a
 * @copyright		GPL license
 * @access			public
 *******************************************************************************************/

class ms_dbx_reader
{
	/** Path of the DBX file handed to the constructor ('' after clear()). */
	public $fname = null;
	/** Parsed messages; every entry is array('info' => array, 'content' => string|false). */
	public $mails = array();
	/** When true, progress text is printed while the file is parsed. */
	public $debug = false;
	/** File offsets of the index nodes already visited (offset => true); stops readIndex() from looping. */
	public $tmp   = array();

	/**
	 * Resets the reader to an empty state: no file name, no mails, no visited index nodes.
	 */
	public function clear()
	{
		$this->fname = '';
		$this->mails = array();
		$this->tmp   = array();
	}

	/**
	 * Opens a DBX file and loads every message reachable from its index into $this->mails.
	 *
	 * A file that cannot be opened is not an error: the object is simply left without mails.
	 *
	 * @param string $fname path of the DBX file
	 * @param bool   $debug print progress information while parsing
	 * @throws RuntimeException when a structure inside the file fails its self-offset check
	 */
	public function __construct($fname,$debug = false)
	{
		$this->debug = $debug;
		$this->fname = $fname;

		$fp = @fopen($fname,'rb');
		if(!$fp) return;

		try
		{
			//item count stored in the file header
			$count = $this->readUInt32At($fp,0xC4);
			if($this->debug) print "Processing {$count} message(s) in [".basename($fname)."]......";
			if($count > 0)
			{
				//offset 0x30 holds the position of the first index node
				$this->readIndex($fp,$this->readUInt32At($fp,0x30));
			}
			if($this->debug) print 'done<br>';
		}
		finally
		{
			//release the handle even when parsing aborts with an exception
			fclose($fp);
		}
	}

	/**
	 * Legacy PHP 4 style entry point, kept so code that calls it explicitly keeps working.
	 *
	 * @deprecated use "new ms_dbx_reader($fname,$debug)"
	 */
	public function ms_dbx_reader($fname,$debug = false)
	{
		$this->__construct($fname,$debug);
	}

	/**
	 * Reads up to $length bytes from $fp; returns '' for a non-positive length or a failed read.
	 * (fread() itself rejects a length of 0 on PHP 8.)
	 */
	private function readBytes($fp,$length)
	{
		if($length <= 0) return '';
		$data = fread($fp,$length);
		return ($data === false) ? '' : $data;
	}

	/**
	 * Decodes the little-endian unsigned 32-bit integer at byte $pos of $buf (0 when fewer than 4 bytes are left).
	 */
	private function u32($buf,$pos = 0)
	{
		$chunk = (string)substr($buf,$pos,4);
		if(strlen($chunk) < 4) return 0;
		$value = unpack('V',$chunk);
		return $value[1];
	}

	/**
	 * Seeks to $offset and returns the little-endian unsigned 32-bit integer stored there (0 on a short read).
	 */
	private function readUInt32At($fp,$offset)
	{
		fseek($fp,$offset);
		return $this->u32($this->readBytes($fp,4));
	}

	/**
	 * Returns the NUL-terminated string that starts at byte $pos of $buf.
	 *
	 * @param string $buf binary buffer (taken by reference only to avoid a copy; it is not modified)
	 * @param int    $pos start offset inside $buf
	 * @return string the text before the terminator; '' when $pos is outside the buffer
	 *                or no terminator follows
	 */
	public function readstring(&$buf,$pos)
	{
		if($pos < 0 || $pos >= strlen($buf)) return '';
		$end = strpos($buf,"\0",$pos);
		if($end === false) return '';
		return substr($buf,$pos,$end - $pos);
	}

	/**
	 * Reads a message body by following its chain of 528-byte blocks
	 * (16-byte block header followed by up to 512 bytes of data).
	 *
	 * @param resource $fp       open DBX file
	 * @param int      $position offset of the first block of the message
	 * @return string|false the concatenated block contents, or false when $position is not positive
	 *                      or the item count in the file header is zero
	 * @throws RuntimeException when a block does not start with its own offset or the chain loops
	 */
	public function ReadMessage($fp,$position)
	{
		if($position <= 0) return false;
		if($this->readUInt32At($fp,0xC4) <= 0) return false;

		$msg     = '';
		$part    = 0;
		$visited = array();
		while($position > 0)
		{
			//a chain that points back to a block already read would never end
			if(isset($visited[$position])) throw new RuntimeException('Message block chain loops back on itself');
			$visited[$position] = true;

			fseek($fp,$position);
			$part++;
			$block = $this->readBytes($fp,528);
			if(strlen($block) == 0) break;
			if(strlen($block) < 16) throw new RuntimeException("Read $part part of message verify error");

			$head = unpack('VFilePos/VUnknown/VItemSize/VNextItem',$block);
			if($head['FilePos'] != $position) throw new RuntimeException("Read $part part of message verify error");

			//the data area starts right after the 16-byte header and holds at most 512 bytes
			$used = max(0,min(512,$head['ItemSize']));
			$msg .= (string)substr($block,16,$used);
			$position = $head['NextItem'];
		}
		return $msg;
	}

	/**
	 * Reads the message-info record stored at $position.
	 *
	 * The record is a 12-byte header, then one 4-byte entry per field, then a data area.
	 * The low byte of an entry is the field id; the upper 24 bits are either the value itself
	 * (ids 0x80, 0x81, 0x84, 0x91) or an offset into the data area (all other handled ids).
	 *
	 * @param resource $fp       open DBX file
	 * @param int      $position offset of the record
	 * @return array field name => value; only fields present in the record are set
	 *               (MsgFlags, Sent, position, MessageID, Subject, From_reply, References, Newsgroup,
	 *               From, Reply_To, Received, Receipt, Account, AccountID, Msg, size)
	 * @throws RuntimeException when the record does not start with its own offset
	 */
	public function ReadMessageInfo($fp,$position)
	{
		//field ids whose data-area value is a NUL-terminated string
		$string_fields = array(
			0x7  => 'MessageID',
			0x8  => 'Subject',
			0x9  => 'From_reply',
			0xA  => 'References',
			0xB  => 'Newsgroup',
			0xD  => 'From',
			0xE  => 'Reply_To',
			0x13 => 'Receipt',
			0x1A => 'Account',
		);
		//field ids whose data-area value is read as a 32-bit integer
		$long_fields = array(
			0x2  => 'Sent',
			0x4  => 'position',
			0x12 => 'Received',
		);
		//field ids whose value is stored directly in the entry
		$inline_fields = array(
			0x80 => 'Msg',
			0x81 => 'MsgFlags',
			0x84 => 'position',
			0x91 => 'size',
		);

		fseek($fp,$position);
		$raw = $this->readBytes($fp,12);
		if(strlen($raw) < 12) throw new RuntimeException('Message Info verify error');
		$msg_header = unpack('Vposition/VDataLength/vHeaderLength/vFlagCount',$raw);
		if($msg_header['position'] != $position) throw new RuntimeException('Message Info verify error');

		$flags       = ($msg_header['FlagCount'] & 0xFF);
		$FlagsBuffer = $this->readBytes($fp,4*$flags);
		$DataBuffer  = $this->readBytes($fp,$msg_header['DataLength'] - 4*$flags);

		$message_info = array();
		for($i = 0; $i < $flags; $i++)
		{
			$entry = $this->u32($FlagsBuffer,$i*4);
			$id    = $entry & 0xFF;
			$value = ($entry >> 8) & 0xFFFFFF;

			if(isset($string_fields[$id]))
			{
				$message_info[$string_fields[$id]] = $this->readstring($DataBuffer,$value);
			}
			elseif(isset($long_fields[$id]))
			{
				$message_info[$long_fields[$id]] = $this->u32($DataBuffer,$value);
			}
			elseif(isset($inline_fields[$id]))
			{
				$message_info[$inline_fields[$id]] = $value;
			}
			elseif($id == 0x1)
			{
				//message flags are stored as three little-endian bytes; pad to four for decoding
				$message_info['MsgFlags'] = $this->u32(str_pad((string)substr($DataBuffer,$value,3),4,"\0"));
			}
			elseif($id == 0x1B)
			{
				//account id is stored as text
				$message_info['AccountID'] = intval($this->readstring($DataBuffer,$value));
			}
		}
		return $message_info;
	} // end func ReadMessageInfo

	/**
	 * Walks the index node at $position and every node linked from it, appending each
	 * referenced message to $this->mails.
	 *
	 * Visited node offsets are recorded in $this->tmp so that no node is processed twice.
	 *
	 * @param resource $fp       open DBX file
	 * @param int      $position offset of the index node
	 * @throws RuntimeException when the node does not start with its own offset
	 */
	public function readIndex($fp,$position)
	{
		fseek($fp,$position);
		$raw = $this->readBytes($fp,24);
		if(strlen($raw) < 24) throw new RuntimeException('Verify error');
		$index_header = unpack('VFilePos/VUnknown1/VPrevIndex/VNextIndex/VCount/VUnknown',$raw);
		if($index_header['FilePos'] != $position) throw new RuntimeException('Verify error');

		$this->tmp[$position] = true;	//push it into list of processed items
		foreach(array('NextIndex','PrevIndex') as $link)
		{
			$target = $index_header[$link];
			if($target > 0 && empty($this->tmp[$target])) $this->readIndex($fp,$target);
		}

		//the number of 12-byte items in this node sits above the low byte of the Count field
		$icount = ($index_header['Count'] >> 8) & 0xFFFFFF;
		if($icount <= 0) return;

		//the recursive calls above moved the file pointer, so seek back to this node's items
		fseek($fp,$index_header['FilePos'] + 24);
		$buf = $this->readBytes($fp,12*$icount);
		//compare the file name only, so a path in front of it does not defeat the check
		$is_folders = (strtolower(basename((string)$this->fname)) == 'folders.dbx');

		for($i = 0; $i < $icount; $i++)
		{
			$hdr_buf = (string)substr($buf,$i*12,12);
			if(strlen($hdr_buf) < 12) break;	//truncated node
			$IndexItem = unpack('VHeaderPos/VChildIndex/VUnknown',$hdr_buf);

			if($IndexItem['HeaderPos'] > 0)
			{
				if($is_folders)
				{
					print 'Read folder not implemented in v1.0a<br>';
				}
				else
				{
					$mail = array();
					$mail['info']    = $this->ReadMessageInfo($fp,$IndexItem['HeaderPos']);
					//a record without a position field has no body to read
					$body_pos        = isset($mail['info']['position']) ? $mail['info']['position'] : 0;
					$mail['content'] = $this->ReadMessage($fp,$body_pos);
					$this->mails[]   = $mail;
				}
			}

			$child = $IndexItem['ChildIndex'];
			if($child > 0 && empty($this->tmp[$child])) $this->readIndex($fp,$child);
		} //end for
	} //end func readIndex

	/**
	 * Debug helper: turns a MsgFlags bit mask into human readable text.
	 *
	 * @param int $x message flags as returned in the 'MsgFlags' info field
	 * @return string one "NAME\n<br>" entry per recognised bit that is set ('' when none is)
	 */
	public function decode_flags($x)
	{
		//listed in output order
		$decode_flag = array(
			0x800		=> 'NEWS MESSAGE',	// to verify
			0x100		=> 'DOWNLOAD LATER',
			0x1			=> 'DOWNLOADED',
			0x80		=> 'READED',
			0x20		=> 'MARKED',
			0x4000		=> 'ATTACHMENTS',
			0x80000		=> 'REPLY',
			0x400000	=> 'INSPECT CONVERSATION',
			0x800000	=> 'IGNORE CONVERSATION',
		);

		$x = (int)$x;
		$decoded_flags = '';
		foreach($decode_flag as $bit => $label)
		{
			if(($x & $bit) != 0) $decoded_flags .= $label."\n<br>";
		}
		return $decoded_flags;
	}

} // end class ms_dbx_reader


/********************************************************************************************
 * Description:	Example of use ms_dbx_reader class
 * Date:		9/27/2002
 * Author:		Zeos <[email protected]>
 ********************************************************************************************/

//create the reader object; it reads the DBX file in binary mode and stores the messages into an array of mails
$mailbox = new ms_dbx_reader('./data/else.dbx',true);

//get the first mail from the mailbox object (the list is zero-based);
//fall back to an empty record when nothing could be read
$mail = isset($mailbox -> mails[0]) ? $mailbox -> mails[0] : array('info' => array(), 'content' => '');

//escape with the charset the page below declares; the UTF-8 default would turn
//text that is not valid UTF-8 into an empty string
$page_charset = 'Windows-1251';

//prepare mail info for friendly display
$mail_info	  = '';
foreach ($mail['info'] as $k => $v) $mail_info .= "$k = ".htmlspecialchars((string)$v,ENT_QUOTES,$page_charset)."\n<br>";
$mail_flags	  = $mailbox -> decode_flags(isset($mail['info']['MsgFlags']) ? $mail['info']['MsgFlags'] : 0);
//prepare raw mail for friendly display (the content is false when the mail has no body)
$raw_mail	  = (string)$mail['content'];
$mail_size	  = strlen($raw_mail);
$mail_content = nl2br(htmlspecialchars($raw_mail,ENT_QUOTES,$page_charset));

//just dump them 
?>
<html>
<head>
	<title>MS DBX file reader class by Zeos [[email protected]]</title>
	<meta NAME="Author" CONTENT="Zeos">
	<meta NAME="Keywords" CONTENT="MS Outlook DBX reader">
	<meta NAME="Description" CONTENT="Microsoft DBX file reader (such as Outlook Express Mailbox database files)">
	<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
	<!-- just for a nicer view :) -->
	<STYLE>
		BODY, TD, TR, TH, P {
			font-family:	Arial;
			font-size:		14px;
			color:			Black;
		}
	</STYLE>
</head>
<body>
<hr>
<!-- display message information stored in DBX file -->
<?=$mail_info?>
<!-- display OE message flags -->
flags:<br>
<BLOCKQUOTE><?=$mail_flags?></BLOCKQUOTE>
raw mail size: <?=$mail_size?> bytes<br>
<hr>
<!-- display MIME message as-is -->
<?=$mail_content?>
</body>
</html>