Version: 1.0a
Type: Class
Category: Other
License: GNU General Public License
Description: PHP Microsoft DBX File Reader reads files in the mailbox format used by Outlook Express.
<?php
/*********************************************************************************************
 Name:
 Author:      Antony Raijekov a.k.a Zeos ([email protected]) Bulgaria/Sofia
 Version:     1.0
 Description: Microsoft DBX file reader /such as Outlook Express Mailbox database files/
 Date:        9/27/2002 2:53 GMT +2
 License:     GPL
 Note:        Please if you use this class send me an e-mail, just for information 10x
              I test this class with 15 folders. ~ 10MB - 319 mails,
              the class processed them for 0.6 sec :)
*********************************************************************************************/

// CLASS IMPLEMENTATION

/********************************************************************************************
 * @description MS DBX file reader /e.g. Outlook Express DBX file reader/
 * @author      Zeos <[email protected]>
 * @version     1.0a
 * @copyright   GPL license
 * @access      public
 *
 * Usage: construct the object with the path of a DBX file; every message that could be
 * read is then available in $mails as ['info' => array, 'content' => string|false].
 * All multi-byte integers are decoded as little-endian, independent of the host CPU.
 *******************************************************************************************/
class ms_dbx_reader
{
    /** File offset of the 32-bit item count in the DBX file header. */
    private const OFFSET_ITEM_COUNT = 0xC4;

    /** File offset of the 32-bit pointer to the first index node. */
    private const OFFSET_ROOT_INDEX = 0x30;

    /** Size of an index node header and of each index entry that follows it. */
    private const INDEX_HEADER_SIZE = 24;
    private const INDEX_ENTRY_SIZE = 12;

    /** A message block is a 16-byte header followed by the payload area. */
    private const MESSAGE_BLOCK_SIZE = 528;
    private const MESSAGE_BLOCK_HEADER_SIZE = 16;

    /** Entry ids whose value is an offset to a null-terminated string in the data area. */
    private const STRING_FIELDS = [
        0x07 => 'MessageID',
        0x08 => 'Subject',
        0x09 => 'From_reply',
        0x0A => 'References',
        0x0B => 'Newsgroup',
        0x0D => 'From',
        0x0E => 'Reply_To',
        0x13 => 'Receipt',
        0x1A => 'Account',
    ];

    /** Entry ids whose value is an offset to a 32-bit integer in the data area. */
    private const INT_FIELDS = [
        0x02 => 'Sent',
        0x04 => 'position',
        0x12 => 'Received',
    ];

    /** Entry ids whose upper 24 bits ARE the value (nothing is read from the data area). */
    private const DIRECT_FIELDS = [
        0x80 => 'Msg',
        0x81 => 'MsgFlags',
        0x84 => 'position',
        0x91 => 'size',
    ];

    /** @var string|null path of the DBX file given to the constructor */
    public $fname = null;

    /** @var array list of ['info' => array, 'content' => string|false] */
    public $mails = [];

    /** @var bool when true, progress messages are printed while reading */
    public $debug = false;

    /** @var array index-node positions already visited (position => true) */
    public $tmp = [];

    /**
     * Reset the reader to an empty state.
     */
    public function clear()
    {
        $this->fname = '';
        $this->mails = [];
        $this->tmp = [];
    }

    /**
     * Open $fname and read every message reachable from its index tree into $mails.
     *
     * A file that cannot be opened is not an error: the object is simply left with
     * an empty $mails list (same best-effort behaviour as before).
     *
     * @param string $fname path of the DBX file
     * @param bool   $debug print progress information
     *
     * @throws RuntimeException when a structure in the file fails its self-check
     */
    public function __construct($fname, $debug = false)
    {
        $this->debug = $debug;
        $this->fname = $fname;

        if (!is_string($fname) || !is_readable($fname)) {
            return;
        }
        $fp = fopen($fname, 'rb');
        if ($fp === false) {
            return;
        }

        try {
            fseek($fp, self::OFFSET_ITEM_COUNT);
            $itemCount = $this->readStreamUInt32($fp);

            if ($this->debug) {
                print "Processing {$itemCount} message(s) in [" . basename($fname) . "]......";
            }

            // Go to the first index node and walk the tree from there.
            if ($itemCount > 0) {
                fseek($fp, self::OFFSET_ROOT_INDEX);
                $this->readIndex($fp, $this->readStreamUInt32($fp));
            }

            if ($this->debug) {
                print 'done<br>';
            }
        } finally {
            // Close the handle even when a verify error aborts the read.
            fclose($fp);
        }
    }

    /**
     * Legacy PHP 4 constructor name, kept so explicit calls keep working.
     * PHP 8 no longer treats a class-named method as the constructor.
     */
    public function ms_dbx_reader($fname, $debug = false)
    {
        $this->__construct($fname, $debug);
    }

    /**
     * Helper: read a null-terminated string out of a binary buffer.
     *
     * @param string $buf buffer to read from (taken by reference, never modified)
     * @param int    $pos byte offset where the string starts
     *
     * @return string the bytes up to (not including) the next NUL byte; '' when the
     *                offset is outside the buffer or no terminator is found
     */
    public function readstring(&$buf, $pos)
    {
        $pos = (int) $pos;
        if ($pos < 0 || $pos >= strlen($buf)) {
            return '';
        }
        $end = strpos($buf, "\0", $pos);
        if ($end === false) {
            return '';
        }

        return substr($buf, $pos, $end - $pos);
    }

    /**
     * Read a complete message by following its chain of message blocks.
     *
     * @param resource $fp       open, seekable DBX file handle
     * @param int      $position file offset of the first message block
     *
     * @return string|false the concatenated block payloads, or false when $position
     *                      is not positive or the header reports no items
     *
     * @throws RuntimeException when a block's stored position does not match the
     *                          offset it was read from, or the chain loops
     */
    public function ReadMessage($fp, $position)
    {
        $position = (int) $position;
        if ($position <= 0) {
            return false;
        }

        fseek($fp, self::OFFSET_ITEM_COUNT);
        if ($this->readStreamUInt32($fp) === 0) {
            return false;
        }

        fseek($fp, $position);
        $msg = '';
        $part = 0;
        $seen = [];

        while (!feof($fp)) {
            $part++;
            $block = fread($fp, self::MESSAGE_BLOCK_SIZE);
            if (!is_string($block) || $block === '') {
                break;
            }
            if (strlen($block) < self::MESSAGE_BLOCK_HEADER_SIZE) {
                throw new RuntimeException("Read $part part of message verify error");
            }

            $header = unpack('VFilePos/VUnknown/VItemSize/VNextItem', $block);
            if ($header['FilePos'] !== $position) {
                throw new RuntimeException("Read $part part of message verify error");
            }

            // Take the payload straight from the raw block so that a completely
            // filled block is not cut short.
            $msg .= substr($block, self::MESSAGE_BLOCK_HEADER_SIZE, $header['ItemSize']);

            $seen[$position] = true;
            $position = $header['NextItem'];
            if ($position === 0) {
                break;
            }
            if (isset($seen[$position])) {
                // A corrupt chain pointing back at itself would never terminate.
                throw new RuntimeException("Read $part part of message verify error");
            }
            fseek($fp, $position);
        }

        return $msg;
    }

    /**
     * Read the message-info record stored at $position.
     *
     * The record is a 12-byte header, a table of 4-byte entries (low byte = entry id,
     * upper 24 bits = value or data offset) and a data area holding strings/integers.
     *
     * @param resource $fp       open, seekable DBX file handle
     * @param int      $position file offset of the record
     *
     * @return array 'HeaderPosition' plus one key per recognised entry (e.g. 'Subject',
     *               'From', 'Sent', 'MsgFlags', 'position'); unknown entry ids and
     *               values that lie outside the data area are skipped
     *
     * @throws RuntimeException when the record's stored position does not match
     */
    public function ReadMessageInfo($fp, $position)
    {
        $position = (int) $position;
        fseek($fp, $position);

        $raw = fread($fp, 12);
        if (!is_string($raw) || strlen($raw) < 12) {
            throw new RuntimeException('Message Info verify error');
        }
        $header = unpack('Vposition/VDataLength/vHeaderLength/vFlagCount', $raw);
        if ($header['position'] !== $position) {
            throw new RuntimeException('Message Info verify error');
        }

        $entryCount = $header['FlagCount'] & 0xFF;
        $tableSize = 4 * $entryCount;
        $dataSize = $header['DataLength'] - $tableSize;

        // fread() rejects a zero length on PHP 8, so only read non-empty areas.
        $table = $tableSize > 0 ? (string) fread($fp, $tableSize) : '';
        $data = $dataSize > 0 ? (string) fread($fp, $dataSize) : '';

        $info = ['HeaderPosition' => $position];

        for ($i = 0; $i < $entryCount; $i++) {
            $word = substr($table, $i * 4, 4);
            if (!is_string($word) || strlen($word) < 4) {
                break; // truncated entry table
            }
            $entry = unpack('V', $word)[1];
            $id = $entry & 0xFF;
            $value = ($entry >> 8) & 0xFFFFFF; // mask: keep 24 bits even on 32-bit PHP

            if (isset(self::DIRECT_FIELDS[$id])) {
                $info[self::DIRECT_FIELDS[$id]] = $value;
            } elseif (isset(self::STRING_FIELDS[$id])) {
                $info[self::STRING_FIELDS[$id]] = $this->readstring($data, $value);
            } elseif (isset(self::INT_FIELDS[$id])) {
                $number = $this->readBufferUInt($data, $value, 4);
                if ($number !== null) {
                    $info[self::INT_FIELDS[$id]] = $number;
                }
            } elseif ($id === 0x01) {
                // Message flags stored indirectly as a 3-byte little-endian integer.
                $number = $this->readBufferUInt($data, $value, 3);
                if ($number !== null) {
                    $info['MsgFlags'] = $number;
                }
            } elseif ($id === 0x1B) {
                $info['AccountID'] = intval($this->readstring($data, $value));
            }
        }

        return $info;
    } // end func ReadMessageInfo

    /**
     * Walk the index node at $position, its sibling nodes and its child nodes,
     * appending every referenced message to $mails.
     *
     * Visited node positions are remembered in $tmp so that no node is read twice.
     *
     * @param resource $fp       open, seekable DBX file handle
     * @param int      $position file offset of the index node
     *
     * @throws RuntimeException when the node's stored position does not match
     */
    public function readIndex($fp, $position)
    {
        $position = (int) $position;
        fseek($fp, $position);

        $raw = fread($fp, self::INDEX_HEADER_SIZE);
        if (!is_string($raw) || strlen($raw) < self::INDEX_HEADER_SIZE) {
            throw new RuntimeException('Verify error');
        }
        $node = unpack('VFilePos/VUnknown1/VPrevIndex/VNextIndex/VCount/VUnknown', $raw);
        if ($node['FilePos'] !== $position) {
            throw new RuntimeException('Verify error');
        }

        $this->tmp[$position] = true; // push it into the list of processed items

        foreach (['NextIndex', 'PrevIndex'] as $link) {
            $target = $node[$link];
            if ($target > 0 && empty($this->tmp[$target])) {
                $this->readIndex($fp, $target);
            }
        }

        $entryCount = ($node['Count'] >> 8) & 0xFFFFFF;
        if ($entryCount === 0) {
            return;
        }

        // The recursion above moved the file pointer, so seek back to this node.
        fseek($fp, $position + self::INDEX_HEADER_SIZE);
        $entries = (string) fread($fp, self::INDEX_ENTRY_SIZE * $entryCount);

        // Compare only the file name, so the check also works when a path is given.
        $isFolderList = strtolower(basename((string) $this->fname)) === 'folders.dbx';

        for ($i = 0; $i < $entryCount; $i++) {
            $chunk = substr($entries, $i * self::INDEX_ENTRY_SIZE, self::INDEX_ENTRY_SIZE);
            if (!is_string($chunk) || strlen($chunk) < self::INDEX_ENTRY_SIZE) {
                break; // truncated node
            }
            $entry = unpack('VHeaderPos/VChildIndex/VUnknown', $chunk);

            if ($entry['HeaderPos'] > 0) {
                if ($isFolderList) {
                    print 'Read folder not implemented in v1.0a<br>';
                } else {
                    $info = $this->ReadMessageInfo($fp, $entry['HeaderPos']);
                    $this->mails[] = [
                        'info'    => $info,
                        'content' => $this->ReadMessage($fp, $info['position'] ?? 0),
                    ];
                }
            }

            if ($entry['ChildIndex'] > 0 && empty($this->tmp[$entry['ChildIndex']])) {
                $this->readIndex($fp, $entry['ChildIndex']);
            }
        } // end for
    } // end func readIndex

    /**
     * Debug helper: turn a message-flags bit mask into human readable text.
     *
     * @param int $x message flags (e.g. the 'MsgFlags' value of a message info)
     *
     * @return string one "NAME\n<br>" fragment per set flag, '' when none is set
     */
    public function decode_flags($x)
    {
        // Listed in output order.
        $labels = [
            0x800    => 'NEWS MESSAGE', // to verify
            0x100    => 'DOWNLOAD LATER',
            0x1      => 'DOWNLOADED',
            0x80     => 'READED',
            0x20     => 'MARKED',
            0x4000   => 'ATTACHMENTS',
            0x80000  => 'REPLY',
            0x400000 => 'INSPECT CONVERSATION',
            0x800000 => 'IGNORE CONVERSATION',
        ];

        $x = (int) $x;
        $decoded_flags = '';
        foreach ($labels as $mask => $label) {
            if (($x & $mask) !== 0) {
                $decoded_flags .= $label . "\n<br>";
            }
        }

        return $decoded_flags;
    }

    /**
     * Read a little-endian unsigned 32-bit integer at the current stream position.
     *
     * @param resource $fp open file handle
     *
     * @return int the value, or 0 when fewer than 4 bytes could be read
     */
    private function readStreamUInt32($fp)
    {
        $raw = fread($fp, 4);
        if (!is_string($raw) || strlen($raw) < 4) {
            return 0;
        }

        return unpack('V', $raw)[1];
    }

    /**
     * Read a little-endian unsigned integer of 1 to 4 bytes from a buffer.
     *
     * @param string $buf   buffer to read from
     * @param int    $pos   byte offset of the integer
     * @param int    $bytes width of the integer in bytes (1-4)
     *
     * @return int|null the value, or null when the buffer is too short
     */
    private function readBufferUInt($buf, $pos, $bytes)
    {
        $chunk = substr($buf, $pos, $bytes);
        if (!is_string($chunk) || strlen($chunk) < $bytes) {
            return null;
        }

        return unpack('V', str_pad($chunk, 4, "\0"))[1];
    }
} // end class ms_dbx_reader

/********************************************************************************************
 * Description: Example of use ms_dbx_reader class
 * Date:        9/27/2002
 * Author:      Zeos <[email protected]>
 ********************************************************************************************/

// create the object and pass the DBX file for binary reading and storing into the array of mails
$mailbox = new ms_dbx_reader('./data/else.dbx', true);

// get the first mail from the mailbox object (an empty placeholder if there is none)
$mail = $mailbox->mails[0] ?? ['info' => [], 'content' => ''];

// prepare mail info for friendly display; the charset matches the page's meta tag
$mail_info = '';
foreach ($mail['info'] as $k => $v) {
    $mail_info .= "$k = " . htmlspecialchars((string) $v, ENT_QUOTES, 'Windows-1251') . "\n<br>";
}
$mail_flags = $mailbox->decode_flags($mail['info']['MsgFlags'] ?? 0);

// prepare raw mail for friendly display (content is false when it could not be read)
$mail_raw = (string) $mail['content'];
$mail_size = strlen($mail_raw);
$mail_content = nl2br(htmlspecialchars($mail_raw, ENT_QUOTES, 'Windows-1251'));

// just dump them
?>
<html>
<head>
<title>MS DBX file reader class by Zeos [[email protected]]</title>
<meta NAME="Author" CONTENT="Zeos">
<meta NAME="Keywords" CONTENT="MS Outlook DBX reader">
<meta NAME="Description" CONTENT="Microsoft DBX file reader (such as Outlook Express Mailbox database files)">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
<!-- just for a nice view :) -->
<STYLE>
BODY, TD, TR, TH, P { font-family: Arial; font-size: 14px; color: Black; }
</STYLE>
</head>
<body>
<hr>
<!-- display message information stored in DBX file -->
<?=$mail_info?>
<!-- display OE message flags -->
flags:<br>
<BLOCKQUOTE><?=$mail_flags?></BLOCKQUOTE>
raw mail size: <?=$mail_size?> bytes<br>
<hr>
<!-- display MIME message as-is -->
<?=$mail_content?>
</body>
</html>