Version: 0.9
Type: Function
Category: HTML
License: GNU General Public License
Description: First pass at a simple HTML parser. Give it a chunk of HTML, and it will call your methods for every tag it encounters.
# Written March 2001 by Nathaniel Hekman
# Comments/bugs to [email protected]
#
# html_parse will parse a chunk of HTML code and report every tag and
# every run of text to the two callbacks supplied by the caller.
#
#   string $tagFn  : function to call for each tag (a function name or
#                    any other value PHP can call as $tagFn(...)).
#                    It is called for *every* tag, opening and closing;
#                    closing tags are reported with their leading '/'
#                    (e.g. "/b"). The tagFn has these arguments:
#                        function tagFn($tag, $attribs)
#                    string $tag     : the tag name
#                    string $attribs : all the attributes, as one long
#                                      string; "" when the tag has none
#   string $dataFn : function to call for data (non-tag). It is called
#                    for all the data in between tags, including
#                    comments. The dataFn has these arguments:
#                        function dataFn($data)
#                    string $data : the text
#   string $data   : the html to parse
#
# Notes on behaviour:
#   - Text that precedes the first '<' is always reported as data.
#   - A comment is reported to dataFn as a single string starting with
#     "<!--"; any text that follows "-->" up to the next '<' is part of
#     that same string.
#   - A comment that is never closed is still reported to dataFn once
#     the input is exhausted.
#   - A '<' that is never followed by '>' is reported as a tag with no
#     trailing data.
function html_parse($tagFn, $dataFn, $data)
{
    # Split on '<', so every entry after the first begins with a tag
    # (or a comment). The first entry is whatever precedes the first
    # '<', so it can never be a tag.
    $chunks = explode('<', (string) $data);

    $comment = false;
    $commentline = "";

    foreach ($chunks as $i => $l) {
        # Leading text before any '<' is plain data.
        if ($i === 0) {
            if (strlen($l)) {
                $dataFn($l);
            }
            continue;
        }

        # If we're not in a comment block, check if one starts here.
        if (!$comment && strncmp(ltrim($l), "!--", 3) === 0) {
            $comment = true;
            $commentline = "";
        }

        if ($comment) {
            # Inside a comment: restore the '<' that explode() removed
            # and keep accumulating until the terminator shows up.
            $commentline .= "<" . $l;
            if (strpos($l, "-->") === false) {
                continue;
            }
            $comment = false;
            $tag = "";
            $line = $commentline;
        } else {
            # Otherwise, split on '>' to separate the tag from the data.
            # array_pad guarantees both parts exist even without a '>'.
            list($tag, $line) = array_pad(explode('>', $l, 2), 2, "");
        }

        # If there's a tag, call the tag function. Trim first so that a
        # whitespace-only tag such as "< >" is not reported.
        $tag = trim($tag);
        if (strlen($tag)) {
            # Tag name and attributes may be separated by any whitespace
            # (space, tab, newline), not just a single space.
            list($tag, $attribline) = array_pad(preg_split('/\s+/', $tag, 2), 2, "");
            $tagFn($tag, $attribline);
        }

        # If there's data, call the data function.
        if (strlen($line)) {
            $dataFn($line);
        }
    }

    # An unterminated comment would otherwise be lost; hand it over as data.
    if ($comment && strlen($commentline)) {
        $dataFn($commentline);
    }
}