#native_company# #native_desc#

PERL functions for handling PHP serialized data

By Peter Li
on January 31, 2003

Version: 0.6

Type: Function

Category: Other

License: GNU General Public License

Description: Two PERL functions, unserialize() and serialize(), that allow the user to load PHP serialized data into PERL, manipulate it, and then resave it in PHP format.

# Library for interacting with serialized PHP data.  unserialize()
# takes a serialized PHP data string and converts it into a Perl scalar
# or a reference to a hash, recursing into nested arrays and objects.
# serialize() takes a Perl scalar or a hash reference and converts it into
# a PHP-format serialized data string, recursing in the same way.
# This lets you load PHP data into Perl, manipulate it, and write it
# back to PHP.

# I didn't bother to take particular care to keep track of datatypes because 
# both PERL and PHP are loosely typed and therefore should theoretically be 
# able to handle it.  For example, if you use this library to unserialize and 
# then reserialize a PHP serial string, you may find that your booleans and 
# NULLs have been converted into equivalent ints, or even that strings have
# been converted into numericals.  However, none of this should interfere with
# your use of these variables in PHP code, where they should be easily recast
# to their original types, or any other type.

# NOTE: unserialize() walks the input from left to right and uses the
# length prefix of every string (s:LEN:"...") to decide where that string
# ends, instead of guessing element boundaries with regular expressions.
# Strings whose contents look like serialized data, negative numbers and
# objects nested inside arrays are therefore parsed correctly.
# Lengths are applied with substr(), i.e. counted in Perl characters, while
# PHP records string lengths in bytes; pass undecoded byte strings when the
# data may contain non-ASCII text.
# PHP back-references (R:/r:) and custom-serialized classes (C:) are not
# understood; parsing stops at the first element of such a type.

# unserialize($raw)
#
# Converts a PHP serialized data string into a Perl value.
#
# Parameters:
#   $raw - the serialized string, e.g. 'a:1:{s:1:"k";i:5;}'.
#
# Returns (always exactly one scalar):
#   N;          -> 0 (NULL is deliberately mapped to 0, never to undef, so
#                  that an undef hash value can be used as the object marker)
#   i: d: b:    -> the literal text between the ':' and the ';'
#   s:          -> the string contents
#   a:          -> a reference to a hash of key => value
#   O:          -> a reference to a hash with the keys
#                    'OBJECT' => undef   (marker: exists but is not defined)
#                    'name'   => class name
#                    'len'    => property count taken from the header
#                    'assoc'  => reference to a hash of the properties
#   anything else, or undef input -> undef
#
# Trailing data after the first complete value is ignored.
sub unserialize {
        my ($raw) = @_;

        # $raw is a private copy, so the match position that the helper
        # keeps on it never leaks into the caller's variable.
        my $value = defined $raw ? _unserialize_value(\$raw) : undef;

        return $value;
}

# _unserialize_value($raw_ref)
#
# Private helper for unserialize().  Parses one serialized value starting at
# the current match position, pos(), of the string referenced by $raw_ref and
# leaves pos() just behind the value it consumed.  Returns the decoded value,
# or nothing (undef in scalar context) when no well-formed value starts there.
sub _unserialize_value {
        my ($raw_ref) = @_;

        # NULL datatype
        if (${$raw_ref} =~ /\GN;/gc) {
                return 0;
        }

        # ... Integer, Float and Boolean datatypes.  The character class
        # covers signs, decimal points, exponents and INF/NAN.
        if (${$raw_ref} =~ /\G[ibd]:([-+.\w]*);/gc) {
                return $1;
        }

        # ... String datatype: take exactly LEN characters, whatever they
        # contain, then insist on the closing quote and semicolon.
        if (${$raw_ref} =~ /\Gs:(\d+):"/gc) {
                my $len   = $1;
                my $start = pos(${$raw_ref});
                my $text  = substr(${$raw_ref}, $start, $len);

                # length($text) rather than $len: substr() returns less
                # than requested when the input is truncated.
                pos(${$raw_ref}) = $start + length($text);
                return unless ${$raw_ref} =~ /\G";/gc;

                return $text;
        }

        # ... Arrays and Objects share the same '{key value ...}' body; an
        # Object merely has a class name and property count in front of it.
        my ($class_name, $obj_len);
        my $is_object = 0;

        if (${$raw_ref} =~ /\GO:(\d+):"/gc) {
                my $name_len = $1;
                my $start    = pos(${$raw_ref});

                $class_name = substr(${$raw_ref}, $start, $name_len);
                pos(${$raw_ref}) = $start + length($class_name);
                return unless ${$raw_ref} =~ /\G":(\d+):\{/gc;

                $obj_len   = $1;
                $is_object = 1;
        } else {
                return unless ${$raw_ref} =~ /\Ga:\d+:\{/gc;
        }

        # Read key/value pairs until the closing brace.  The element count
        # in the header is not relied upon; a pair that cannot be parsed
        # ends the loop and the pairs read so far are kept.
        my %assoc;
        until (${$raw_ref} =~ /\G\}/gc) {
                my $key = _unserialize_value($raw_ref);
                last unless defined $key;

                my $value = _unserialize_value($raw_ref);
                last unless defined $value;

                $assoc{$key} = $value;
        }

        # Return reference to hash; allows multi-layer arrays
        return \%assoc unless $is_object;

        # We must distinguish Objects from Arrays in the internal Perl
        # representation.  We do this by using an undef value under the
        # hash key 'OBJECT'.  No PHP array can produce an undef value here
        # because NULL is decoded as 0.  Therefore the test
        # exists($object{'OBJECT'}) combined with
        # not(defined($object{'OBJECT'})) tells an object from an array.
        # A fresh anonymous hash is built for every object so that several
        # objects in one document never share storage.
        return {
                'OBJECT' => undef,
                'name'   => $class_name,
                'len'    => $obj_len,
                'assoc'  => \%assoc,
        };
}

# serialize($object)
#
# Converts a Perl value into a PHP serialized data string.
#
# Parameters:
#   $object - one of
#     undef                -> 'N;'
#     decimal integer      -> 'i:...;'  (optional '-', no leading zeros, at
#                             most 18 digits so that it fits a signed 64-bit
#                             integer; '007' or longer digit runs stay strings)
#     decimal with a '.'   -> 'd:...;'  (optional '-')
#     hash reference       -> 'a:COUNT:{...}', integer keys first in numeric
#                             order, then string keys in string order
#     array reference      -> 'a:COUNT:{...}' with keys 0 .. COUNT-1
#     hash reference whose 'OBJECT' key exists with an undef value
#                          -> 'O:LEN:"name":COUNT:{...}' built from the
#                             'name' and 'assoc' entries (the format that
#                             unserialize() produces for PHP objects)
#     anything else        -> 's:LEN:"...";'
#
# Returns: the serialized string.
#
# NOTE(review): string lengths are computed with length(), i.e. in Perl
# characters, while PHP expects byte counts; pass encoded byte strings when
# the data may contain non-ASCII text.
sub serialize {
        my ($object) = @_;

        # Canonical integer as PHP itself would print it.
        my $int_re = qr/\A(?:0|-?[1-9]\d{0,17})\z/;

        # ... NULL datatype
        return 'N;' unless defined $object;

        if (ref($object) eq 'HASH') {
        # ... Object and Array datatypes
                if (exists(${$object}{'OBJECT'}) and
                    not defined(${$object}{'OBJECT'})) {
                # ... Use a recursive solution for Objects
                        my $class_name = ${$object}{'name'};
                        $class_name = '' unless defined $class_name;

                        my $assoc = ${$object}{'assoc'};
                        $assoc = {} unless ref($assoc) eq 'HASH';

                        # The property hash serializes to 'a:COUNT:{...}';
                        # dropping the leading 'a:' leaves 'COUNT:{...}',
                        # which is exactly the tail of the object format.
                        # The count thus always reflects the actual number
                        # of properties, not the stored 'len' entry.
                        my $props = substr(serialize($assoc), 2);

                        return 'O:' . length($class_name)
                             . ':"' . $class_name . '":' . $props;
                }

                # ... Use a recursive solution for Arrays
                my (@int_keys, @str_keys);
                for my $key (keys(%{$object})) {
                        if ($key =~ $int_re) {
                                push(@int_keys, $key);
                        } else {
                                push(@str_keys, $key);
                        }
                }

                my $ser_hash = '';
                for my $key ((sort { $a <=> $b } @int_keys), (sort @str_keys)) {
                        # PHP array keys can only be integers or strings,
                        # so a key is never written with the float type.
                        my $ser_key = ($key =~ $int_re)
                                ? "i:$key;"
                                : 's:' . length($key) . ':"' . $key . '";';

                        $ser_hash .= $ser_key . serialize(${$object}{$key});
                }

                my $hash_len = scalar(keys(%{$object}));

                return 'a:' . $hash_len . ':{' . $ser_hash . '}';
        }

        if (ref($object) eq 'ARRAY') {
        # ... Perl lists become PHP arrays indexed from 0
                my $ser_list = '';
                my $index    = 0;
                for my $item (@{$object}) {
                        $ser_list .= "i:$index;" . serialize($item);
                        $index++;
                }

                return 'a:' . scalar(@{$object}) . ':{' . $ser_list . '}';
        }

        if ($object =~ $int_re) {
        # ... Integer datatype
                return "i:$object;";
        }

        if ($object =~ /\A-?\d+\.\d+\z/) {
        # ... Float datatype
                return "d:$object;";
        }

        # ... String datatype
        return 's:' . length($object) . ':"' . $object . '";';
}

# Copyright Peter H. Li 2002