Version: 4.2
Type: Class
Category: File Management
License: Other
Description: comma separated values – read and write.
example: see http://www.blueshoes.org/en/framework/util/csv_util/
Features:
supports any separator character sequence; the default is the semicolon ";".
supports separator characters inside the values. E.g. if you use ; as the separator, a line may look like
blah;hello world;"foo";"foo;bar";"this is a ""string""";got it?;foo
As you can see, the values can be wrapped in "quotes". If the text itself contains quotes, as in the "string"
example, they are escaped MS-style by doubling them. And by quoting a value we can even have the
separator inside the text (example "foo;bar").
line breaks. A CSV record may spread over multiple lines when a field value contains a line break (CRLF).
See the checkMultiline param and the _checkMultiline() method.
this class is part of the blueshoes application framework, see http://www.blueshoes.org/.
/**
 * CSV utility class (csv = comma separated values): reads and writes csv data.
 *
 * Features:
 * - supports any separator char sequence, default is semicolon ";"
 * - supports separator characters in the values. eg you use a ; as separator,
 *   your line may look like
 *     blah;hello world;"foo";"foo;bar";"this is a ""string""";got it?;foo
 *   as you can see, the values can be in "quotes". if your text uses quotes
 *   itself as in the "string" example, they are escaped in ms-style with
 *   2 quotes. and by using quotes we can even have your separator inside the
 *   text (example "foo;bar").
 * - line breaks. a csv line may spread over multiple lines using a line break
 *   in a field value. see the checkMultiline param and the _checkMultiline()
 *   method.
 *
 * Missing:
 * - option to change quote char (") to something else
 *
 * Thanks to: steffen at hung dot ch
 *
 * Dependencies: none.
 *
 * @author     andrej arn <andrej at blueshoes dot org>
 * @copyright  blueshoes.org
 * @version    4.2.$id$
 * @package    util
 * @access     pseudostatic
 */
class Bs_CsvUtil {

    /**
     * Constructor. Does nothing; the class keeps no state.
     */
    function Bs_CsvUtil() {
    }

    /**
     * Reads in a csv file and returns it as a 2-dim vector.
     *
     * @param  string $fullPath         (fullpath to the csv file)
     * @param  string $separator        (cell separator, default is ';')
     * @param  string $trim             ('none' (default), 'left', 'right' or 'both')
     * @param  bool   $removeHeader     (default is FALSE. drops the first line.)
     * @param  bool   $removeEmptyLines (default is FALSE. drops lines whose cells are all empty.)
     * @param  bool   $checkMultiline   (default is FALSE, see _checkMultiline())
     * @return array|bool (2-dim vector, or bool FALSE if the file cannot be read or has no content.)
     * @see    csvArrayToArray()
     */
    function csvFileToArray($fullPath, $separator=';', $trim='none', $removeHeader=FALSE, $removeEmptyLines=FALSE, $checkMultiline=FALSE) {
        // '@' is deliberate: an unreadable file is reported through the FALSE return value.
        $fileContent = @file($fullPath);
        if (!$fileContent) return FALSE;

        // file() keeps the line terminator on every element. Strip "\n" as well
        // as "\r\n", otherwise the last cell of each line keeps a stray "\r"
        // for files written on windows.
        foreach ($fileContent as $k => $line) {
            $fileContent[$k] = rtrim($line, "\r\n");
        }

        if ($checkMultiline) $fileContent = $this->_checkMultiline($fileContent);
        return $this->csvArrayToArray($fileContent, $separator, $trim, $removeHeader, $removeEmptyLines);
    }

    /**
     * Takes a csv string and returns it as a 2-dim vector.
     *
     * @param  string $string
     * @param  string $separator        (cell separator, default is ';')
     * @param  string $trim             ('none' (default), 'left', 'right' or 'both')
     * @param  bool   $removeHeader     (default is FALSE. drops the first line.)
     * @param  bool   $removeEmptyLines (default is FALSE. drops lines whose cells are all empty.)
     * @param  bool   $checkMultiline   (default is FALSE, see _checkMultiline())
     * @return array|bool (2-dim vector, empty array for empty input, bool FALSE on error.)
     * @see    csvArrayToArray()
     */
    function csvStringToArray($string, $separator=';', $trim='none', $removeHeader=FALSE, $removeEmptyLines=FALSE, $checkMultiline=FALSE) {
        // Do not use empty() here: the string "0" is a valid one-cell csv.
        if (!is_scalar($string) || ($string === FALSE)) return array();
        $string = (string)$string;
        if ($string === '') return array();

        $array = explode("\n", $string);
        // On windows the lines are separated by "\r\n", so after splitting on
        // "\n" the elements still end with "\r". Remove that.
        foreach ($array as $k => $line) {
            $array[$k] = rtrim($line, "\r");
        }

        if ($checkMultiline) $array = $this->_checkMultiline($array);
        return $this->csvArrayToArray($array, $separator, $trim, $removeHeader, $removeEmptyLines);
    }

    /**
     * Takes an array of csv lines and returns it as a 2-dim vector.
     *
     * The input looks like this (one array element per line):
     *
     *   headerCellOne;headerCellTwo;headerCellThree
     *   dataCellOne;dataCellTwo;dataCellThree
     *   apple;peach;banana;grapefruit
     *   linux;windows;mac
     *   1;2;3
     *
     * note I:  all returned array elements are strings even if the values were numeric.
     * note II: the sub-arrays may differ in length. in the example above the
     *          fruits have 4 elements while the others have 3. the rows are
     *          not padded to a common length.
     *
     * This method does not use $this.
     *
     * @access public pseudostatic
     * @param  array  $array            (hash or vector where the values are the csv lines)
     * @param  string $separator        (cell separator, default is ';'. must not be empty.)
     * @param  string $trim             (if we should trim the cells, default is 'none', can also be 'left', 'right' or 'both'. any other value is treated as 'both'.)
     * @param  bool   $removeHeader     (default is FALSE. would remove the first line which usually is the title line.)
     * @param  bool   $removeEmptyLines (default is FALSE. would remove lines where all cells are empty. white spaces count as empty aswell; a cell "0" does not.)
     * @return array|bool (2-dim vector, may be empty if there is no data. bool FALSE if $array is not an array or $separator is empty.)
     * @see    csvStringToArray()
     */
    function csvArrayToArray($array, $separator=';', $trim='none', $removeHeader=FALSE, $removeEmptyLines=FALSE) {
        if (!is_array($array)) return FALSE;

        // An empty separator can never split anything, and the scan below
        // would not advance on it. Report it as an error.
        $separator = (string)$separator;
        $sepLength = strlen($separator);
        if ($sepLength === 0) return FALSE;

        switch ($trim) {
            case 'none':
                $trimFunction = FALSE;
                break;
            case 'left':
                $trimFunction = 'ltrim';
                break;
            case 'right':
                $trimFunction = 'rtrim';
                break;
            default: // 'both'
                $trimFunction = 'trim';
                break;
        }

        if ($removeHeader) {
            array_shift($array);
        }

        $ret = array();
        foreach ($array as $line) {
            $line      = (string)$line;
            $offset    = 0; // where to look for the next separator
            $lastPos   = 0; // where the current cell starts
            $lineArray = array();

            while (TRUE) {
                $pos = strpos($line, $separator, $offset);
                if ($pos === FALSE) {
                    // no more separators, the rest of the line is the last cell.
                    $lineArray[] = (string)substr($line, $lastPos);
                    break;
                }
                // Is this a real separator or part of a quoted field value?
                // It can only be a real one if the number of quotes (") since
                // the start of the cell is even.
                $currentSnippet = (string)substr($line, $lastPos, $pos - $lastPos);
                if (substr_count($currentSnippet, '"') % 2 == 0) {
                    $lineArray[] = $currentSnippet;
                    $lastPos     = $pos + $sepLength;
                }
                // else: the separator was inside a field value, keep scanning.
                $offset = $pos + $sepLength;
            }

            // trim if needed
            if ($trimFunction !== FALSE) {
                foreach ($lineArray as $k => $cell) {
                    $lineArray[$k] = $trimFunction($cell);
                }
            }

            // Remove the quotes around cell values, then un-escape doubled
            // quotes. A cell counts as quoted when it starts and ends with a
            // quote, so that "" becomes an empty cell and """foo""" becomes
            // "foo" (this is what arrayToCsvString() writes for such values).
            foreach ($lineArray as $k => $cell) {
                if ((strlen($cell) >= 2) && (substr($cell, 0, 1) === '"') && (substr($cell, -1) === '"')) {
                    $cell = (string)substr($cell, 1, -1);
                }
                $lineArray[$k] = str_replace('""', '"', $cell);
            }

            if ($removeEmptyLines) {
                // Do not use empty() here, a cell holding "0" is real data.
                $hasContent = FALSE;
                foreach ($lineArray as $cell) {
                    if (trim($cell) !== '') {
                        $hasContent = TRUE;
                        break;
                    }
                }
                if (!$hasContent) continue;
            }

            $ret[] = $lineArray;
        }
        return $ret;
    }

    /**
     * Takes an array and creates a csv string from it.
     *
     * the given param $array may be a simple 1-dim array like this:
     *   $arr = array('madonna', 'alanis morisette', 'falco');
     * that will result in the string: "madonna;alanis morisette;falco"
     *
     * if the param is a 2-dim array, it goes like this:
     *   $arr = array(
     *     array('madonna', 'pop', 'usa'),
     *     array('alanis morisette', 'rock', 'canada'),
     *     array('falco', 'pop', 'austria'),
     *   );
     * result: madonna;pop;usa
     *         alanis morisette;rock;canada
     *         falco;pop;austria
     *
     * Whether the input is treated as 1-dim or 2-dim is decided by its first
     * element. In 2-dim mode the lines are joined with "\n", and an element
     * that is not an array is treated as a line without cells.
     *
     * todo: add param "fill to fit max length"?
     *
     * @access public
     * @param  array  $array            (see above)
     * @param  string $separator        (default is ';')
     * @param  string $trim             (if we should trim the cells, default is 'none', can also be 'left', 'right' or 'both'. any other value is treated as 'both'.)
     * @param  bool   $removeEmptyLines (default is TRUE. in 2-dim mode, removes "lines" that have no value, i.e. where every cell is empty or white space only.)
     * @return string (empty string if there is nothing at all)
     */
    function arrayToCsvString($array, $separator=';', $trim='none', $removeEmptyLines=TRUE) {
        if (!is_array($array) || empty($array)) return '';

        switch ($trim) {
            case 'none':
                $trimFunction = FALSE;
                break;
            case 'left':
                $trimFunction = 'ltrim';
                break;
            case 'right':
                $trimFunction = 'rtrim';
                break;
            default: // 'both'
                $trimFunction = 'trim';
                break;
        }

        $ret = array();
        reset($array);
        if (!is_array(current($array))) {
            // 1-dim input: a single csv line.
            foreach ($array as $val) {
                $ret[] = $this->_valToCsvHelper($val, $separator, $trimFunction);
            }
            return join($separator, $ret);
        }

        // 2-dim input: one csv line per element.
        foreach ($array as $lineArr) {
            // Start from a fresh cell list for every row, so a row that is not
            // an array yields a line without cells instead of reusing the
            // cells of the previous row.
            $subArr   = array();
            $hasValue = FALSE;
            if (is_array($lineArr)) {
                foreach ($lineArr as $val) {
                    $val = $this->_valToCsvHelper($val, $separator, $trimFunction);
                    if (trim($val) !== '') $hasValue = TRUE;
                    $subArr[] = $val;
                }
            }
            if ($removeEmptyLines && !$hasValue) continue;
            $ret[] = join($separator, $subArr);
        }
        return join("\n", $ret);
    }

    /**
     * Works on a single value to include in a csv string/file: trims it if
     * requested, doubles contained quotes and wraps the value in quotes when
     * it contains a quote, the separator or a line break.
     *
     * @access private
     * @param  string $val
     * @param  string $separator
     * @param  mixed  $trimFunction (bool FALSE or 'rtrim' or so.)
     * @return string
     * @see    arrayToCsvString() and others.
     */
    function _valToCsvHelper($val, $separator, $trimFunction) {
        // The string functions below need strings; numbers, NULL and bools
        // are converted the usual php way.
        if (!is_string($val)) $val = (string)$val;
        $separator = (string)$separator;

        if ($trimFunction) $val = $trimFunction($val);

        // if there is a separator (;) or a quote (") or a linebreak in the
        // string, we need to quote it.
        $needQuote = FALSE;
        if (strpos($val, '"') !== FALSE) {
            $val       = str_replace('"', '""', $val);
            $needQuote = TRUE;
        } elseif (($separator !== '') && (strpos($val, $separator) !== FALSE)) {
            $needQuote = TRUE;
        } elseif ((strpos($val, "\n") !== FALSE) || (strpos($val, "\r") !== FALSE)) { // \r is for mac
            $needQuote = TRUE;
        }

        if ($needQuote) {
            $val = '"' . $val . '"';
        }
        return $val;
    }

    /**
     * Takes an array of lines and combines elements (lines) if needed.
     *
     * A line with an odd number of quotes opens a quoted field that continues
     * on the following lines, up to and including the next line that has an
     * odd number of quotes again. Those lines are joined with "\n" into one
     * element. If the input ends while such a field is still open, the
     * collected lines are returned as the last element instead of being
     * dropped.
     *
     * This method does not use $this.
     *
     * @access private
     * @param  array $in
     * @return array
     */
    function _checkMultiline($in) {
        $ret = array();
        if (!is_array($in)) return $ret;

        $stack = FALSE; // lines collected for a still open quoted field, or FALSE
        foreach ($in as $line) {
            $isBalanced = (substr_count($line, '"') % 2 == 0);
            if ($stack === FALSE) {
                if ($isBalanced) {
                    $ret[] = $line;
                } else {
                    $stack = $line; // opens a multi-line field
                }
            } else {
                $stack .= "\n" . $line;
                if (!$isBalanced) {
                    // odd number of quotes again: the field is closed here.
                    $ret[] = $stack;
                    $stack = FALSE;
                }
            }
        }

        // unterminated quoted field: keep the data rather than losing it.
        if ($stack !== FALSE) $ret[] = $stack;
        return $ret;
    }

} // end Class