Essa função em PHP converte um arquivo do Office .XLS para texto puro, utilizando uma classe desenvolvida por Matt Kruse em https://code.google.com/p/php-excel-reader/.
/* CLASS xlsreader.php */
/**
 * Maintained at https://code.google.com/p/php-excel-reader/
 *
 * Format parsing and MUCH more contributed by:
 *    Matt Roxburgh < https://www.roxburgh.me.uk >
 *
 * DOCUMENTATION
 * =============
 *   https://code.google.com/p/php-excel-reader/wiki/Documentation
 *
 * CHANGE LOG
 * ==========
 *   https://code.google.com/p/php-excel-reader/wiki/ChangeHistory
 *
 * DISCUSSION/SUPPORT
 * ==================
 *   https://groups.google.com/group/php-excel-reader-discuss/topics
 *
 * --------------------------------------------------------------------------
 *
 * Originally developed by Vadim Tkachenko under the name PHPExcelReader.
 * (https://sourceforge.net/projects/phpexcelreader)
 * Based on the Java version by Andy Khan (https://www.andykhan.com). Now
 * maintained by David Sanders. Reads only Biff 7 and Biff 8 formats.
 *
 * PHP versions 4 and 5
 *
 * LICENSE: This source file is subject to version 3.0 of the PHP license
 * that is available through the world-wide-web at the following URI:
 * https://www.php.net/license/3_0.txt. If you did not receive a copy of
 * the PHP License and are unable to obtain it through the web, please
 * send a note to license@php.net so we can mail you a copy immediately.
 *
 * @category   Spreadsheet
 * @package    Spreadsheet_Excel_Reader
 * @author     Vadim Tkachenko
 * @license    https://www.php.net/license/3_0.txt PHP License 3.0
 * @version    CVS: $Id: reader.php 19 2007-03-13 12:42:41Z shangxiao $
 * @link       https://pear.php.net/package/Spreadsheet_Excel_Reader
 * @see        OLE, Spreadsheet_Excel_Writer
 * --------------------------------------------------------------------------
 */

// Byte offsets and sizes used by OLERead when parsing the container file.
define('NUM_BIG_BLOCK_DEPOT_BLOCKS_POS', 0x2c);
define('SMALL_BLOCK_DEPOT_BLOCK_POS', 0x3c);
define('ROOT_START_BLOCK_POS', 0x30);
define('BIG_BLOCK_SIZE', 0x200);
define('SMALL_BLOCK_SIZE', 0x40);
define('EXTENSION_BLOCK_POS', 0x44);
define('NUM_EXTENSION_BLOCK_POS', 0x48);
define('PROPERTY_STORAGE_BLOCK_SIZE', 0x80);
define('BIG_BLOCK_DEPOT_BLOCKS_POS', 0x4c);
define('SMALL_BLOCK_THRESHOLD', 0x1000);

// property storage offsets
define('SIZE_OF_NAME_POS', 0x40);
define('TYPE_POS', 0x42);
define('START_BLOCK_POS', 0x74);
define('SIZE_POS', 0x78);

// The 8-byte signature every file accepted by OLERead::read() must start with.
define('IDENTIFIER_OLE', pack("CCCCCCCC", 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1));

/**
 * Reads four bytes of $data starting at $pos as a little-endian integer.
 *
 * Any result of 4294967294 (0xFFFFFFFE) or more is returned as -2, which is
 * the value the block-chain loops in OLERead treat as "end of chain".
 *
 * @param string $data Binary string to read from.
 * @param int    $pos  Offset of the least significant byte.
 * @return int The decoded value, or -2 for the two largest 32-bit values.
 */
function GetInt4d($data, $pos) {
    $value = ord($data[$pos])
        | (ord($data[$pos + 1]) << 8)
        | (ord($data[$pos + 2]) << 16)
        | (ord($data[$pos + 3]) << 24);
    if ($value >= 4294967294) {
        $value = -2;
    }
    return $value;
}

/**
 * GMT counterpart of getdate().
 *
 * See https://uk.php.net/manual/en/function.getdate.php
 *
 * @param int|null $ts Unix timestamp; the current time is used when null.
 * @return array Map with the keys 'seconds', 'minutes', 'hours', 'mday',
 *               'wday', 'mon', 'year', 'yday', 'weekday', 'month' and 0
 *               (the timestamp). Every value is a string produced by gmdate().
 */
function gmgetdate($ts = null) {
    $k = array('seconds', 'minutes', 'hours', 'mday', 'wday', 'mon', 'year', 'yday', 'weekday', 'month', 0);
    // explode() replaces split(), which was removed in PHP 7.0; the
    // delimiter is a plain ":" so no regular expression is needed.
    $parts = explode(":", gmdate('s:i:G:j:w:n:Y:z:l:F:U', is_null($ts) ? time() : $ts));
    return array_comb($k, $parts);
}

/**
 * Builds an array whose keys are the values of $array1 and whose values are
 * the entries of $array2 found under the same key. Added for PHP4
 * compatibility.
 *
 * @param array $array1 Values to use as keys of the result.
 * @param array $array2 Values of the result, looked up by the keys of $array1.
 * @return array
 */
function array_comb($array1, $array2) {
    $out = array();
    foreach ($array1 as $key => $value) {
        $out[$value] = $array2[$key];
    }
    return $out;
}

/**
 * Reads two bytes of $data starting at $pos as a little-endian integer.
 *
 * @param string $data Binary string to read from.
 * @param int    $pos  Offset of the low byte.
 * @return int
 */
function v($data, $pos) {
    return ord($data[$pos]) | ord($data[$pos + 1]) << 8;
}

/**
 * Reader for the OLE container file that holds the workbook stream.
 *
 * read() loads the file, builds the big- and small-block chains and the list
 * of property sets; getWorkBook() then returns the bytes of the stream named
 * "Workbook" or "Book".
 */
class OLERead {
    // Raw contents of the file passed to read().
    var $data = '';
    // Set to 1 by read() when the file cannot be used; left null otherwise.
    var $error;
    // Header fields decoded by read().
    var $numBigBlockDepotBlocks;
    var $sbdStartBlock;
    var $rootStartBlock;
    var $extensionBlock;
    var $numExtensionBlocks;
    // Block index => next block index; -2 ends a chain.
    var $bigBlockChain = array();
    var $smallBlockChain = array();
    // Concatenated blocks of the chain that starts at $rootStartBlock.
    var $entry = '';
    // One array('name', 'type', 'startBlock', 'size') per property set.
    var $props = array();
    // Indexes into $props of the workbook stream and of "Root Entry".
    var $wrkbook;
    var $rootentry;

    function __construct() {
    }

    // Kept so that code calling the PHP4-style constructor by name still works.
    function OLERead() {
    }

    /**
     * Loads $sFileName and parses its block chains and property sets.
     *
     * @param string $sFileName Path of the file to read.
     * @return false|null false (with $this->error set to 1) when the file is
     *                    unreadable, empty or lacks the OLE signature;
     *                    no value on success.
     */
    function read($sFileName) {
        // check if file exist and is readable (Darko Miljanovic)
        if (!is_readable($sFileName)) {
            $this->error = 1;
            return false;
        }

        $this->data = @file_get_contents($sFileName);
        if (!$this->data) {
            $this->error = 1;
            return false;
        }

        if (substr($this->data, 0, 8) != IDENTIFIER_OLE) {
            $this->error = 1;
            return false;
        }

        $this->numBigBlockDepotBlocks = GetInt4d($this->data, NUM_BIG_BLOCK_DEPOT_BLOCKS_POS);
        $this->sbdStartBlock = GetInt4d($this->data, SMALL_BLOCK_DEPOT_BLOCK_POS);
        $this->rootStartBlock = GetInt4d($this->data, ROOT_START_BLOCK_POS);
        $this->extensionBlock = GetInt4d($this->data, EXTENSION_BLOCK_POS);
        $this->numExtensionBlocks = GetInt4d($this->data, NUM_EXTENSION_BLOCK_POS);

        // Depot block indexes stored in the header itself.
        $bigBlockDepotBlocks = array();
        $pos = BIG_BLOCK_DEPOT_BLOCKS_POS;
        $bbdBlocks = $this->numBigBlockDepotBlocks;
        if ($this->numExtensionBlocks != 0) {
            // With extension blocks present, the header holds only as many
            // entries as fit between BIG_BLOCK_DEPOT_BLOCKS_POS and the end
            // of the first block.
            $bbdBlocks = (BIG_BLOCK_SIZE - BIG_BLOCK_DEPOT_BLOCKS_POS) / 4;
        }
        for ($i = 0; $i < $bbdBlocks; $i++) {
            $bigBlockDepotBlocks[$i] = GetInt4d($this->data, $pos);
            $pos += 4;
        }

        // Remaining depot block indexes live in the extension blocks; the
        // last 4 bytes of each extension block point to the next one.
        for ($j = 0; $j < $this->numExtensionBlocks; $j++) {
            $pos = ($this->extensionBlock + 1) * BIG_BLOCK_SIZE;
            $blocksToRead = min($this->numBigBlockDepotBlocks - $bbdBlocks, BIG_BLOCK_SIZE / 4 - 1);
            for ($i = $bbdBlocks; $i < $bbdBlocks + $blocksToRead; $i++) {
                $bigBlockDepotBlocks[$i] = GetInt4d($this->data, $pos);
                $pos += 4;
            }
            $bbdBlocks += $blocksToRead;
            if ($bbdBlocks < $this->numBigBlockDepotBlocks) {
                $this->extensionBlock = GetInt4d($this->data, $pos);
            }
        }

        // readBigBlockDepot
        $index = 0;
        $this->bigBlockChain = array();
        for ($i = 0; $i < $this->numBigBlockDepotBlocks; $i++) {
            $pos = ($bigBlockDepotBlocks[$i] + 1) * BIG_BLOCK_SIZE;
            for ($j = 0; $j < BIG_BLOCK_SIZE / 4; $j++) {
                $this->bigBlockChain[$index] = GetInt4d($this->data, $pos);
                $pos += 4;
                $index++;
            }
        }

        // readSmallBlockDepot
        $index = 0;
        $sbdBlock = $this->sbdStartBlock;
        $this->smallBlockChain = array();
        while ($sbdBlock != -2) {
            $pos = ($sbdBlock + 1) * BIG_BLOCK_SIZE;
            for ($j = 0; $j < BIG_BLOCK_SIZE / 4; $j++) {
                $this->smallBlockChain[$index] = GetInt4d($this->data, $pos);
                $pos += 4;
                $index++;
            }
            // A block missing from the chain ends the walk; without this a
            // truncated file would keep the loop running forever.
            $sbdBlock = isset($this->bigBlockChain[$sbdBlock]) ? $this->bigBlockChain[$sbdBlock] : -2;
        }

        // readData(rootStartBlock)
        $this->entry = $this->__readData($this->rootStartBlock);
        $this->__readPropertySets();
    }

    /**
     * Concatenates the big blocks of the chain that starts at block $bl.
     *
     * The walk stops at -2 or at a block that has no entry in
     * $this->bigBlockChain. Cycles in the chain are not detected.
     *
     * @param int $bl Index of the first block.
     * @return string
     */
    function __readData($bl) {
        $block = $bl;
        $data = '';
        while ($block != -2) {
            // Block n starts one block past n * BIG_BLOCK_SIZE: the file
            // header occupies the first block.
            $pos = ($block + 1) * BIG_BLOCK_SIZE;
            $data .= substr($this->data, $pos, BIG_BLOCK_SIZE);
            $block = isset($this->bigBlockChain[$block]) ? $this->bigBlockChain[$block] : -2;
        }
        return $data;
    }

    /**
     * Splits $this->entry into PROPERTY_STORAGE_BLOCK_SIZE-byte records and
     * appends one entry per record to $this->props. Also records the index
     * of the "Workbook"/"Book" stream (case-insensitive) in $this->wrkbook
     * and of "Root Entry" in $this->rootentry.
     */
    function __readPropertySets() {
        $offset = 0;
        $entryLength = strlen($this->entry);
        while ($offset < $entryLength) {
            $d = substr($this->entry, $offset, PROPERTY_STORAGE_BLOCK_SIZE);

            $nameSize = ord($d[SIZE_OF_NAME_POS]) | (ord($d[SIZE_OF_NAME_POS + 1]) << 8);
            $type = ord($d[TYPE_POS]);
            $startBlock = GetInt4d($d, START_BLOCK_POS);
            $size = GetInt4d($d, SIZE_POS);

            // The name is stored with NUL bytes between its characters;
            // removing them ("\x00", not the literal text "x00") leaves a
            // string that can be compared with "workbook" below.
            $name = str_replace("\x00", "", (string) substr($d, 0, $nameSize));

            $this->props[] = array(
                'name' => $name,
                'type' => $type,
                'startBlock' => $startBlock,
                'size' => $size);

            $lowerName = strtolower($name);
            if ($lowerName == "workbook" || $lowerName == "book") {
                $this->wrkbook = count($this->props) - 1;
            }
            if ($name == "Root Entry") {
                $this->rootentry = count($this->props) - 1;
            }

            $offset += PROPERTY_STORAGE_BLOCK_SIZE;
        }
    }

    /**
     * Returns the bytes of the workbook stream located by read().
     *
     * Streams smaller than SMALL_BLOCK_THRESHOLD are collected from the
     * small-block chain inside the root entry's data; larger ones from the
     * big-block chain of the file itself.
     *
     * @return string Stream contents, or '' when no workbook stream (or, for
     *                small streams, no root entry) was found.
     */
    function getWorkBook() {
        if ($this->wrkbook === null) {
            return '';
        }

        $workbook = $this->props[$this->wrkbook];
        $streamData = '';
        $block = $workbook['startBlock'];

        if ($workbook['size'] < SMALL_BLOCK_THRESHOLD) {
            if ($this->rootentry === null) {
                return '';
            }
            $rootdata = $this->__readData($this->props[$this->rootentry]['startBlock']);
            while ($block != -2) {
                $pos = $block * SMALL_BLOCK_SIZE;
                $streamData .= substr($rootdata, $pos, SMALL_BLOCK_SIZE);
                $block = isset($this->smallBlockChain[$block]) ? $this->smallBlockChain[$block] : -2;
            }
            return $streamData;
        }

        while ($block != -2) {
            $pos = ($block + 1) * BIG_BLOCK_SIZE;
            $streamData .= substr($this->data, $pos, BIG_BLOCK_SIZE);
            $block = isset($this->bigBlockChain[$block]) ? $this->bigBlockChain[$block] : -2;
        }
        return $streamData;
    }
}

// SPREADSHEET_EXCEL_READER_* constants (values unchanged).
define('SPREADSHEET_EXCEL_READER_BIFF8', 0x600);
define('SPREADSHEET_EXCEL_READER_BIFF7', 0x500);
define('SPREADSHEET_EXCEL_READER_WORKBOOKGLOBALS', 0x5);
define('SPREADSHEET_EXCEL_READER_WORKSHEET', 0x10);
define('SPREADSHEET_EXCEL_READER_TYPE_BOF', 0x809);
define('SPREADSHEET_EXCEL_READER_TYPE_EOF', 0x0a);
define('SPREADSHEET_EXCEL_READER_TYPE_BOUNDSHEET', 0x85);
define('SPREADSHEET_EXCEL_READER_TYPE_DIMENSION', 0x200);
define('SPREADSHEET_EXCEL_READER_TYPE_ROW', 0x208);
define('SPREADSHEET_EXCEL_READER_TYPE_DBCELL', 0xd7);
define('SPREADSHEET_EXCEL_READER_TYPE_FILEPASS', 0x2f);
define('SPREADSHEET_EXCEL_READER_TYPE_NOTE', 0x1c);
define('SPREADSHEET_EXCEL_READER_TYPE_TXO', 0x1b6);
define('SPREADSHEET_EXCEL_READER_TYPE_RK', 0x7e);
define('SPREADSHEET_EXCEL_READER_TYPE_RK2', 0x27e);
define('SPREADSHEET_EXCEL_READER_TYPE_MULRK', 0xbd);
define('SPREADSHEET_EXCEL_READER_TYPE_MULBLANK', 0xbe);
define('SPREADSHEET_EXCEL_READER_TYPE_INDEX', 0x20b);
define('SPREADSHEET_EXCEL_READER_TYPE_SST', 0xfc);
define('SPREADSHEET_EXCEL_READER_TYPE_EXTSST', 0xff);
define('SPREADSHEET_EXCEL_READER_TYPE_CONTINUE', 0x3c);
define('SPREADSHEET_EXCEL_READER_TYPE_LABEL', 0x204);
define('SPREADSHEET_EXCEL_READER_TYPE_LABELSST', 0xfd);
define('SPREADSHEET_EXCEL_READER_TYPE_NUMBER', 0x203);
define('SPREADSHEET_EXCEL_READER_TYPE_NAME', 0x18);
define('SPREADSHEET_EXCEL_READER_TYPE_ARRAY', 0x221);
define('SPREADSHEET_EXCEL_READER_TYPE_STRING', 0x207);
define('SPREADSHEET_EXCEL_READER_TYPE_FORMULA', 0x406);
define('SPREADSHEET_EXCEL_READER_TYPE_FORMULA2', 0x6);
define('SPREADSHEET_EXCEL_READER_TYPE_FORMAT', 0x41e);
define('SPREADSHEET_EXCEL_READER_TYPE_XF', 0xe0);
define('SPREADSHEET_EXCEL_READER_TYPE_BOOLERR', 0x205);
define('SPREADSHEET_EXCEL_READER_TYPE_FONT', 0x0031);
define('SPREADSHEET_EXCEL_READER_TYPE_PALETTE', 0x0092);
define('SPREADSHEET_EXCEL_READER_TYPE_UNKNOWN', 0xffff);
define('SPREADSHEET_EXCEL_READER_TYPE_NINETEENFOUR', 0x22); define('SPREADSHEET_EXCEL_READER_TYPE_MERGEDCELLS', 0xE5); define('SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS' , 25569); define('SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS1904', 24107); define('SPREADSHEET_EXCEL_READER_MSINADAY', 86400); define('SPREADSHEET_EXCEL_READER_TYPE_HYPER', 0x01b8); define('SPREADSHEET_EXCEL_READER_TYPE_COLINFO', 0x7d); define('SPREADSHEET_EXCEL_READER_TYPE_DEFCOLWIDTH', 0x55); define('SPREADSHEET_EXCEL_READER_TYPE_STANDARDWIDTH', 0x99); define('SPREADSHEET_EXCEL_READER_DEF_NUM_FORMAT', "%s"); /* * Main Class */ class Spreadsheet_Excel_Reader { // MK: Added to make data retrieval easier var $colnames = array(); var $colindexes = array(); var $standardColWidth = 0; var $defaultColWidth = 0; function myHex($d) { if ($d < 16) return "0" . dechex($d); return dechex($d); } function dumpHexData($data, $pos, $length) { $info = ""; for ($i = 0; $i <= $length; $i++) { $info .= ($i==0?"":" ") . $this->myHex(ord($data[$pos + $i])) . (ord($data[$pos + $i])>31? "[" . $data[$pos + $i] . 
"]":''); } return $info; } function getCol($col) { if (is_string($col)) { $col = strtolower($col); if (array_key_exists($col,$this->colnames)) { $col = $this->colnames[$col]; } } return $col; } // PUBLIC API FUNCTIONS // -------------------- function val($row,$col,$sheet=0) { $col = $this->getCol($col); if (array_key_exists($row,$this->sheets[$sheet]['cells']) && array_key_exists($col,$this->sheets[$sheet]['cells'][$row])) { return $this->sheets[$sheet]['cells'][$row][$col]; } return ""; } function value($row,$col,$sheet=0) { return $this->val($row,$col,$sheet); } function info($row,$col,$type='',$sheet=0) { $col = $this->getCol($col); if (array_key_exists('cellsInfo',$this->sheets[$sheet]) && array_key_exists($row,$this->sheets[$sheet]['cellsInfo']) && array_key_exists($col,$this->sheets[$sheet]['cellsInfo'][$row]) && array_key_exists($type,$this->sheets[$sheet]['cellsInfo'][$row][$col])) { return $this->sheets[$sheet]['cellsInfo'][$row][$col][$type]; } return ""; } function type($row,$col,$sheet=0) { return $this->info($row,$col,'type',$sheet); } function raw($row,$col,$sheet=0) { return $this->info($row,$col,'raw',$sheet); } function rowspan($row,$col,$sheet=0) { $val = $this->info($row,$col,'rowspan',$sheet); if ($val=="") { return 1; } return $val; } function colspan($row,$col,$sheet=0) { $val = $this->info($row,$col,'colspan',$sheet); if ($val=="") { return 1; } return $val; } function hyperlink($row,$col,$sheet=0) { $link = $this->sheets[$sheet]['cellsInfo'][$row][$col]['hyperlink']; if ($link) { return $link['link']; } return ''; } function rowcount($sheet=0) { return $this->sheets[$sheet]['numRows']; } function colcount($sheet=0) { return $this->sheets[$sheet]['numCols']; } function colwidth($col,$sheet=0) { // Col width is actually the width of the number 0. 
So we have to estimate and come close return $this->colInfo[$sheet][$col]['width']/9142*200; } function colhidden($col,$sheet=0) { return !!$this->colInfo[$sheet][$col]['hidden']; } function rowheight($row,$sheet=0) { return $this->rowInfo[$sheet][$row]['height']; } function rowhidden($row,$sheet=0) { return !!$this->rowInfo[$sheet][$row]['hidden']; } // GET THE CSS FOR FORMATTING // ========================== function style($row,$col,$sheet=0,$properties='') { $css = ""; $font=$this->font($row,$col,$sheet); if ($font!="") { $css .= "font-family:$font;"; } $align=$this->align($row,$col,$sheet); if ($align!="") { $css .= "text-align:$align;"; } $height=$this->height($row,$col,$sheet); if ($height!="") { $css .= "font-size:$height"."px;"; } $bgcolor=$this->bgColor($row,$col,$sheet); if ($bgcolor!="") { $bgcolor = $this->colors[$bgcolor]; $css .= "background-color:$bgcolor;"; } $color=$this->color($row,$col,$sheet); if ($color!="") { $css .= "color:$color;"; } $bold=$this->bold($row,$col,$sheet); if ($bold) { $css .= "font-weight:bold;"; } $italic=$this->italic($row,$col,$sheet); if ($italic) { $css .= "font-style:italic;"; } $underline=$this->underline($row,$col,$sheet); if ($underline) { $css .= "text-decoration:underline;"; } // Borders $bLeft = $this->borderLeft($row,$col,$sheet); $bRight = $this->borderRight($row,$col,$sheet); $bTop = $this->borderTop($row,$col,$sheet); $bBottom = $this->borderBottom($row,$col,$sheet); $bLeftCol = $this->borderLeftColor($row,$col,$sheet); $bRightCol = $this->borderRightColor($row,$col,$sheet); $bTopCol = $this->borderTopColor($row,$col,$sheet); $bBottomCol = $this->borderBottomColor($row,$col,$sheet); // Try to output the minimal required style if ($bLeft!="" && $bLeft==$bRight && $bRight==$bTop && $bTop==$bBottom) { $css .= "border:" . $this->lineStylesCss[$bLeft] .";"; } else { if ($bLeft!="") { $css .= "border-left:" . $this->lineStylesCss[$bLeft] .";"; } if ($bRight!="") { $css .= "border-right:" . 
$this->lineStylesCss[$bRight] .";"; } if ($bTop!="") { $css .= "border-top:" . $this->lineStylesCss[$bTop] .";"; } if ($bBottom!="") { $css .= "border-bottom:" . $this->lineStylesCss[$bBottom] .";"; } } // Only output border colors if there is an actual border specified if ($bLeft!="" && $bLeftCol!="") { $css .= "border-left-color:" . $bLeftCol .";"; } if ($bRight!="" && $bRightCol!="") { $css .= "border-right-color:" . $bRightCol .";"; } if ($bTop!="" && $bTopCol!="") { $css .= "border-top-color:" . $bTopCol . ";"; } if ($bBottom!="" && $bBottomCol!="") { $css .= "border-bottom-color:" . $bBottomCol .";"; } return $css; } // FORMAT PROPERTIES // ================= function format($row,$col,$sheet=0) { return $this->info($row,$col,'format',$sheet); } function formatIndex($row,$col,$sheet=0) { return $this->info($row,$col,'formatIndex',$sheet); } function formatColor($row,$col,$sheet=0) { return $this->info($row,$col,'formatColor',$sheet); } // CELL (XF) PROPERTIES // ==================== function xfRecord($row,$col,$sheet=0) { $xfIndex = $this->info($row,$col,'xfIndex',$sheet); if ($xfIndex!="") { return $this->xfRecords[$xfIndex]; } return null; } function xfProperty($row,$col,$sheet,$prop) { $xfRecord = $this->xfRecord($row,$col,$sheet); if ($xfRecord!=null) { return $xfRecord[$prop]; } return ""; } function align($row,$col,$sheet=0) { return $this->xfProperty($row,$col,$sheet,'align'); } function bgColor($row,$col,$sheet=0) { return $this->xfProperty($row,$col,$sheet,'bgColor'); } function borderLeft($row,$col,$sheet=0) { return $this->xfProperty($row,$col,$sheet,'borderLeft'); } function borderRight($row,$col,$sheet=0) { return $this->xfProperty($row,$col,$sheet,'borderRight'); } function borderTop($row,$col,$sheet=0) { return $this->xfProperty($row,$col,$sheet,'borderTop'); } function borderBottom($row,$col,$sheet=0) { return $this->xfProperty($row,$col,$sheet,'borderBottom'); } function borderLeftColor($row,$col,$sheet=0) { return 
$this->colors[$this->xfProperty($row,$col,$sheet,'borderLeftColor')]; } function borderRightColor($row,$col,$sheet=0) { return $this->colors[$this->xfProperty($row,$col,$sheet,'borderRightColor')]; } function borderTopColor($row,$col,$sheet=0) { return $this->colors[$this->xfProperty($row,$col,$sheet,'borderTopColor')]; } function borderBottomColor($row,$col,$sheet=0) { return $this->colors[$this->xfProperty($row,$col,$sheet,'borderBottomColor')]; } // FONT PROPERTIES // =============== function fontRecord($row,$col,$sheet=0) { $xfRecord = $this->xfRecord($row,$col,$sheet); if ($xfRecord!=null) { $font = $xfRecord['fontIndex']; if ($font!=null) { return $this->fontRecords[$font]; } } return null; } function fontProperty($row,$col,$sheet=0,$prop) { $font = $this->fontRecord($row,$col,$sheet); if ($font!=null) { return $font[$prop]; } return false; } function fontIndex($row,$col,$sheet=0) { return $this->xfProperty($row,$col,$sheet,'fontIndex'); } function color($row,$col,$sheet=0) { $formatColor = $this->formatColor($row,$col,$sheet); if ($formatColor!="") { return $formatColor; } $ci = $this->fontProperty($row,$col,$sheet,'color'); return $this->rawColor($ci); } function rawColor($ci) { if (($ci <> 0x7FFF) && ($ci <> '')) { return $this->colors[$ci]; } return ""; } function bold($row,$col,$sheet=0) { return $this->fontProperty($row,$col,$sheet,'bold'); } function italic($row,$col,$sheet=0) { return $this->fontProperty($row,$col,$sheet,'italic'); } function underline($row,$col,$sheet=0) { return $this->fontProperty($row,$col,$sheet,'under'); } function height($row,$col,$sheet=0) { return $this->fontProperty($row,$col,$sheet,'height'); } function font($row,$col,$sheet=0) { return $this->fontProperty($row,$col,$sheet,'font'); } // DUMP AN HTML TABLE OF THE ENTIRE XLS DATA // ========================================= function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel') { // $out = "
  | "; } for($i=1;$i<=$this->colcount($sheet);$i++) { // $style = "width:" . ($this->colwidth($i,$sheet)*1) . "px;"; if ($this->colhidden($i,$sheet)) { // $style .= "display:none;"; } // $out .= "ntt" . strtoupper($this->colindexes[$i]) . " | "; } // $out .= "
---|---|
1?" colspan=$colspan":"") . ($rowspan > 1?" rowspan=$rowspan":"") . ">"; $val = $this->val($row,$col,$sheet); if ($val=='') { $val=" "; } else { $val = htmlentities($val); $link = $this->hyperlink($row,$col,$sheet); if ($link!='') { $val = "$link $valn"; } } $out .= nl2br($val)." "; // $out .= " | "; } } // $out .= "