Essa função em PHP converte um arquivo do Office .XLSX para texto puro, utilizando uma classe desenvolvida por Sergey Schuchkin em https://www.sibvison.ru.
/*CLASS xlsxreader.php*/ rows() ); // Example 2: // $xlsx = new SimpleXLSX('book.xlsx'); // print_r( $xlsx->rowsEx() ); // Example 3: // $xlsx = new SimpleXLSX('book.xlsx'); // print_r( $xlsx->rows(2) ); // second worksheet // // 0.4 sheets(), sheetsCount(), unixstamp( $excelDateTime ) // 0.3 - fixed empty cells (Gonzo patch) class SimpleXLSX { // Don't remove this string! Created by Sergey Schuchkin from https://www.sibvison.ru - professional php developers team 2010-2011 private $sheets; private $hyperlinks; private $package; private $sharedstrings; // scheme const SCHEMA_OFFICEDOCUMENT = 'https://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'; const SCHEMA_RELATIONSHIP = 'https://schemas.openxmlformats.org/package/2006/relationships'; const SCHEMA_SHAREDSTRINGS = 'https://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings'; const SCHEMA_WORKSHEETRELATION = 'https://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'; function __construct( $filename ) { $this->_unzip( $filename ); $this->_parse(); } function sheets() { return $this->sheets; } function sheetsCount() { return count($this->sheets); } function worksheet( $worksheet_id ) { if ( isset( $this->sheets[ $worksheet_id ] ) ) { $ws = $this->sheets[ $worksheet_id ]; if (isset($ws->hyperlinks)) { $this->hyperlinks = array(); foreach( $ws->hyperlinks->hyperlink as $hyperlink ) { $this->hyperlinks[ (string) $hyperlink['ref'] ] = (string) $hyperlink['display']; } } return $ws; } else throw new Exception('Worksheet '.$worksheet_id.' not found.'); } function dimension( $worksheet_id = 1 ) { $ws = $this->worksheet($worksheet_id); $ref = (string) $ws->dimension['ref']; $d = explode(':', $ref); $index = $this->_columnIndex( $d[1] ); return array( $index[0]+1, $index[1]+1); } // sheets numeration: 1,2,3.... function rows( $worksheet_id = 1 ) { $ws = $this->worksheet( $worksheet_id); $rows = array(); $curR = 0; foreach ($ws->sheetData->row as $row) { foreach ($row->c as $c) { list($curC,) = $this->_columnIndex((string) $c['r']); $rows[ $curR ][ $curC ] = $this->value($c); } $curR++; } return $rows; } function rowsEx( $worksheet_id = 1 ) { $rows = array(); $curR = 0; if (($ws = $this->worksheet( $worksheet_id)) === false) return false; foreach ($ws->sheetData->row as $row) { foreach ($row->c as $c) { list($curC,) = $this->_columnIndex((string) $c['r']); $rows[ $curR ][ $curC ] = array( 'name' => (string) $c['r'], 'value' => $this->value($c), 'href' => $this->href( $c ), ); } $curR++; } return $rows; } // thx Gonzo function _columnIndex( $cell = 'A1' ) { if (preg_match("/([A-Z]+)(d+)/", $cell, $matches)) { $col = $matches[1]; $row = $matches[2]; $colLen = strlen($col); $index = 0; for ($i = $colLen-1; $i >= 0; $i--) $index += (ord($col{$i}) - 64) * pow(26, $colLen-$i-1); return array($index-1, $row-1); } else throw new Exception("Invalid cell index."); } function value( $cell ) { // Determine data type $dataType = (string)$cell["t"]; switch ($dataType) { case "s": // Value is a shared string if ((string)$cell->v != '') { $value = $this->sharedstrings[intval($cell->v)]; } else { $value = ''; } break; case "b": // Value is boolean $value = (string)$cell->v; if ($value == '0') { $value = false; } else if ($value == '1') { $value = true; } else { $value = (bool)$cell->v; } break; case "inlineStr": // Value is rich text inline $value = $this->_parseRichText($cell->is); break; case "e": // Value is an error message if ((string)$cell->v != '') { $value = (string)$cell->v; } else { $value = ''; } break; default: // Value is a string $value = (string)$cell->v; // Check for numeric values if (is_numeric($value) && $dataType != 's') { if ($value == (int)$value) $value = (int)$value; elseif ($value == (float)$value) $value = (float)$value; elseif ($value == (double)$value) $value = (double)$value; } } return $value; } function href( $cell ) { return isset( $this->hyperlinks[ (string) $cell['r'] ] ) ? $this->hyperlinks[ (string) $cell['r'] ] : ''; } function _unzip( $filename ) { // Clear current file $this->datasec = array(); // Package information $this->package = array( 'filename' => $filename, 'mtime' => filemtime( $filename ), 'size' => filesize( $filename ), 'comment' => '', 'entries' => array() ); // Read file $oF = fopen($filename, 'rb'); $vZ = fread($oF, $this->package['size']); fclose($oF); // Cut end of central directory $aE = explode("x50x4bx05x06", $vZ); // Normal way $aP = unpack('x16/v1CL', $aE[1]); $this->package['comment'] = substr($aE[1], 18, $aP['CL']); // Translates end of line from other operating systems $this->package['comment'] = strtr($this->package['comment'], array("rn" => "n", "r" => "n")); // Cut the entries from the central directory $aE = explode("x50x4bx01x02", $vZ); // Explode to each part $aE = explode("x50x4bx03x04", $aE[0]); // Shift out spanning signature or empty entry array_shift($aE); // Loop through the entries foreach ($aE as $vZ) { $aI = array(); $aI['E'] = 0; $aI['EM'] = ''; // Retrieving local file header information //$aP = unpack('v1VN/v1GPF/v1CM/v1FT/v1FD/V1CRC/V1CS/V1UCS/v1FNL', $vZ); $aP = unpack('v1VN/v1GPF/v1CM/v1FT/v1FD/V1CRC/V1CS/V1UCS/v1FNL/v1EFL', $vZ); // Check if data is encrypted //$bE = ($aP['GPF'] && 0x0001) ? TRUE : FALSE; $bE = false; $nF = $aP['FNL']; $mF = $aP['EFL']; // Special case : value block after the compressed data if ($aP['GPF'] & 0x0008) { $aP1 = unpack('V1CRC/V1CS/V1UCS', substr($vZ, -12)); $aP['CRC'] = $aP1['CRC']; $aP['CS'] = $aP1['CS']; $aP['UCS'] = $aP1['UCS']; $vZ = substr($vZ, 0, -12); } // Getting stored filename $aI['N'] = substr($vZ, 26, $nF); if (substr($aI['N'], -1) == '/') { continue; } // Truncate full filename in path and filename $aI['P'] = dirname($aI['N']); $aI['P'] = $aI['P'] == '.' ? '' : $aI['P']; $aI['N'] = basename($aI['N']); $vZ = substr($vZ, 26 + $nF + $mF); if (strlen($vZ) != $aP['CS']) { $aI['E'] = 1; $aI['EM'] = 'Compressed size is not equal with the value in header information.'; } else { if ($bE) { $aI['E'] = 5; $aI['EM'] = 'File is encrypted, which is not supported from this class.'; } else { switch($aP['CM']) { case 0: // Stored // Here is nothing to do, the file ist flat. break; case 8: // Deflated $vZ = gzinflate($vZ); break; case 12: // BZIP2 if (! extension_loaded('bz2')) { if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') { @dl('php_bz2.dll'); } else { @dl('bz2.so'); } } if (extension_loaded('bz2')) { $vZ = bzdecompress($vZ); } else { $aI['E'] = 7; $aI['EM'] = "PHP BZIP2 extension not available."; } break; default: $aI['E'] = 6; $aI['EM'] = "De-/Compression method {$aP['CM']} is not supported."; } if (! $aI['E']) { if ($vZ === FALSE) { $aI['E'] = 2; $aI['EM'] = 'Decompression of data failed.'; } else { if (strlen($vZ) != $aP['UCS']) { $aI['E'] = 3; $aI['EM'] = 'Uncompressed size is not equal with the value in header information.'; } else { if (crc32($vZ) != $aP['CRC']) { $aI['E'] = 4; $aI['EM'] = 'CRC32 checksum is not equal with the value in header information.'; } } } } } } $aI['D'] = $vZ; // DOS to UNIX timestamp $set_date = date_default_timezone_set('America/Sao_Paulo'); $aI['T'] = mktime(($aP['FT'] & 0xf800) >> 11, ($aP['FT'] & 0x07e0) >> 5, ($aP['FT'] & 0x001f) << 1, ($aP['FD'] & 0x01e0) >> 5, ($aP['FD'] & 0x001f), (($aP['FD'] & 0xfe00) >> 9) + 1980); //$this->Entries[] = &new SimpleUnzipEntry($aI); $this->package['entries'][] = array( 'data' => $aI['D'], 'error' => $aI['E'], 'error_msg' => $aI['EM'], 'name' => $aI['N'], 'path' => $aI['P'], 'time' => $aI['T'] ); } // end for each entries } function getPackage() { return $this->package; } function getEntryData( $name ) { $dir = dirname( $name ); $name = basename( $name ); foreach( $this->package['entries'] as $entry) if ( $entry['path'] == $dir && $entry['name'] == $name) return $entry['data']; } function unixstamp( $excelDateTime ) { $d = floor( $excelDateTime ); // seconds since 1900 $t = $excelDateTime - $d; return ($d > 0) ? ( $d - 25569 ) * 86400 + $t * 86400 : $t * 86400; } function _parse() { // Document data holders $this->sharedstrings = array(); $this->sheets = array(); // Read relations and search for officeDocument $relations = simplexml_load_string( $this->getEntryData("_rels/.rels") ); foreach ($relations->Relationship as $rel) { if ($rel["Type"] == SimpleXLSX::SCHEMA_OFFICEDOCUMENT) { // Found office document! Read relations for workbook... $workbookRelations = simplexml_load_string($this->getEntryData( dirname($rel["Target"]) . "/_rels/" . basename($rel["Target"]) . ".rels") ); $workbookRelations->registerXPathNamespace("rel", SimpleXLSX::SCHEMA_RELATIONSHIP); // Read shared strings $sharedStringsPath = $workbookRelations->xpath("rel:Relationship[@Type='" . SimpleXLSX::SCHEMA_SHAREDSTRINGS . "']"); $sharedStringsPath = (string)$sharedStringsPath[0]['Target']; $xmlStrings = simplexml_load_string($this->getEntryData( dirname($rel["Target"]) . "/" . $sharedStringsPath) ); if (isset($xmlStrings) && isset($xmlStrings->si)) { foreach ($xmlStrings->si as $val) { if (isset($val->t)) { $this->sharedstrings[] = (string)$val->t; } elseif (isset($val->r)) { $this->sharedstrings[] = $this->_parseRichText($val); } } } // Loop relations for workbook and extract sheets... foreach ($workbookRelations->Relationship as $workbookRelation) { if ($workbookRelation["Type"] == SimpleXLSX::SCHEMA_WORKSHEETRELATION) { $this->sheets[ str_replace( 'rId', '', (string) $workbookRelation["Id"]) ] = simplexml_load_string( $this->getEntryData( dirname($rel["Target"]) . "/" . dirname($workbookRelation["Target"]) . "/" . basename($workbookRelation["Target"])) ); } } break; } } // Sort sheets ksort($this->sheets); } private function _parseRichText($is = null) { $value = array(); if (isset($is->t)) { $value[] = (string)$is->t; } else { foreach ($is->r as $run) { $value[] = (string)$run->t; } } return implode(' ', $value); } } ?> /*FUNÇÃO XLSX2Text*/ function XLSX2Text ($filename){ $xlsx = new SimpleXLSX($filename); $content = $xlsx->rowsEx(); $count = count($content); $content_value = ""; for($i = 0;$i<$count;$i++){ $content_single = $content[$i]; foreach($content_single as $key=>$value){ $content_value .= " ".$content_single[$key]['value']; } } return $content_value; } /*MODO DE USAR*/ $xlsx_content = XLSX2Text(/tmp/meuxlsx.xlsx'); echo "$xlsx_content";