OLE.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. <?php
  2. namespace PhpOffice\PhpSpreadsheet\Shared;
  3. // vim: set expandtab tabstop=4 shiftwidth=4:
  4. // +----------------------------------------------------------------------+
  5. // | PHP Version 4 |
  6. // +----------------------------------------------------------------------+
  7. // | Copyright (c) 1997-2002 The PHP Group |
  8. // +----------------------------------------------------------------------+
  9. // | This source file is subject to version 2.02 of the PHP license, |
  10. // | that is bundled with this package in the file LICENSE, and is |
  11. // | available at through the world-wide-web at |
  12. // | http://www.php.net/license/2_02.txt. |
  13. // | If you did not receive a copy of the PHP license and are unable to |
  14. // | obtain it through the world-wide-web, please send a note to |
  15. // | license@php.net so we can mail you a copy immediately. |
  16. // +----------------------------------------------------------------------+
  17. // | Author: Xavier Noguer <xnoguer@php.net> |
  18. // | Based on OLE::Storage_Lite by Kawai, Takanori |
  19. // +----------------------------------------------------------------------+
  20. //
  21. use PhpOffice\PhpSpreadsheet\Exception;
  22. use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
  23. use PhpOffice\PhpSpreadsheet\Shared\OLE\ChainedBlockStream;
  24. use PhpOffice\PhpSpreadsheet\Shared\OLE\PPS\Root;
  25. /*
  26. * Global registry of OLE instances. OLE::getStream() appends the calling
  27. * instance here and embeds its array key as "oleInstanceId" in the stream path.
  28. * NOTE(review): presumably read back by the ChainedBlockStream wrapper - confirm.
  29. * @var array
  30. */
  31. $GLOBALS['_OLE_INSTANCES'] = [];
  32. /**
  33. * OLE package base class.
  34. *
  35. * @author Xavier Noguer <xnoguer@php.net>
  36. * @author Christian Schmidt <schmidt@php.net>
  37. */
  38. class OLE
  39. {
  40. const OLE_PPS_TYPE_ROOT = 5;
  41. const OLE_PPS_TYPE_DIR = 1;
  42. const OLE_PPS_TYPE_FILE = 2;
  43. const OLE_DATA_SIZE_SMALL = 0x1000;
  44. const OLE_LONG_INT_SIZE = 4;
  45. const OLE_PPS_SIZE = 0x80;
  46. /**
  47. * The file handle for reading an OLE container.
  48. *
  49. * @var resource
  50. */
  51. public $_file_handle;
  52. /**
  53. * Array of PPS's found on the OLE container.
  54. *
  55. * @var array
  56. */
  57. public $_list = [];
  58. /**
  59. * Root directory of OLE container.
  60. *
  61. * @var Root
  62. */
  63. public $root;
  64. /**
  65. * Big Block Allocation Table.
  66. *
  67. * @var array (blockId => nextBlockId)
  68. */
  69. public $bbat;
  70. /**
  71. * Short Block Allocation Table.
  72. *
  73. * @var array (blockId => nextBlockId)
  74. */
  75. public $sbat;
  76. /**
  77. * Size of big blocks. This is usually 512.
  78. *
  79. * @var int number of octets per block
  80. */
  81. public $bigBlockSize;
  82. /**
  83. * Size of small blocks. This is usually 64.
  84. *
  85. * @var int number of octets per block
  86. */
  87. public $smallBlockSize;
  88. /**
  89. * Threshold for big blocks.
  90. *
  91. * @var int
  92. */
  93. public $bigBlockThreshold;
  94. /**
  95. * Reads an OLE container from the contents of the file given.
  96. *
  97. * @acces public
  98. *
  99. * @param string $filename
  100. *
  101. * @return bool true on success, PEAR_Error on failure
  102. */
  103. public function read($filename)
  104. {
  105. $fh = fopen($filename, 'rb');
  106. if (!$fh) {
  107. throw new ReaderException("Can't open file $filename");
  108. }
  109. $this->_file_handle = $fh;
  110. $signature = fread($fh, 8);
  111. if ("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" != $signature) {
  112. throw new ReaderException("File doesn't seem to be an OLE container.");
  113. }
  114. fseek($fh, 28);
  115. if (fread($fh, 2) != "\xFE\xFF") {
  116. // This shouldn't be a problem in practice
  117. throw new ReaderException('Only Little-Endian encoding is supported.');
  118. }
  119. // Size of blocks and short blocks in bytes
  120. $this->bigBlockSize = 2 ** self::readInt2($fh);
  121. $this->smallBlockSize = 2 ** self::readInt2($fh);
  122. // Skip UID, revision number and version number
  123. fseek($fh, 44);
  124. // Number of blocks in Big Block Allocation Table
  125. $bbatBlockCount = self::readInt4($fh);
  126. // Root chain 1st block
  127. $directoryFirstBlockId = self::readInt4($fh);
  128. // Skip unused bytes
  129. fseek($fh, 56);
  130. // Streams shorter than this are stored using small blocks
  131. $this->bigBlockThreshold = self::readInt4($fh);
  132. // Block id of first sector in Short Block Allocation Table
  133. $sbatFirstBlockId = self::readInt4($fh);
  134. // Number of blocks in Short Block Allocation Table
  135. $sbbatBlockCount = self::readInt4($fh);
  136. // Block id of first sector in Master Block Allocation Table
  137. $mbatFirstBlockId = self::readInt4($fh);
  138. // Number of blocks in Master Block Allocation Table
  139. $mbbatBlockCount = self::readInt4($fh);
  140. $this->bbat = [];
  141. // Remaining 4 * 109 bytes of current block is beginning of Master
  142. // Block Allocation Table
  143. $mbatBlocks = [];
  144. for ($i = 0; $i < 109; ++$i) {
  145. $mbatBlocks[] = self::readInt4($fh);
  146. }
  147. // Read rest of Master Block Allocation Table (if any is left)
  148. $pos = $this->getBlockOffset($mbatFirstBlockId);
  149. for ($i = 0; $i < $mbbatBlockCount; ++$i) {
  150. fseek($fh, $pos);
  151. for ($j = 0; $j < $this->bigBlockSize / 4 - 1; ++$j) {
  152. $mbatBlocks[] = self::readInt4($fh);
  153. }
  154. // Last block id in each block points to next block
  155. $pos = $this->getBlockOffset(self::readInt4($fh));
  156. }
  157. // Read Big Block Allocation Table according to chain specified by $mbatBlocks
  158. for ($i = 0; $i < $bbatBlockCount; ++$i) {
  159. $pos = $this->getBlockOffset($mbatBlocks[$i]);
  160. fseek($fh, $pos);
  161. for ($j = 0; $j < $this->bigBlockSize / 4; ++$j) {
  162. $this->bbat[] = self::readInt4($fh);
  163. }
  164. }
  165. // Read short block allocation table (SBAT)
  166. $this->sbat = [];
  167. $shortBlockCount = $sbbatBlockCount * $this->bigBlockSize / 4;
  168. $sbatFh = $this->getStream($sbatFirstBlockId);
  169. for ($blockId = 0; $blockId < $shortBlockCount; ++$blockId) {
  170. $this->sbat[$blockId] = self::readInt4($sbatFh);
  171. }
  172. fclose($sbatFh);
  173. $this->readPpsWks($directoryFirstBlockId);
  174. return true;
  175. }
  176. /**
  177. * @param int $blockId byte offset from beginning of file
  178. *
  179. * @return int
  180. */
  181. public function getBlockOffset($blockId)
  182. {
  183. return 512 + $blockId * $this->bigBlockSize;
  184. }
  185. /**
  186. * Returns a stream for use with fread() etc. External callers should
  187. * use \PhpOffice\PhpSpreadsheet\Shared\OLE\PPS\File::getStream().
  188. *
  189. * @param int|OLE\PPS $blockIdOrPps block id or PPS
  190. *
  191. * @return resource read-only stream
  192. */
  193. public function getStream($blockIdOrPps)
  194. {
  195. static $isRegistered = false;
  196. if (!$isRegistered) {
  197. stream_wrapper_register('ole-chainedblockstream', ChainedBlockStream::class);
  198. $isRegistered = true;
  199. }
  200. // Store current instance in global array, so that it can be accessed
  201. // in OLE_ChainedBlockStream::stream_open().
  202. // Object is removed from self::$instances in OLE_Stream::close().
  203. $GLOBALS['_OLE_INSTANCES'][] = $this;
  204. $keys = array_keys($GLOBALS['_OLE_INSTANCES']);
  205. $instanceId = end($keys);
  206. $path = 'ole-chainedblockstream://oleInstanceId=' . $instanceId;
  207. if ($blockIdOrPps instanceof OLE\PPS) {
  208. $path .= '&blockId=' . $blockIdOrPps->startBlock;
  209. $path .= '&size=' . $blockIdOrPps->Size;
  210. } else {
  211. $path .= '&blockId=' . $blockIdOrPps;
  212. }
  213. return fopen($path, 'rb');
  214. }
  215. /**
  216. * Reads a signed char.
  217. *
  218. * @param resource $fileHandle file handle
  219. *
  220. * @return int
  221. */
  222. private static function readInt1($fileHandle)
  223. {
  224. [, $tmp] = unpack('c', fread($fileHandle, 1));
  225. return $tmp;
  226. }
  227. /**
  228. * Reads an unsigned short (2 octets).
  229. *
  230. * @param resource $fileHandle file handle
  231. *
  232. * @return int
  233. */
  234. private static function readInt2($fileHandle)
  235. {
  236. [, $tmp] = unpack('v', fread($fileHandle, 2));
  237. return $tmp;
  238. }
  239. /**
  240. * Reads an unsigned long (4 octets).
  241. *
  242. * @param resource $fileHandle file handle
  243. *
  244. * @return int
  245. */
  246. private static function readInt4($fileHandle)
  247. {
  248. [, $tmp] = unpack('V', fread($fileHandle, 4));
  249. return $tmp;
  250. }
  251. /**
  252. * Gets information about all PPS's on the OLE container from the PPS WK's
  253. * creates an OLE_PPS object for each one.
  254. *
  255. * @param int $blockId the block id of the first block
  256. *
  257. * @return bool true on success, PEAR_Error on failure
  258. */
  259. public function readPpsWks($blockId)
  260. {
  261. $fh = $this->getStream($blockId);
  262. for ($pos = 0; true; $pos += 128) {
  263. fseek($fh, $pos, SEEK_SET);
  264. $nameUtf16 = fread($fh, 64);
  265. $nameLength = self::readInt2($fh);
  266. $nameUtf16 = substr($nameUtf16, 0, $nameLength - 2);
  267. // Simple conversion from UTF-16LE to ISO-8859-1
  268. $name = str_replace("\x00", '', $nameUtf16);
  269. $type = self::readInt1($fh);
  270. switch ($type) {
  271. case self::OLE_PPS_TYPE_ROOT:
  272. $pps = new OLE\PPS\Root(null, null, []);
  273. $this->root = $pps;
  274. break;
  275. case self::OLE_PPS_TYPE_DIR:
  276. $pps = new OLE\PPS(null, null, null, null, null, null, null, null, null, []);
  277. break;
  278. case self::OLE_PPS_TYPE_FILE:
  279. $pps = new OLE\PPS\File($name);
  280. break;
  281. default:
  282. throw new Exception('Unsupported PPS type');
  283. }
  284. fseek($fh, 1, SEEK_CUR);
  285. $pps->Type = $type;
  286. $pps->Name = $name;
  287. $pps->PrevPps = self::readInt4($fh);
  288. $pps->NextPps = self::readInt4($fh);
  289. $pps->DirPps = self::readInt4($fh);
  290. fseek($fh, 20, SEEK_CUR);
  291. $pps->Time1st = self::OLE2LocalDate(fread($fh, 8));
  292. $pps->Time2nd = self::OLE2LocalDate(fread($fh, 8));
  293. $pps->startBlock = self::readInt4($fh);
  294. $pps->Size = self::readInt4($fh);
  295. $pps->No = count($this->_list);
  296. $this->_list[] = $pps;
  297. // check if the PPS tree (starting from root) is complete
  298. if (isset($this->root) && $this->ppsTreeComplete($this->root->No)) {
  299. break;
  300. }
  301. }
  302. fclose($fh);
  303. // Initialize $pps->children on directories
  304. foreach ($this->_list as $pps) {
  305. if ($pps->Type == self::OLE_PPS_TYPE_DIR || $pps->Type == self::OLE_PPS_TYPE_ROOT) {
  306. $nos = [$pps->DirPps];
  307. $pps->children = [];
  308. while ($nos) {
  309. $no = array_pop($nos);
  310. if ($no != -1) {
  311. $childPps = $this->_list[$no];
  312. $nos[] = $childPps->PrevPps;
  313. $nos[] = $childPps->NextPps;
  314. $pps->children[] = $childPps;
  315. }
  316. }
  317. }
  318. }
  319. return true;
  320. }
  321. /**
  322. * It checks whether the PPS tree is complete (all PPS's read)
  323. * starting with the given PPS (not necessarily root).
  324. *
  325. * @param int $index The index of the PPS from which we are checking
  326. *
  327. * @return bool Whether the PPS tree for the given PPS is complete
  328. */
  329. private function ppsTreeComplete($index)
  330. {
  331. return isset($this->_list[$index]) &&
  332. ($pps = $this->_list[$index]) &&
  333. ($pps->PrevPps == -1 ||
  334. $this->ppsTreeComplete($pps->PrevPps)) &&
  335. ($pps->NextPps == -1 ||
  336. $this->ppsTreeComplete($pps->NextPps)) &&
  337. ($pps->DirPps == -1 ||
  338. $this->ppsTreeComplete($pps->DirPps));
  339. }
  340. /**
  341. * Checks whether a PPS is a File PPS or not.
  342. * If there is no PPS for the index given, it will return false.
  343. *
  344. * @param int $index The index for the PPS
  345. *
  346. * @return bool true if it's a File PPS, false otherwise
  347. */
  348. public function isFile($index)
  349. {
  350. if (isset($this->_list[$index])) {
  351. return $this->_list[$index]->Type == self::OLE_PPS_TYPE_FILE;
  352. }
  353. return false;
  354. }
  355. /**
  356. * Checks whether a PPS is a Root PPS or not.
  357. * If there is no PPS for the index given, it will return false.
  358. *
  359. * @param int $index the index for the PPS
  360. *
  361. * @return bool true if it's a Root PPS, false otherwise
  362. */
  363. public function isRoot($index)
  364. {
  365. if (isset($this->_list[$index])) {
  366. return $this->_list[$index]->Type == self::OLE_PPS_TYPE_ROOT;
  367. }
  368. return false;
  369. }
  370. /**
  371. * Gives the total number of PPS's found in the OLE container.
  372. *
  373. * @return int The total number of PPS's found in the OLE container
  374. */
  375. public function ppsTotal()
  376. {
  377. return count($this->_list);
  378. }
  379. /**
  380. * Gets data from a PPS
  381. * If there is no PPS for the index given, it will return an empty string.
  382. *
  383. * @param int $index The index for the PPS
  384. * @param int $position The position from which to start reading
  385. * (relative to the PPS)
  386. * @param int $length The amount of bytes to read (at most)
  387. *
  388. * @return string The binary string containing the data requested
  389. *
  390. * @see OLE_PPS_File::getStream()
  391. */
  392. public function getData($index, $position, $length)
  393. {
  394. // if position is not valid return empty string
  395. if (!isset($this->_list[$index]) || ($position >= $this->_list[$index]->Size) || ($position < 0)) {
  396. return '';
  397. }
  398. $fh = $this->getStream($this->_list[$index]);
  399. $data = stream_get_contents($fh, $length, $position);
  400. fclose($fh);
  401. return $data;
  402. }
  403. /**
  404. * Gets the data length from a PPS
  405. * If there is no PPS for the index given, it will return 0.
  406. *
  407. * @param int $index The index for the PPS
  408. *
  409. * @return int The amount of bytes in data the PPS has
  410. */
  411. public function getDataLength($index)
  412. {
  413. if (isset($this->_list[$index])) {
  414. return $this->_list[$index]->Size;
  415. }
  416. return 0;
  417. }
  418. /**
  419. * Utility function to transform ASCII text to Unicode.
  420. *
  421. * @param string $ascii The ASCII string to transform
  422. *
  423. * @return string The string in Unicode
  424. */
  425. public static function ascToUcs($ascii)
  426. {
  427. $rawname = '';
  428. $iMax = strlen($ascii);
  429. for ($i = 0; $i < $iMax; ++$i) {
  430. $rawname .= $ascii[$i]
  431. . "\x00";
  432. }
  433. return $rawname;
  434. }
  435. /**
  436. * Utility function
  437. * Returns a string for the OLE container with the date given.
  438. *
  439. * @param float|int $date A timestamp
  440. *
  441. * @return string The string for the OLE container
  442. */
  443. public static function localDateToOLE($date)
  444. {
  445. if (!$date) {
  446. return "\x00\x00\x00\x00\x00\x00\x00\x00";
  447. }
  448. $dateTime = Date::dateTimeFromTimestamp("$date");
  449. // days from 1-1-1601 until the beggining of UNIX era
  450. $days = 134774;
  451. // calculate seconds
  452. $big_date = $days * 24 * 3600 + (float) $dateTime->format('U');
  453. // multiply just to make MS happy
  454. $big_date *= 10000000;
  455. // Make HEX string
  456. $res = '';
  457. $factor = 2 ** 56;
  458. while ($factor >= 1) {
  459. $hex = (int) floor($big_date / $factor);
  460. $res = pack('c', $hex) . $res;
  461. $big_date = fmod($big_date, $factor);
  462. $factor /= 256;
  463. }
  464. return $res;
  465. }
  466. /**
  467. * Returns a timestamp from an OLE container's date.
  468. *
  469. * @param string $oleTimestamp A binary string with the encoded date
  470. *
  471. * @return float|int The Unix timestamp corresponding to the string
  472. */
  473. public static function OLE2LocalDate($oleTimestamp)
  474. {
  475. if (strlen($oleTimestamp) != 8) {
  476. throw new ReaderException('Expecting 8 byte string');
  477. }
  478. // convert to units of 100 ns since 1601:
  479. $unpackedTimestamp = unpack('v4', $oleTimestamp);
  480. $timestampHigh = (float) $unpackedTimestamp[4] * 65536 + (float) $unpackedTimestamp[3];
  481. $timestampLow = (float) $unpackedTimestamp[2] * 65536 + (float) $unpackedTimestamp[1];
  482. // translate to seconds since 1601:
  483. $timestampHigh /= 10000000;
  484. $timestampLow /= 10000000;
  485. // days from 1601 to 1970:
  486. $days = 134774;
  487. // translate to seconds since 1970:
  488. $unixTimestamp = floor(65536.0 * 65536.0 * $timestampHigh + $timestampLow - $days * 24 * 3600 + 0.5);
  489. return IntOrFloat::evaluate($unixTimestamp);
  490. }
  491. }