CodePage.php 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. <?php
  2. namespace PhpOffice\PhpSpreadsheet\Shared;
  3. use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
  4. class CodePage
  5. {
  6. public const DEFAULT_CODE_PAGE = 'CP1252';
  7. /** @var array */
  8. private static $pageArray = [
  9. 0 => 'CP1252', // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
  10. 367 => 'ASCII', // ASCII
  11. 437 => 'CP437', // OEM US
  12. //720 => 'notsupported', // OEM Arabic
  13. 737 => 'CP737', // OEM Greek
  14. 775 => 'CP775', // OEM Baltic
  15. 850 => 'CP850', // OEM Latin I
  16. 852 => 'CP852', // OEM Latin II (Central European)
  17. 855 => 'CP855', // OEM Cyrillic
  18. 857 => 'CP857', // OEM Turkish
  19. 858 => 'CP858', // OEM Multilingual Latin I with Euro
  20. 860 => 'CP860', // OEM Portugese
  21. 861 => 'CP861', // OEM Icelandic
  22. 862 => 'CP862', // OEM Hebrew
  23. 863 => 'CP863', // OEM Canadian (French)
  24. 864 => 'CP864', // OEM Arabic
  25. 865 => 'CP865', // OEM Nordic
  26. 866 => 'CP866', // OEM Cyrillic (Russian)
  27. 869 => 'CP869', // OEM Greek (Modern)
  28. 874 => 'CP874', // ANSI Thai
  29. 932 => 'CP932', // ANSI Japanese Shift-JIS
  30. 936 => 'CP936', // ANSI Chinese Simplified GBK
  31. 949 => 'CP949', // ANSI Korean (Wansung)
  32. 950 => 'CP950', // ANSI Chinese Traditional BIG5
  33. 1200 => 'UTF-16LE', // UTF-16 (BIFF8)
  34. 1250 => 'CP1250', // ANSI Latin II (Central European)
  35. 1251 => 'CP1251', // ANSI Cyrillic
  36. 1252 => 'CP1252', // ANSI Latin I (BIFF4-BIFF7)
  37. 1253 => 'CP1253', // ANSI Greek
  38. 1254 => 'CP1254', // ANSI Turkish
  39. 1255 => 'CP1255', // ANSI Hebrew
  40. 1256 => 'CP1256', // ANSI Arabic
  41. 1257 => 'CP1257', // ANSI Baltic
  42. 1258 => 'CP1258', // ANSI Vietnamese
  43. 1361 => 'CP1361', // ANSI Korean (Johab)
  44. 10000 => 'MAC', // Apple Roman
  45. 10001 => 'CP932', // Macintosh Japanese
  46. 10002 => 'CP950', // Macintosh Chinese Traditional
  47. 10003 => 'CP1361', // Macintosh Korean
  48. 10004 => 'MACARABIC', // Apple Arabic
  49. 10005 => 'MACHEBREW', // Apple Hebrew
  50. 10006 => 'MACGREEK', // Macintosh Greek
  51. 10007 => 'MACCYRILLIC', // Macintosh Cyrillic
  52. 10008 => 'CP936', // Macintosh - Simplified Chinese (GB 2312)
  53. 10010 => 'MACROMANIA', // Macintosh Romania
  54. 10017 => 'MACUKRAINE', // Macintosh Ukraine
  55. 10021 => 'MACTHAI', // Macintosh Thai
  56. 10029 => ['MACCENTRALEUROPE', 'MAC-CENTRALEUROPE'], // Macintosh Central Europe
  57. 10079 => 'MACICELAND', // Macintosh Icelandic
  58. 10081 => 'MACTURKISH', // Macintosh Turkish
  59. 10082 => 'MACCROATIAN', // Macintosh Croatian
  60. 21010 => 'UTF-16LE', // UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously use Codepage 21010 for UTF-16LE
  61. 32768 => 'MAC', // Apple Roman
  62. //32769 => 'unsupported', // ANSI Latin I (BIFF2-BIFF3)
  63. 65000 => 'UTF-7', // Unicode (UTF-7)
  64. 65001 => 'UTF-8', // Unicode (UTF-8)
  65. 99999 => ['unsupported'], // Unicode (UTF-8)
  66. ];
  67. public static function validate(string $codePage): bool
  68. {
  69. return in_array($codePage, self::$pageArray, true);
  70. }
  71. /**
  72. * Convert Microsoft Code Page Identifier to Code Page Name which iconv
  73. * and mbstring understands.
  74. *
  75. * @param int $codePage Microsoft Code Page Indentifier
  76. *
  77. * @return string Code Page Name
  78. */
  79. public static function numberToName(int $codePage): string
  80. {
  81. if (array_key_exists($codePage, self::$pageArray)) {
  82. $value = self::$pageArray[$codePage];
  83. if (is_array($value)) {
  84. foreach ($value as $encoding) {
  85. if (@iconv('UTF-8', $encoding, ' ') !== false) {
  86. self::$pageArray[$codePage] = $encoding;
  87. return $encoding;
  88. }
  89. }
  90. throw new PhpSpreadsheetException("Code page $codePage not implemented on this system.");
  91. } else {
  92. return $value;
  93. }
  94. }
  95. if ($codePage == 720 || $codePage == 32769) {
  96. throw new PhpSpreadsheetException("Code page $codePage not supported."); // OEM Arabic
  97. }
  98. throw new PhpSpreadsheetException('Unknown codepage: ' . $codePage);
  99. }
  100. public static function getEncodings(): array
  101. {
  102. return self::$pageArray;
  103. }
  104. }