1 | <?php |
---|
2 | /** |
---|
3 | * PHPExcel |
---|
4 | * |
---|
5 | * Copyright (c) 2006 - 2014 PHPExcel |
---|
6 | * |
---|
7 | * This library is free software; you can redistribute it and/or |
---|
8 | * modify it under the terms of the GNU Lesser General Public |
---|
9 | * License as published by the Free Software Foundation; either |
---|
10 | * version 2.1 of the License, or (at your option) any later version. |
---|
11 | * |
---|
12 | * This library is distributed in the hope that it will be useful, |
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
15 | * Lesser General Public License for more details. |
---|
16 | * |
---|
17 | * You should have received a copy of the GNU Lesser General Public |
---|
18 | * License along with this library; if not, write to the Free Software |
---|
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
---|
20 | * |
---|
21 | * @category PHPExcel |
---|
22 | * @package PHPExcel_Shared |
---|
23 | * @copyright Copyright (c) 2006 - 2014 PHPExcel (http://www.codeplex.com/PHPExcel) |
---|
24 | * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL |
---|
25 | * @version 1.8.0, 2014-03-02 |
---|
26 | */ |
---|
27 | |
---|
28 | |
---|
29 | /** |
---|
30 | * PHPExcel_Shared_String |
---|
31 | * |
---|
32 | * @category PHPExcel |
---|
33 | * @package PHPExcel_Shared |
---|
34 | * @copyright Copyright (c) 2006 - 2014 PHPExcel (http://www.codeplex.com/PHPExcel) |
---|
35 | */ |
---|
36 | class PHPExcel_Shared_String |
---|
37 | { |
---|
38 | /** Constants */ |
---|
39 | /** Regular Expressions */ |
---|
40 | // Fraction |
---|
41 | const STRING_REGEXP_FRACTION = '(-?)(\d+)\s+(\d+\/\d+)'; |
---|
42 | |
---|
43 | |
---|
44 | /** |
---|
45 | * Control characters array |
---|
46 | * |
---|
47 | * @var string[] |
---|
48 | */ |
---|
49 | private static $_controlCharacters = array(); |
---|
50 | |
---|
51 | /** |
---|
52 | * SYLK Characters array |
---|
53 | * |
---|
54 | * @var array |
---|
55 | */ |
---|
56 | private static $_SYLKCharacters = array(); |
---|
57 | |
---|
58 | /** |
---|
59 | * Decimal separator |
---|
60 | * |
---|
61 | * @var string |
---|
62 | */ |
---|
63 | private static $_decimalSeparator; |
---|
64 | |
---|
65 | /** |
---|
66 | * Thousands separator |
---|
67 | * |
---|
68 | * @var string |
---|
69 | */ |
---|
70 | private static $_thousandsSeparator; |
---|
71 | |
---|
72 | /** |
---|
73 | * Currency code |
---|
74 | * |
---|
75 | * @var string |
---|
76 | */ |
---|
77 | private static $_currencyCode; |
---|
78 | |
---|
79 | /** |
---|
80 | * Is mbstring extension available? |
---|
81 | * |
---|
82 | * @var boolean |
---|
83 | */ |
---|
84 | private static $_isMbstringEnabled; |
---|
85 | |
---|
86 | /** |
---|
87 | * Is iconv extension available? |
---|
88 | * |
---|
89 | * @var boolean |
---|
90 | */ |
---|
91 | private static $_isIconvEnabled; |
---|
92 | |
---|
/**
 * Populate the lookup table that maps OOXML "_xHHHH_" escape sequences
 * to the raw control characters they stand for.
 *
 * Covers code points 0 to 31, leaving out tab (9), line feed (10) and
 * carriage return (13).
 */
private static function _buildControlCharacters()
{
    foreach (range(0, 31) as $codePoint) {
        if ($codePoint == 9 || $codePoint == 10 || $codePoint == 13) {
            continue;
        }

        // Four upper-case hex digits, zero padded, e.g. _x001F_
        $escape = sprintf('_x%04X_', $codePoint);
        self::$_controlCharacters[$escape] = chr($codePoint);
    }
}
---|
105 | |
---|
/**
 * Build SYLK characters array
 *
 * Fills self::$_SYLKCharacters with a map from SYLK escape sequences
 * (each starting with ESC, "\x1B") to their replacement: the raw control
 * character for 0-31 and 127, and the UTF-8 encoding of the CP1252
 * character named in each line's comment for 128-255.
 *
 * Every key must be unique: a repeated key in a PHP array literal silently
 * overwrites the earlier entry.
 */
private static function _buildSYLKCharacters()
{
    self::$_SYLKCharacters = array(
        "\x1B 0"  => chr(0),
        "\x1B 1"  => chr(1),
        "\x1B 2"  => chr(2),
        "\x1B 3"  => chr(3),
        "\x1B 4"  => chr(4),
        "\x1B 5"  => chr(5),
        "\x1B 6"  => chr(6),
        "\x1B 7"  => chr(7),
        "\x1B 8"  => chr(8),
        "\x1B 9"  => chr(9),
        "\x1B :"  => chr(10),
        "\x1B ;"  => chr(11),
        "\x1B <"  => chr(12),
        "\x1B ="  => chr(13),   // was a second "\x1B :" key, which clobbered the chr(10) entry
        "\x1B >"  => chr(14),
        "\x1B ?"  => chr(15),
        "\x1B!0"  => chr(16),
        "\x1B!1"  => chr(17),
        "\x1B!2"  => chr(18),
        "\x1B!3"  => chr(19),
        "\x1B!4"  => chr(20),
        "\x1B!5"  => chr(21),
        "\x1B!6"  => chr(22),
        "\x1B!7"  => chr(23),
        "\x1B!8"  => chr(24),
        "\x1B!9"  => chr(25),
        "\x1B!:"  => chr(26),
        "\x1B!;"  => chr(27),
        "\x1B!<"  => chr(28),
        "\x1B!="  => chr(29),
        "\x1B!>"  => chr(30),
        "\x1B!?"  => chr(31),
        "\x1B'?"  => chr(127),
        "\x1B(0"  => '€',   // 128 in CP1252
        "\x1B(2"  => '‚',   // 130 in CP1252
        "\x1B(3"  => 'ƒ',   // 131 in CP1252
        "\x1B(4"  => '„',   // 132 in CP1252
        "\x1B(5"  => '…',   // 133 in CP1252
        "\x1B(6"  => '†',   // 134 in CP1252
        "\x1B(7"  => '‡',   // 135 in CP1252
        "\x1B(8"  => 'ˆ',   // 136 in CP1252
        "\x1B(9"  => '‰',   // 137 in CP1252
        "\x1B(:"  => 'Š',   // 138 in CP1252
        "\x1B(;"  => '‹',   // 139 in CP1252
        "\x1BNj"  => 'Œ',   // 140 in CP1252
        "\x1B(>"  => 'Ž',   // 142 in CP1252
        "\x1B)1"  => '‘',   // 145 in CP1252
        "\x1B)2"  => '’',   // 146 in CP1252
        "\x1B)3"  => '“',   // 147 in CP1252
        "\x1B)4"  => '”',   // 148 in CP1252
        "\x1B)5"  => '•',   // 149 in CP1252
        "\x1B)6"  => '–',   // 150 in CP1252
        "\x1B)7"  => '—',   // 151 in CP1252
        "\x1B)8"  => '˜',   // 152 in CP1252
        "\x1B)9"  => '™',   // 153 in CP1252
        "\x1B):"  => 'š',   // 154 in CP1252
        "\x1B);"  => '›',   // 155 in CP1252
        "\x1BNz"  => 'œ',   // 156 in CP1252
        "\x1B)>"  => 'ž',   // 158 in CP1252
        "\x1B)?"  => 'Ÿ',   // 159 in CP1252
        "\x1B*0"  => "\xC2\xA0",   // 160 in CP1252 (no-break space, written as UTF-8 bytes)
        "\x1BN!"  => '¡',   // 161 in CP1252
        "\x1BN\"" => '¢',   // 162 in CP1252
        "\x1BN#"  => '£',   // 163 in CP1252
        "\x1BN("  => '¤',   // 164 in CP1252
        "\x1BN%"  => '¥',   // 165 in CP1252
        "\x1B*6"  => '¦',   // 166 in CP1252
        "\x1BN'"  => '§',   // 167 in CP1252
        "\x1BNH " => '¨',   // 168 in CP1252
        "\x1BNS"  => '©',   // 169 in CP1252
        "\x1BNc"  => 'ª',   // 170 in CP1252
        "\x1BN+"  => '«',   // 171 in CP1252
        "\x1B*<"  => '¬',   // 172 in CP1252
        "\x1B*="  => "\xC2\xAD",   // 173 in CP1252 (soft hyphen, written as UTF-8 bytes)
        "\x1BNR"  => '®',   // 174 in CP1252
        "\x1B*?"  => '¯',   // 175 in CP1252
        "\x1BN0"  => '°',   // 176 in CP1252
        "\x1BN1"  => '±',   // 177 in CP1252
        "\x1BN2"  => '²',   // 178 in CP1252
        "\x1BN3"  => '³',   // 179 in CP1252
        "\x1BNB " => '´',   // 180 in CP1252
        "\x1BN5"  => 'µ',   // 181 in CP1252
        "\x1BN6"  => '¶',   // 182 in CP1252
        "\x1BN7"  => '·',   // 183 in CP1252
        "\x1B+8"  => '¸',   // 184 in CP1252
        "\x1BNQ"  => '¹',   // 185 in CP1252
        "\x1BNk"  => 'º',   // 186 in CP1252
        "\x1BN;"  => '»',   // 187 in CP1252
        "\x1BN<"  => '¼',   // 188 in CP1252
        "\x1BN="  => '½',   // 189 in CP1252
        "\x1BN>"  => '¾',   // 190 in CP1252
        "\x1BN?"  => '¿',   // 191 in CP1252
        "\x1BNAA" => 'À',   // 192 in CP1252
        "\x1BNBA" => 'Á',   // 193 in CP1252
        "\x1BNCA" => 'Â',   // 194 in CP1252
        "\x1BNDA" => 'Ã',   // 195 in CP1252
        "\x1BNHA" => 'Ä',   // 196 in CP1252
        "\x1BNJA" => 'Å',   // 197 in CP1252
        "\x1BNa"  => 'Æ',   // 198 in CP1252
        "\x1BNKC" => 'Ç',   // 199 in CP1252
        "\x1BNAE" => 'È',   // 200 in CP1252
        "\x1BNBE" => 'É',   // 201 in CP1252
        "\x1BNCE" => 'Ê',   // 202 in CP1252
        "\x1BNHE" => 'Ë',   // 203 in CP1252
        "\x1BNAI" => 'Ì',   // 204 in CP1252
        "\x1BNBI" => 'Í',   // 205 in CP1252
        "\x1BNCI" => 'Î',   // 206 in CP1252
        "\x1BNHI" => 'Ï',   // 207 in CP1252
        "\x1BNb"  => 'Ð',   // 208 in CP1252
        "\x1BNDN" => 'Ñ',   // 209 in CP1252
        "\x1BNAO" => 'Ò',   // 210 in CP1252
        "\x1BNBO" => 'Ó',   // 211 in CP1252
        "\x1BNCO" => 'Ô',   // 212 in CP1252
        "\x1BNDO" => 'Õ',   // 213 in CP1252
        "\x1BNHO" => 'Ö',   // 214 in CP1252
        "\x1B-7"  => '×',   // 215 in CP1252
        "\x1BNi"  => 'Ø',   // 216 in CP1252
        "\x1BNAU" => 'Ù',   // 217 in CP1252
        "\x1BNBU" => 'Ú',   // 218 in CP1252
        "\x1BNCU" => 'Û',   // 219 in CP1252
        "\x1BNHU" => 'Ü',   // 220 in CP1252
        "\x1B-="  => 'Ý',   // 221 in CP1252
        "\x1BNl"  => 'Þ',   // 222 in CP1252
        "\x1BN{"  => 'ß',   // 223 in CP1252
        "\x1BNAa" => 'à',   // 224 in CP1252
        "\x1BNBa" => 'á',   // 225 in CP1252
        "\x1BNCa" => 'â',   // 226 in CP1252
        "\x1BNDa" => 'ã',   // 227 in CP1252
        "\x1BNHa" => 'ä',   // 228 in CP1252
        "\x1BNJa" => 'å',   // 229 in CP1252
        "\x1BNq"  => 'æ',   // 230 in CP1252
        "\x1BNKc" => 'ç',   // 231 in CP1252
        "\x1BNAe" => 'è',   // 232 in CP1252
        "\x1BNBe" => 'é',   // 233 in CP1252
        "\x1BNCe" => 'ê',   // 234 in CP1252
        "\x1BNHe" => 'ë',   // 235 in CP1252
        "\x1BNAi" => 'ì',   // 236 in CP1252
        "\x1BNBi" => 'í',   // 237 in CP1252
        "\x1BNCi" => 'î',   // 238 in CP1252
        "\x1BNHi" => 'ï',   // 239 in CP1252
        "\x1BNs"  => 'ð',   // 240 in CP1252
        "\x1BNDn" => 'ñ',   // 241 in CP1252
        "\x1BNAo" => 'ò',   // 242 in CP1252
        "\x1BNBo" => 'ó',   // 243 in CP1252
        "\x1BNCo" => 'ô',   // 244 in CP1252
        "\x1BNDo" => 'õ',   // 245 in CP1252
        "\x1BNHo" => 'ö',   // 246 in CP1252
        "\x1B/7"  => '÷',   // 247 in CP1252
        "\x1BNy"  => 'ø',   // 248 in CP1252
        "\x1BNAu" => 'ù',   // 249 in CP1252
        "\x1BNBu" => 'ú',   // 250 in CP1252
        "\x1BNCu" => 'û',   // 251 in CP1252
        "\x1BNHu" => 'ü',   // 252 in CP1252
        "\x1B/="  => 'ý',   // 253 in CP1252
        "\x1BN|"  => 'þ',   // 254 in CP1252
        "\x1BNHy" => 'ÿ',   // 255 in CP1252
    );
}
---|
270 | |
---|
/**
 * Report whether the mbstring extension can be used.
 *
 * The answer is worked out once (by checking that mb_convert_encoding()
 * exists) and cached in a static property for later calls.
 *
 * @return boolean
 */
public static function getIsMbstringEnabled()
{
    if (!isset(self::$_isMbstringEnabled)) {
        self::$_isMbstringEnabled = function_exists('mb_convert_encoding');
    }

    return self::$_isMbstringEnabled;
}
---|
287 | |
---|
/**
 * Report whether the iconv extension is present and behaves well enough to use.
 *
 * The result is cached in a static property after the first call.
 *
 * @return boolean
 */
public static function getIsIconvEnabled()
{
    if (isset(self::$_isIconvEnabled)) {
        return self::$_isIconvEnabled;
    }

    // The checks run in order and stop at the first failure:
    //  1. the iconv() function must exist;
    //  2. a trivial conversion must succeed (on some installs
    //     iconv('UTF-8', 'UTF-16LE', 'x') just returns false);
    //  3. iconv_substr() must work (it can return false in PHP 5.2.0,
    //     see http://bugs.php.net/bug.php?id=37773).
    $usable = function_exists('iconv')
        && @iconv('UTF-8', 'UTF-16LE', 'x')
        && @iconv_substr('A', 0, 1, 'UTF-8');

    if ($usable) {
        // CUSTOM: IBM AIX iconv() does not work
        $brokenAixIconv = defined('PHP_OS') && @stristr(PHP_OS, 'AIX')
            && defined('ICONV_IMPL') && (@strcasecmp(ICONV_IMPL, 'unknown') == 0)
            && defined('ICONV_VERSION') && (@strcasecmp(ICONV_VERSION, 'unknown') == 0);

        if ($brokenAixIconv) {
            $usable = false;
        }
    }

    self::$_isIconvEnabled = $usable;
    return $usable;
}
---|
331 | |
---|
/**
 * Build the control-character and SYLK lookup tables.
 *
 * Each table is only built while it is still empty, so calling this
 * more than once is harmless.
 */
public static function buildCharacterSets()
{
    if (!self::$_controlCharacters) {
        self::_buildControlCharacters();
    }

    if (!self::$_SYLKCharacters) {
        self::_buildSYLKCharacters();
    }
}
---|
340 | |
---|
/**
 * Convert from OpenXML escaped control character to PHP control character
 *
 * Excel 2007 team:
 * ----------------
 * That's correct, control characters are stored directly in the shared-strings table.
 * We do encode characters that cannot be represented in XML using the following escape sequence:
 * _xHHHH_ where H represents a hexadecimal character in the character's value...
 * So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
 * element or in the shared string <t> element.
 *
 * @param  string $value Value to unescape
 * @return string
 */
public static function ControlCharacterOOXML2PHP($value = '')
{
    // Build the lookup table on first use; with an empty table the value
    // would be returned untouched and the escapes would silently survive.
    self::buildCharacterSets();

    return str_replace(
        array_keys(self::$_controlCharacters),
        array_values(self::$_controlCharacters),
        $value
    );
}
---|
358 | |
---|
/**
 * Convert from PHP control character to OpenXML escaped control character
 *
 * Excel 2007 team:
 * ----------------
 * That's correct, control characters are stored directly in the shared-strings table.
 * We do encode characters that cannot be represented in XML using the following escape sequence:
 * _xHHHH_ where H represents a hexadecimal character in the character's value...
 * So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
 * element or in the shared string <t> element.
 *
 * @param  string $value Value to escape
 * @return string
 */
public static function ControlCharacterPHP2OOXML($value = '')
{
    // Build the lookup table on first use; with an empty table the value
    // would be returned untouched and raw control characters would leak through.
    self::buildCharacterSets();

    return str_replace(
        array_values(self::$_controlCharacters),
        array_keys(self::$_controlCharacters),
        $value
    );
}
---|
376 | |
---|
/**
 * Try to sanitize UTF8, stripping invalid byte sequences. Not perfect. Does not handle surrogate characters.
 *
 * iconv is tried first. If it fails on the input (iconv() returns false),
 * the mbstring conversion is used instead, and if neither extension can
 * help the value is returned as it was given, so a string is always returned.
 *
 * @param  string $value
 * @return string
 */
public static function SanitizeUTF8($value)
{
    if (self::getIsIconvEnabled()) {
        $converted = @iconv('UTF-8', 'UTF-8', $value);
        if ($converted !== false) {
            return $converted;
        }
        // iconv gave up on the string; fall through rather than hand
        // boolean false back to a caller that expects a string.
    }

    if (self::getIsMbstringEnabled()) {
        return mb_convert_encoding($value, 'UTF-8', 'UTF-8');
    }

    // else, no conversion
    return $value;
}
---|
398 | |
---|
/**
 * Check if a string contains UTF8 data
 *
 * An empty string counts as valid. Any other value is tested with a
 * PCRE match in UTF-8 mode, which fails when the subject is not
 * well-formed UTF-8.
 *
 * @param  string $value
 * @return boolean
 */
public static function IsUTF8($value = '')
{
    // The original body tested an undefined $string variable instead of
    // the $value parameter, so the check never looked at the input.
    return $value === '' || preg_match('/^./su', $value) === 1;
}
---|
408 | |
---|
/**
 * Turn a numeric value into a string for the output writers, making sure
 * a float uses a point as its decimal separator even when the locale
 * would print a comma.
 *
 * Non-float values are simply cast to string.
 *
 * @param  mixed $value
 * @return string
 */
public static function FormatNumber($value)
{
    $text = (string) $value;

    return is_float($value) ? str_replace(',', '.', $text) : $text;
}
---|
422 | |
---|
/**
 * Converts a UTF-8 string into BIFF8 Unicode string data.
 *
 * Without rich text runs the result is: character count (1 byte), option
 * flags (1 byte), then the string converted to UTF-16LE. The flag byte is
 * 0x01 when iconv or mbstring is available and 0x00 otherwise.
 *
 * With rich text runs the result is: character count (2 bytes), option byte
 * 0x09, number of runs (2 bytes), the UTF-16LE string, then for every run
 * its 'strlen' and 'fontidx' values as 2-byte integers.
 *
 * If neither iconv nor mbstring is available the string is not converted,
 * which will give wrong results for non-ASCII strings.
 * see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
 *
 * @param  string  $value    UTF-8 encoded string
 * @param  mixed[] $arrcRuns Details of rich text runs in $value
 * @return string
 */
public static function UTF8toBIFF8UnicodeShort($value, $arrcRuns = array())
{
    $charCount = self::CountCharacters($value, 'UTF-8');

    // Plain string: short header followed by the characters
    if (empty($arrcRuns)) {
        $canConvert = self::getIsIconvEnabled() || self::getIsMbstringEnabled();
        $flags = $canConvert ? 0x0001 : 0x0000;

        return pack('CC', $charCount, $flags)
            . self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');
    }

    // Rich text: header, run count, characters, then one record per run
    $data = pack('vC', $charCount, 0x09)
        . pack('v', count($arrcRuns))
        . self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');

    foreach ($arrcRuns as $run) {
        $data .= pack('vv', $run['strlen'], $run['fontidx']);
    }

    return $data;
}
---|
458 | |
---|
/**
 * Converts a UTF-8 string into BIFF8 Unicode string data (16-bit string length).
 *
 * The result is: character count (2 bytes), option flags (1 byte), then
 * the string converted to UTF-16LE. The flag byte is 0x01 when iconv or
 * mbstring is available and 0x00 otherwise; in the latter case the string
 * is not converted, which will give wrong results for non-ASCII strings.
 * see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
 *
 * @param  string $value UTF-8 encoded string
 * @return string
 */
public static function UTF8toBIFF8UnicodeLong($value)
{
    $charCount = self::CountCharacters($value, 'UTF-8');

    $flags = 0x0000;
    if (self::getIsIconvEnabled() || self::getIsMbstringEnabled()) {
        $flags = 0x0001;
    }

    return pack('vC', $charCount, $flags)
        . self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');
}
---|
484 | |
---|
/**
 * Convert a string from one encoding to another.
 *
 * iconv is used when it is available, otherwise mbstring. When neither can
 * be used, UTF-16LE and UTF-16BE input is passed to utf16_decode() and any
 * other input is returned unchanged.
 *
 * @param  string $value
 * @param  string $to   Encoding to convert to, e.g. 'UTF-8'
 * @param  string $from Encoding to convert from, e.g. 'UTF-16LE'
 * @return string
 */
public static function ConvertEncoding($value, $to, $from)
{
    if (self::getIsIconvEnabled()) {
        return iconv($from, $to, $value);
    }

    if (self::getIsMbstringEnabled()) {
        return mb_convert_encoding($value, $to, $from);
    }

    // No extension to lean on: only the UTF-16 sources have a fallback
    switch ($from) {
        case 'UTF-16LE':
            return self::utf16_decode($value, false);
        case 'UTF-16BE':
            return self::utf16_decode($value);
        default:
            return $value;
    }
}
---|
511 | |
---|
/**
 * Decode UTF-16 encoded strings.
 *
 * Can handle both BOM'ed data and un-BOM'ed data.
 * Assumes Big-Endian byte order if no BOM is available, unless $bom_be
 * says otherwise. A BOM found in the data takes precedence over $bom_be.
 * This function was taken from http://php.net/manual/en/function.utf8-decode.php
 * and $bom_be parameter added.
 *
 * Strings shorter than two bytes are returned unchanged. Each pair of bytes
 * yields exactly one output byte via chr(), so this is a lossy fallback.
 *
 * NOTE(review): the two bytes are combined with a shift of 4 (and the line
 * separator is matched as 0x228), where a 16-bit code unit would normally
 * use a shift of 8 (and 0x2028). Kept as-is to preserve existing output;
 * confirm whether this is intended.
 *
 * @param   string  $str     UTF-16 encoded data to decode.
 * @param   boolean $bom_be  Treat un-BOM'ed data as Big-Endian (true) or Little-Endian (false).
 * @return  string  UTF-8 / ISO encoded data.
 * @access  public
 * @version 0.2 / 2010-05-13
 * @author  Rasmus Andersson {@link http://rasmusandersson.se/}
 * @author  vadik56
 */
public static function utf16_decode($str, $bom_be = TRUE)
{
    if (strlen($str) < 2) {
        return $str;
    }

    // Square-bracket offsets: the curly-brace form ($str{0}) no longer parses on PHP 8.
    $c0 = ord($str[0]);
    $c1 = ord($str[1]);
    if ($c0 == 0xfe && $c1 == 0xff) {
        // Big-Endian BOM
        $str = substr($str, 2);
    } elseif ($c0 == 0xff && $c1 == 0xfe) {
        // Little-Endian BOM
        $str = substr($str, 2);
        $bom_be = false;
    }

    $len = strlen($str);
    $newstr = '';
    for ($i = 0; $i < $len; $i += 2) {
        $first = ord($str[$i]);
        // A dangling last byte is paired with 0 instead of reading past the end of the string
        $second = isset($str[$i + 1]) ? ord($str[$i + 1]) : 0;

        if ($bom_be) {
            $val = ($first << 4) + $second;
        } else {
            $val = ($second << 4) + $first;
        }

        $newstr .= ($val == 0x228) ? "\n" : chr($val);
    }

    return $newstr;
}
---|
542 | |
---|
/**
 * Count the characters in a string.
 *
 * Uses mb_strlen() when mbstring is available, otherwise iconv_strlen()
 * when iconv is available, and falls back to the byte count from strlen().
 *
 * @param  string $value
 * @param  string $enc   Encoding
 * @return int    Character count
 */
public static function CountCharacters($value, $enc = 'UTF-8')
{
    if (self::getIsMbstringEnabled()) {
        $count = mb_strlen($value, $enc);
    } elseif (self::getIsIconvEnabled()) {
        $count = iconv_strlen($value, $enc);
    } else {
        // No multibyte support at all: bytes are the best we can do
        $count = strlen($value);
    }

    return $count;
}
---|
563 | |
---|
/**
 * Extract part of a UTF-8 encoded string.
 *
 * Uses mb_substr() when mbstring is available, otherwise iconv_substr()
 * when iconv is available, and falls back to the byte-wise substr().
 *
 * @param  string $pValue  UTF-8 encoded string
 * @param  int    $pStart  Start offset
 * @param  int    $pLength Maximum number of characters in substring
 * @return string
 */
public static function Substring($pValue = '', $pStart = 0, $pLength = 0)
{
    if (self::getIsMbstringEnabled()) {
        $part = mb_substr($pValue, $pStart, $pLength, 'UTF-8');
    } elseif (self::getIsIconvEnabled()) {
        $part = iconv_substr($pValue, $pStart, $pLength, 'UTF-8');
    } else {
        // No multibyte support at all: cut on bytes
        $part = substr($pValue, $pStart, $pLength);
    }

    return $part;
}
---|
585 | |
---|
/**
 * Upper-case a UTF-8 encoded string.
 *
 * Falls back to strtoupper() when mb_convert_case() is not available.
 *
 * @param  string $pValue UTF-8 encoded string
 * @return string
 */
public static function StrToUpper($pValue = '')
{
    return function_exists('mb_convert_case')
        ? mb_convert_case($pValue, MB_CASE_UPPER, "UTF-8")
        : strtoupper($pValue);
}
---|
599 | |
---|
/**
 * Lower-case a UTF-8 encoded string.
 *
 * Falls back to strtolower() when mb_convert_case() is not available.
 *
 * @param  string $pValue UTF-8 encoded string
 * @return string
 */
public static function StrToLower($pValue = '')
{
    return function_exists('mb_convert_case')
        ? mb_convert_case($pValue, MB_CASE_LOWER, "UTF-8")
        : strtolower($pValue);
}
---|
613 | |
---|
/**
 * Convert a UTF-8 encoded string to title/proper case.
 *
 * Uses mb_convert_case() with MB_CASE_TITLE when it is available, and
 * falls back to ucwords() otherwise.
 *
 * @param  string $pValue UTF-8 encoded string
 * @return string
 */
public static function StrToTitle($pValue = '')
{
    return function_exists('mb_convert_case')
        ? mb_convert_case($pValue, MB_CASE_TITLE, "UTF-8")
        : ucwords($pValue);
}
---|
628 | |
---|
/**
 * Detect a string holding a whole number followed by a fraction
 * (optionally signed, e.g. "-1 3/4") and, when it matches, replace the
 * operand with the value the calculation engine returns for it.
 *
 * @param  string &$operand string value to test; overwritten on a match
 * @return boolean true when the operand matched and was replaced, false otherwise
 */
public static function convertToNumberIfFraction(&$operand)
{
    $pattern = '/^'.self::STRING_REGEXP_FRACTION.'$/i';
    if (!preg_match($pattern, $operand, $match)) {
        return false;
    }

    // The sign applies to both parts, giving a formula such as "=-1-3/4" or "=+1+3/4"
    $sign = ($match[1] == '-') ? '-' : '+';
    $fractionFormula = '='.$sign.$match[2].$sign.$match[3];

    $operand = PHPExcel_Calculation::getInstance()->_calculateFormulaValue($fractionFormula);
    return true;
}    //    function convertToNumberIfFraction()
---|
645 | |
---|
/**
 * Get the decimal separator.
 *
 * When it has not been set explicitly it is read from the locale:
 * 'decimal_point' first, then 'mon_decimal_point', and finally '.' when
 * both are empty. The value found is cached.
 *
 * @return string
 */
public static function getDecimalSeparator()
{
    if (isset(self::$_decimalSeparator)) {
        return self::$_decimalSeparator;
    }

    $locale = localeconv();
    $separator = ($locale['decimal_point'] != '')
        ? $locale['decimal_point']
        : $locale['mon_decimal_point'];

    if ($separator == '') {
        // Default to .
        $separator = '.';
    }

    self::$_decimalSeparator = $separator;
    return self::$_decimalSeparator;
}
---|
666 | |
---|
/**
 * Override the decimal separator returned by getDecimalSeparator().
 *
 * Only used by PHPExcel_Style_NumberFormat::toFormattedString()
 * to format output by PHPExcel_Writer_HTML and PHPExcel_Writer_PDF
 *
 * @param string $pValue Character for decimal separator
 */
public static function setDecimalSeparator($pValue = '.')
{
    self::$_decimalSeparator = $pValue;
}
---|
677 | |
---|
/**
 * Get the thousands separator.
 *
 * When it has not been set explicitly it is read from the locale:
 * 'thousands_sep' first, then 'mon_thousands_sep', and finally ',' when
 * both are empty. The value found is cached.
 *
 * @return string
 */
public static function getThousandsSeparator()
{
    if (isset(self::$_thousandsSeparator)) {
        return self::$_thousandsSeparator;
    }

    $locale = localeconv();
    $separator = ($locale['thousands_sep'] != '')
        ? $locale['thousands_sep']
        : $locale['mon_thousands_sep'];

    if ($separator == '') {
        // Default to ,
        $separator = ',';
    }

    self::$_thousandsSeparator = $separator;
    return self::$_thousandsSeparator;
}
---|
698 | |
---|
/**
 * Override the thousands separator returned by getThousandsSeparator().
 *
 * Only used by PHPExcel_Style_NumberFormat::toFormattedString()
 * to format output by PHPExcel_Writer_HTML and PHPExcel_Writer_PDF
 *
 * @param string $pValue Character for thousands separator
 */
public static function setThousandsSeparator($pValue = ',')
{
    self::$_thousandsSeparator = $pValue;
}
---|
709 | |
---|
/**
 * Get the currency code.
 *
 * When it has not been set explicitly it is read from the locale:
 * 'currency_symbol' first, then 'int_curr_symbol', and finally '$' when
 * both are empty. The value found is cached.
 *
 * @return string
 */
public static function getCurrencyCode()
{
    if (isset(self::$_currencyCode)) {
        return self::$_currencyCode;
    }

    $locale = localeconv();
    $symbol = ($locale['currency_symbol'] != '')
        ? $locale['currency_symbol']
        : $locale['int_curr_symbol'];

    if ($symbol == '') {
        // Default to $
        $symbol = '$';
    }

    self::$_currencyCode = $symbol;
    return self::$_currencyCode;
}
---|
730 | |
---|
/**
 * Override the currency code returned by getCurrencyCode().
 *
 * Only used by PHPExcel_Style_NumberFormat::toFormattedString()
 * to format output by PHPExcel_Writer_HTML and PHPExcel_Writer_PDF
 *
 * @param string $pValue Character for currency code
 */
public static function setCurrencyCode($pValue = '$')
{
    self::$_currencyCode = $pValue;
}
---|
741 | |
---|
/**
 * Convert SYLK encoded string to UTF-8
 *
 * Every SYLK escape sequence listed in the SYLK character table is replaced
 * by its mapped character. The replacements are applied in table order.
 *
 * @param  string $pValue
 * @return string UTF-8 encoded string
 */
public static function SYLKtoUTF8($pValue = '')
{
    // If there is no escape character in the string there is nothing to do.
    // The ESC byte is spelled out as "\x1B" so the needle cannot be lost or
    // mistaken for an empty string, which strpos() treats differently
    // across PHP versions.
    if (strpos($pValue, "\x1B") === false) {
        return $pValue;
    }

    // Build the lookup table on first use; with an empty table nothing would be replaced
    self::buildCharacterSets();

    return str_replace(
        array_keys(self::$_SYLKCharacters),
        array_values(self::$_SYLKCharacters),
        $pValue
    );
}
---|
761 | |
---|
/**
 * Return the leading numeric part of a string, or the value itself when
 * there is none.
 *
 * A value that is already numeric is returned untouched. Otherwise the
 * value is converted with floatval(), and that float is returned only if
 * the start of the original string (as many characters as the float has
 * when printed) is itself numeric.
 * (handles basic integer or float, but not exponent or non decimal)
 *
 * @param  string $value
 * @return mixed  string or only the leading numeric part of the string
 */
public static function testStringAsNumeric($value)
{
    if (is_numeric($value)) {
        return $value;
    }

    $leading = floatval($value);
    $prefix = substr($value, 0, strlen((string) $leading));

    if (is_numeric($prefix)) {
        return $leading;
    }

    return $value;
}
---|
776 | } |
---|