1 | <?php |
---|
2 | |
---|
/*
 * This file is part of the Symfony package.
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
---|
10 | |
---|
11 | namespace Symfony\Component\Yaml; |
---|
12 | |
---|
/**
 * Unescaper encapsulates unescaping rules for single and double-quoted
 * YAML strings.
 *
 * Single-quoted strings only know the doubled-quote escape (''), while
 * double-quoted strings support backslash escapes, including numeric
 * Unicode escapes (\xXX, \uXXXX, \UXXXXXXXX) that are converted to UTF-8.
 *
 * @author Matthew Lewinski <matthew@lewinski.org>
 */
class Unescaper
{
    // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
    // must be converted to that encoding.
    const ENCODING = 'UTF-8';

    // Regex fragment that matches an escaped character in a double quoted
    // string: a backslash followed by either a single-character escape or
    // one of the fixed-width hexadecimal forms (x + 2, u + 4, U + 8 digits).
    const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";

    /**
     * Unescapes a single quoted string.
     *
     * Every pair of consecutive single quotes is collapsed into one single
     * quote; no other sequence is touched.
     *
     * @param string $value A single quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeSingleQuotedString($value)
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Each sequence matched by REGEX_ESCAPED_CHARACTER is replaced with the
     * result of unescapeCharacter(); everything else is left untouched.
     *
     * The pattern is applied with the "u" modifier, so PCRE treats $value as
     * UTF-8. Per the PHP manual, preg_replace_callback() returns null when a
     * PCRE error occurs (for example on malformed UTF-8 input).
     *
     * @param string $value A double quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeDoubleQuotedString($value)
    {
        // Closures cannot use $this before PHP 5.4, so the instance is
        // captured explicitly under another name.
        $self = $this;
        $callback = function ($match) use ($self) {
            // $match[0] is the full escape sequence, backslash included.
            return $self->unescapeCharacter($match[0]);
        };

        // evaluate the string
        return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
    }

    /**
     * Unescapes a character that was found in a double-quoted string
     *
     * $value must be the complete escape sequence including the leading
     * backslash (e.g. '\n' or '\u00e9'); the character at offset 1 selects
     * the escape. Sequences that match none of the cases fall out of the
     * switch, so the method returns null for them.
     *
     * @param string $value An escaped character
     *
     * @return string The unescaped character
     */
    public function unescapeCharacter($value)
    {
        // Square-bracket offset: the curly-brace form ($value{1}) is
        // deprecated since PHP 7.4 and a parse error since PHP 8.0.
        switch ($value[1]) {
            case '0':
                // U+0000 NULL
                return "\x0";
            case 'a':
                // U+0007 BELL
                return "\x7";
            case 'b':
                // U+0008 BACKSPACE
                return "\x8";
            case 't':
            case "\t":
                // Both "\t" and backslash + literal tab yield a tab.
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                // U+000B VERTICAL TAB
                return "\xb";
            case 'f':
                // U+000C FORM FEED
                return "\xc";
            case 'r':
                // U+000D CARRIAGE RETURN
                return "\xd";
            case 'e':
                // U+001B ESCAPE
                return "\x1b";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE');
            case '_':
                // U+00A0 NO-BREAK SPACE
                return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE');
            case 'L':
                // U+2028 LINE SEPARATOR
                return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE');
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE');
            case 'x':
                // Two hex digits, packed as a 16-bit big-endian code unit.
                $char = pack('n', hexdec(substr($value, 2, 2)));

                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'u':
                // Four hex digits, packed as a 16-bit big-endian code unit.
                $char = pack('n', hexdec(substr($value, 2, 4)));

                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'U':
                // Eight hex digits, packed as a 32-bit big-endian code unit.
                $char = pack('N', hexdec(substr($value, 2, 8)));

                return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
        }
    }

    /**
     * Convert a string from one encoding to another.
     *
     * iconv is tried first, then mbstring.
     *
     * @param string $value The string to convert
     * @param string $to    The output (target) encoding
     * @param string $from  The input (source) encoding
     *
     * @return string The string with the new encoding
     *
     * @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
     */
    private function convertEncoding($value, $to, $from)
    {
        if (function_exists('iconv')) {
            // iconv() takes the source encoding first, then the target.
            return iconv($from, $to, $value);
        } elseif (function_exists('mb_convert_encoding')) {
            // mb_convert_encoding() takes the target encoding first.
            return mb_convert_encoding($value, $to, $from);
        }

        throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).');
    }
}