Unescaper.php 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Yaml;
  11. use Symfony\Component\Yaml\Exception\ParseException;
  /**
   * Unescaper encapsulates unescaping rules for single and double-quoted
   * YAML strings.
   *
   * @author Matthew Lewinski <matthew@lewinski.org>
   *
   * @internal
   */
  class Unescaper
  {
      /**
       * Regex fragment that matches an escaped character in a double quoted string.
       *
       * It matches a backslash followed by a hexadecimal escape (xHH, uHHHH or
       * UHHHHHHHH) or, failing that, by any single character except a newline.
       * The fragment carries no delimiters; callers wrap it themselves.
       */
      const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

      /**
       * Unescapes a single quoted string.
       *
       * The only escape handled here is a doubled single quote, which is
       * collapsed into one single quote.
       *
       * @param string $value A single quoted string
       *
       * @return string The unescaped string
       */
      public function unescapeSingleQuotedString(string $value): string
      {
          return str_replace('\'\'', '\'', $value);
      }

      /**
       * Unescapes a double quoted string.
       *
       * Every match of REGEX_ESCAPED_CHARACTER is replaced by the character it
       * stands for; everything else is copied through unchanged.
       *
       * @param string $value A double quoted string
       *
       * @return string The unescaped string
       *
       * @throws ParseException When an unknown escape sequence is found, or when
       *                        the regex engine fails (e.g. malformed UTF-8 input)
       */
      public function unescapeDoubleQuotedString(string $value): string
      {
          $callback = function (array $match): string {
              return $this->unescapeCharacter($match[0]);
          };

          $result = preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);

          // preg_replace_callback() yields null on PCRE failure (the "u" modifier
          // rejects malformed UTF-8 subjects). Returning that null would violate
          // the declared return type, so report it as a parse error instead.
          if (null === $result) {
              throw new ParseException(sprintf('Unable to unescape the double quoted string (PCRE error code %d).', preg_last_error()));
          }

          return $result;
      }

      /**
       * Unescapes a character that was found in a double-quoted string.
       *
       * The byte right after the backslash selects the replacement. For the
       * x/u/U forms the following 2, 4 or 8 hexadecimal digits are decoded as a
       * code point and encoded as UTF-8.
       *
       * @param string $value An escaped character (backslash included)
       *
       * @return string The unescaped character
       *
       * @throws ParseException When the escape sequence is not recognised
       */
      private function unescapeCharacter(string $value): string
      {
          switch ($value[1]) {
              case '0':
                  return "\x00";
              case 'a':
                  return "\x07";
              case 'b':
                  return "\x08";
              case 't':
              case "\t":
                  // Both "\t" spelled out and a backslash before a literal tab
                  // produce a tab.
                  return "\t";
              case 'n':
                  return "\n";
              case 'v':
                  return "\x0B";
              case 'f':
                  return "\x0C";
              case 'r':
                  return "\r";
              case 'e':
                  return "\x1B";
              case ' ':
                  return ' ';
              case '"':
                  return '"';
              case '/':
                  return '/';
              case '\\':
                  return '\\';
              case 'N':
                  // U+0085 NEXT LINE
                  return "\xC2\x85";
              case '_':
                  // U+00A0 NO-BREAK SPACE
                  return "\xC2\xA0";
              case 'L':
                  // U+2028 LINE SEPARATOR
                  return "\xE2\x80\xA8";
              case 'P':
                  // U+2029 PARAGRAPH SEPARATOR
                  return "\xE2\x80\xA9";
              case 'x':
                  return self::utf8chr(hexdec(substr($value, 2, 2)));
              case 'u':
                  return self::utf8chr(hexdec(substr($value, 2, 4)));
              case 'U':
                  return self::utf8chr(hexdec(substr($value, 2, 8)));
              default:
                  throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
          }
      }

      /**
       * Get the UTF-8 character for the given code point.
       *
       * The code point is first reduced modulo 0x200000, so larger values wrap
       * around instead of failing. No check is made that the result is a valid
       * Unicode scalar value.
       *
       * @param int $c The code point
       *
       * @return string The 1 to 4 byte encoding of the (reduced) code point
       */
      private static function utf8chr(int $c): string
      {
          $c %= 0x200000;

          // 1 byte: 0xxxxxxx
          if ($c < 0x80) {
              return \chr($c);
          }

          // 2 bytes: 110xxxxx 10xxxxxx
          if ($c < 0x800) {
              return \chr(0xC0 | ($c >> 6)).\chr(0x80 | ($c & 0x3F));
          }

          // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
          if ($c < 0x10000) {
              return \chr(0xE0 | ($c >> 12)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
          }

          // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          return \chr(0xF0 | ($c >> 18)).\chr(0x80 | (($c >> 12) & 0x3F)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
      }
  }