Unescaper.php 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Yaml;
  11. use Symfony\Component\Yaml\Exception\ParseException;
  12. /**
  13. * Unescaper encapsulates unescaping rules for single and double-quoted
  14. * YAML strings.
  15. *
  16. * @author Matthew Lewinski <matthew@lewinski.org>
  17. *
  18. * @internal
  19. */
  20. class Unescaper
  21. {
  22. /**
  23. * Regex fragment that matches an escaped character in a double quoted string.
  24. */
  25. public const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';
  26. /**
  27. * Unescapes a single quoted string.
  28. *
  29. * @param string $value A single quoted string
  30. */
  31. public function unescapeSingleQuotedString(string $value): string
  32. {
  33. return str_replace('\'\'', '\'', $value);
  34. }
  35. /**
  36. * Unescapes a double quoted string.
  37. *
  38. * @param string $value A double quoted string
  39. */
  40. public function unescapeDoubleQuotedString(string $value): string
  41. {
  42. $callback = fn ($match) => $this->unescapeCharacter($match[0]);
  43. // evaluate the string
  44. return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
  45. }
  46. /**
  47. * Unescapes a character that was found in a double-quoted string.
  48. *
  49. * @param string $value An escaped character
  50. */
  51. private function unescapeCharacter(string $value): string
  52. {
  53. return match ($value[1]) {
  54. '0' => "\x0",
  55. 'a' => "\x7",
  56. 'b' => "\x8",
  57. 't' => "\t",
  58. "\t" => "\t",
  59. 'n' => "\n",
  60. 'v' => "\xB",
  61. 'f' => "\xC",
  62. 'r' => "\r",
  63. 'e' => "\x1B",
  64. ' ' => ' ',
  65. '"' => '"',
  66. '/' => '/',
  67. '\\' => '\\',
  68. // U+0085 NEXT LINE
  69. 'N' => "\xC2\x85",
  70. // U+00A0 NO-BREAK SPACE
  71. '_' => "\xC2\xA0",
  72. // U+2028 LINE SEPARATOR
  73. 'L' => "\xE2\x80\xA8",
  74. // U+2029 PARAGRAPH SEPARATOR
  75. 'P' => "\xE2\x80\xA9",
  76. 'x' => self::utf8chr(hexdec(substr($value, 2, 2))),
  77. 'u' => self::utf8chr(hexdec(substr($value, 2, 4))),
  78. 'U' => self::utf8chr(hexdec(substr($value, 2, 8))),
  79. default => throw new ParseException(sprintf('Found unknown escape character "%s".', $value)),
  80. };
  81. }
  82. /**
  83. * Get the UTF-8 character for the given code point.
  84. */
  85. private static function utf8chr(int $c): string
  86. {
  87. if (0x80 > $c %= 0x200000) {
  88. return \chr($c);
  89. }
  90. if (0x800 > $c) {
  91. return \chr(0xC0 | $c >> 6).\chr(0x80 | $c & 0x3F);
  92. }
  93. if (0x10000 > $c) {
  94. return \chr(0xE0 | $c >> 12).\chr(0x80 | $c >> 6 & 0x3F).\chr(0x80 | $c & 0x3F);
  95. }
  96. return \chr(0xF0 | $c >> 18).\chr(0x80 | $c >> 12 & 0x3F).\chr(0x80 | $c >> 6 & 0x3F).\chr(0x80 | $c & 0x3F);
  97. }
  98. }