TokenizerPatterns.php 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector\Parser\Tokenizer;
  11. /**
  12. * CSS selector tokenizer patterns builder.
  13. *
  14. * This component is a port of the Python cssselect library,
  15. * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
  16. *
  17. * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
  18. *
  19. * @internal
  20. */
  class TokenizerPatterns
  {
      /** @var string Backslash, 1 to 6 hex digits (captured), then one optional whitespace (CRLF counts as one). */
      private $unicodeEscapePattern;

      /** @var string Backslash followed by any single character (captured). */
      private $simpleEscapePattern;

      /** @var string Backslash followed by a line break (LF, CRLF, CR or FF). */
      private $newLineEscapePattern;

      /** @var string Unicode escape, or backslash followed by a character that is neither a line break nor a hex digit. */
      private $escapePattern;

      /** @var string Escapes accepted inside a quoted string: escaped line break or $escapePattern. */
      private $stringEscapePattern;

      /** @var string Any character outside the \x00-\x7F range. */
      private $nonAsciiPattern;

      /** @var string One name character: [_a-z0-9-], an escape, or a non-ASCII character. */
      private $nmCharPattern;

      /** @var string One name-start character: [_a-z], an escape, or a non-ASCII character. */
      private $nmStartPattern;

      /** @var string Optional "-", one name-start character, then any number of name characters. */
      private $identifierPattern;

      /** @var string "#" followed by one or more name characters (the name is captured). */
      private $hashPattern;

      /** @var string Optionally signed integer or decimal number. */
      private $numberPattern;

      /** @var string sprintf() template for the content of a quoted string; %s receives the quote character. */
      private $quotedStringPattern;

      /**
       * Builds every pattern fragment once.
       *
       * The fragments are un-delimited and un-anchored; the getters below wrap
       * them with the "~" delimiter, a leading "^" anchor and, where needed,
       * the case-insensitive flag.
       *
       * All literals are single-quoted on purpose: sequences such as \n, \r,
       * \f and \x00 reach PCRE untouched and are interpreted by the regex
       * engine, while '\\\\' yields the two backslashes PCRE needs to match
       * one literal backslash.
       */
      public function __construct()
      {
          $this->unicodeEscapePattern = '\\\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?';
          $this->simpleEscapePattern = '\\\\(.)';
          $this->newLineEscapePattern = '\\\\(?:\n|\r\n|\r|\f)';
          $this->escapePattern = $this->unicodeEscapePattern.'|\\\\[^\n\r\f0-9a-f]';
          $this->stringEscapePattern = $this->newLineEscapePattern.'|'.$this->escapePattern;
          $this->nonAsciiPattern = '[^\x00-\x7F]';
          $this->nmCharPattern = '[_a-z0-9-]|'.$this->escapePattern.'|'.$this->nonAsciiPattern;
          $this->nmStartPattern = '[_a-z]|'.$this->escapePattern.'|'.$this->nonAsciiPattern;
          $this->identifierPattern = '-?(?:'.$this->nmStartPattern.')(?:'.$this->nmCharPattern.')*';
          $this->hashPattern = '#((?:'.$this->nmCharPattern.')+)';
          $this->numberPattern = '[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)';

          // The plain-character class must exclude the backslash. Otherwise the
          // first alternative consumes every "\" on its own, the escape
          // alternatives are never tried, and the match ends right before an
          // escaped quote (\" or \') or an escaped line break.
          $this->quotedStringPattern = '([^\n\r\f\\\\%s]|'.$this->stringEscapePattern.')*';
      }

      /**
       * Returns the anchored, case-sensitive pattern matching a backslash
       * followed by a line break.
       */
      public function getNewLineEscapePattern(): string
      {
          return '~^'.$this->newLineEscapePattern.'~';
      }

      /**
       * Returns the anchored, case-sensitive pattern matching a backslash
       * followed by any single character; that character is capture group 1.
       */
      public function getSimpleEscapePattern(): string
      {
          return '~^'.$this->simpleEscapePattern.'~';
      }

      /**
       * Returns the anchored, case-insensitive pattern matching a unicode
       * escape; the hex digits are capture group 1.
       */
      public function getUnicodeEscapePattern(): string
      {
          return '~^'.$this->unicodeEscapePattern.'~i';
      }

      /**
       * Returns the anchored, case-insensitive pattern matching an identifier.
       */
      public function getIdentifierPattern(): string
      {
          return '~^'.$this->identifierPattern.'~i';
      }

      /**
       * Returns the anchored, case-insensitive pattern matching a hash token;
       * the name after "#" is capture group 1.
       */
      public function getHashPattern(): string
      {
          return '~^'.$this->hashPattern.'~i';
      }

      /**
       * Returns the anchored, case-sensitive pattern matching a number.
       */
      public function getNumberPattern(): string
      {
          return '~^'.$this->numberPattern.'~';
      }

      /**
       * Returns the anchored, case-insensitive pattern matching the content of
       * a quoted string, i.e. everything up to (not including) the first
       * unescaped $quote or line break.
       *
       * The pattern can match the empty string, so it always matches; the
       * opening and closing quotes themselves are not part of the pattern.
       *
       * @param string $quote Character that delimits the string. It is inserted
       *                      verbatim into a regex character class and is not
       *                      escaped here (presumably callers only pass ' or ").
       */
      public function getQuotedStringPattern(string $quote): string
      {
          return '~^'.sprintf($this->quotedStringPattern, $quote).'~i';
      }
  }