Parser.php 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Yaml;
  11. use Symfony\Component\Yaml\Exception\ParseException;
  12. use Symfony\Component\Yaml\Tag\TaggedValue;
  13. /**
  14. * Parser parses YAML strings to convert them to PHP arrays.
  15. *
  16. * @author Fabien Potencier <fabien@symfony.com>
  17. *
  18. * @final
  19. */
  20. class Parser
  21. {
  // Regex fragment capturing a YAML tag (for instance "!foo") in the named group "tag".
  22. const TAG_PATTERN = '(?P<tag>![\w!.\/:-]+)';
  // Regex fragment matching a block scalar header: the "|" or ">" separator, optional
  // chomping/indentation modifiers and an optional trailing comment.
  23. const BLOCK_SCALAR_HEADER_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?';
  // Path of the file being parsed by parseFile(); passed to ParseException for error reporting.
  24. private $filename;
  // Line offset of this parser's block; added to $currentLineNb by getRealCurrentLineNb().
  25. private $offset = 0;
  // Number of lines of the document; computed once in doParse() and copied to child parsers.
  26. private $totalNumberOfLines;
  // Lines of the YAML string currently being parsed.
  27. private $lines = [];
  // Zero-based index of the current line in $lines (-1 before the first line is read).
  28. private $currentLineNb = -1;
  // Content of the line at $currentLineNb.
  29. private $currentLine = '';
  // Parsed values indexed by anchor name; shared by reference with child parsers.
  30. private $refs = [];
  // Line numbers that getRealCurrentLineNb() adds back when computing the real line number.
  31. private $skippedLineNumbers = [];
  // Skipped lines of the current block, forwarded to child parsers by parseBlock().
  // NOTE(review): populated outside the visible part of this file — confirm where.
  32. private $locallySkippedLineNumbers = [];
  // Stack of anchor names whose value is still being parsed (circular reference detection).
  33. private $refsBeingParsed = [];
  /**
   * Reads a YAML file and converts its content to a PHP value.
   *
   * @param string $filename The path to the YAML file to be parsed
   * @param int    $flags    A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed The YAML converted to a PHP value
   *
   * @throws ParseException If the file could not be read or the YAML is not valid
   */
  public function parseFile(string $filename, int $flags = 0)
  {
      if (!is_file($filename)) {
          throw new ParseException(sprintf('File "%s" does not exist.', $filename));
      }

      if (!is_readable($filename)) {
          throw new ParseException(sprintf('File "%s" cannot be read.', $filename));
      }

      // Keep the file name around while parsing so that error messages can mention it.
      $this->filename = $filename;

      try {
          $contents = file_get_contents($filename);

          return $this->parse($contents, $flags);
      } finally {
          // The file name must not leak into later parse() calls on plain strings.
          $this->filename = null;
      }
  }
  /**
   * Parses a YAML string to a PHP value.
   *
   * The parser state is reset once parsing is over, whether it succeeded or not,
   * so that the same instance can safely be reused for another document.
   *
   * @param string $value A YAML string
   * @param int    $flags A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed A PHP value
   *
   * @throws ParseException If the YAML is not valid
   */
  public function parse(string $value, int $flags = 0)
  {
      if (false === preg_match('//u', $value)) {
          throw new ParseException('The YAML value does not appear to be valid UTF-8.', -1, null, $this->filename);
      }

      $this->refs = [];

      $mbEncoding = null;
      $data = null;

      // When mbstring overloads the string functions, force UTF-8 for the duration of the parsing.
      if (2 /* MB_OVERLOAD_STRING */ & (int) ini_get('mbstring.func_overload')) {
          $mbEncoding = mb_internal_encoding();
          mb_internal_encoding('UTF-8');
      }

      try {
          $data = $this->doParse($value, $flags);
      } finally {
          if (null !== $mbEncoding) {
              mb_internal_encoding($mbEncoding);
          }

          $this->lines = [];
          $this->currentLine = '';
          $this->refs = [];
          $this->skippedLineNumbers = [];
          $this->locallySkippedLineNumbers = [];
          // An exception may leave anchors on the stack; a stale entry would later be
          // reported as a circular reference in an unrelated document.
          $this->refsBeingParsed = [];
          // doParse() only computes the total when it is null, so it must be cleared
          // for the next document parsed with this instance.
          $this->totalNumberOfLines = null;
      }

      return $data;
  }
  /**
   * Exposes the real current line number (see getRealCurrentLineNb()).
   *
   * @internal
   *
   * @return int
   */
  public function getLastLineNumberBeforeDeprecation(): int
  {
      $lineNumber = $this->getRealCurrentLineNb();

      return $lineNumber;
  }
  /**
   * Parses a YAML document, or a nested block of it, line by line.
   *
   * Every non-empty line is handled by one of three branches: a sequence item
   * ("- value"), a mapping item ("key: value") or, as a last resort, a scalar
   * (one-liner or plain multi-line string). Nested blocks are parsed by child
   * parsers created in parseBlock().
   *
   * @param string $value The YAML string to parse
   * @param int    $flags A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed The parsed PHP value, or null when the result is empty
   *
   * @throws ParseException If the YAML is not valid
   */
  private function doParse(string $value, int $flags)
  {
      $this->currentLineNb = -1;
      $this->currentLine = '';
      $value = $this->cleanup($value);
      $this->lines = explode("\n", $value);
      $this->locallySkippedLineNumbers = [];

      // Child parsers receive the total from parseBlock(); only compute it when it is unknown.
      if (null === $this->totalNumberOfLines) {
          $this->totalNumberOfLines = \count($this->lines);
      }

      if (!$this->moveToNextLine()) {
          return null;
      }

      $data = [];
      $context = null;
      $allowOverwrite = false;

      while ($this->isCurrentLineEmpty()) {
          if (!$this->moveToNextLine()) {
              return null;
          }
      }

      // Resolves the tag and returns if end of the document
      if (null !== ($tag = $this->getLineTag($this->currentLine, $flags, false)) && !$this->moveToNextLine()) {
          return new TaggedValue($tag, '');
      }

      do {
          if ($this->isCurrentLineEmpty()) {
              continue;
          }

          // tab?
          if ("\t" === $this->currentLine[0]) {
              throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }

          Inline::initialize($flags, $this->getRealCurrentLineNb(), $this->filename);

          // $isRef holds the anchor name of the current item, or false when there is none
          $isRef = $mergeNode = false;
          if ('-' === $this->currentLine[0] && self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
              if ('mapping' === $context) {
                  throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }
              $context = 'sequence';

              if (isset($values['value']) && '&' === $values['value'][0] && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
                  $isRef = $matches['ref'];
                  $this->refsBeingParsed[] = $isRef;
                  $values['value'] = $matches['value'];
              }

              if (isset($values['value'][1]) && '?' === $values['value'][0] && ' ' === $values['value'][1]) {
                  throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              // array
              if (!isset($values['value']) || '' === trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
                  $data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true) ?? '', $flags);
              } elseif (null !== $subTag = $this->getLineTag(ltrim($values['value'], ' '), $flags)) {
                  // getNextEmbedBlock() returns null when there is no nested block; parseBlock() needs a string
                  $data[] = new TaggedValue(
                      $subTag,
                      $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true) ?? '', $flags)
                  );
              } else {
                  if (isset($values['leadspaces'])
                      && self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->trimTag($values['value']), $matches)
                  ) {
                      // this is a compact notation element, add to next block and parse
                      $block = $values['value'];
                      if ($this->isNextLineIndented()) {
                          $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + \strlen($values['leadspaces']) + 1);
                      }

                      $data[] = $this->parseBlock($this->getRealCurrentLineNb(), $block, $flags);
                  } else {
                      $data[] = $this->parseValue($values['value'], $flags, $context);
                  }
              }

              // strict check: an anchor named "0" is falsy but still has to be registered
              if (false !== $isRef) {
                  $this->refs[$isRef] = end($data);
                  array_pop($this->refsBeingParsed);
              }
          } elseif (
              self::preg_match('#^(?P<key>(?:![^\s]++\s++)?(?:'.Inline::REGEX_QUOTED_STRING.'|(?:!?!php/const:)?[^ \'"\[\{!].*?)) *\:(\s++(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
              && (false === strpos($values['key'], ' #') || \in_array($values['key'][0], ['"', "'"], true))
          ) {
              if ('sequence' === $context) {
                  throw new ParseException('You cannot define a mapping item when in a sequence', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }
              $context = 'mapping';

              try {
                  $key = Inline::parseScalar($values['key']);
              } catch (ParseException $e) {
                  $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                  $e->setSnippet($this->currentLine);

                  throw $e;
              }

              // only string and integer keys are accepted, anything else (floats included) is rejected
              if (!\is_string($key) && !\is_int($key)) {
                  throw new ParseException(sprintf('%s keys are not supported. Quote your evaluable mapping keys instead.', is_numeric($key) ? 'Numeric' : 'Non-string'), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              if ('<<' === $key && (!isset($values['value']) || '&' !== $values['value'][0] || !self::preg_match('#^&(?P<ref>[^ ]+)#u', $values['value'], $refMatches))) {
                  $mergeNode = true;
                  $allowOverwrite = true;
                  if (isset($values['value'][0]) && '*' === $values['value'][0]) {
                      $refName = substr(rtrim($values['value']), 1);
                      if (!\array_key_exists($refName, $this->refs)) {
                          if (false !== $pos = array_search($refName, $this->refsBeingParsed, true)) {
                              throw new ParseException(sprintf('Circular reference [%s, %s] detected for reference "%s".', implode(', ', \array_slice($this->refsBeingParsed, $pos)), $refName, $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                          }

                          throw new ParseException(sprintf('Reference "%s" does not exist.', $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      $refValue = $this->refs[$refName];

                      if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $refValue instanceof \stdClass) {
                          $refValue = (array) $refValue;
                      }

                      if (!\is_array($refValue)) {
                          throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      $data += $refValue; // array union
                  } else {
                      // note: $value is deliberately reused here, the scalar fallback below reads it too
                      if (isset($values['value']) && '' !== $values['value']) {
                          $value = $values['value'];
                      } else {
                          $value = $this->getNextEmbedBlock();
                      }
                      // a missing nested block (null) is parsed as an empty string and rejected below
                      $parsed = $this->parseBlock($this->getRealCurrentLineNb() + 1, $value ?? '', $flags);

                      if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsed instanceof \stdClass) {
                          $parsed = (array) $parsed;
                      }

                      if (!\is_array($parsed)) {
                          throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      if (isset($parsed[0])) {
                          // If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes
                          // and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier
                          // in the sequence override keys specified in later mapping nodes.
                          foreach ($parsed as $parsedItem) {
                              if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsedItem instanceof \stdClass) {
                                  $parsedItem = (array) $parsedItem;
                              }

                              if (!\is_array($parsedItem)) {
                                  // NOTE(review): the snippet argument receives the offending item rather than the current line — confirm this is intended
                                  throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem, $this->filename);
                              }

                              $data += $parsedItem; // array union
                          }
                      } else {
                          // If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the
                          // current mapping, unless the key already exists in it.
                          $data += $parsed; // array union
                      }
                  }
              } elseif ('<<' !== $key && isset($values['value']) && '&' === $values['value'][0] && self::preg_match('#^&(?P<ref>[^ ]++) *+(?P<value>.*)#u', $values['value'], $matches)) {
                  $isRef = $matches['ref'];
                  $this->refsBeingParsed[] = $isRef;
                  $values['value'] = $matches['value'];
              }

              $subTag = null;
              if ($mergeNode) {
                  // Merge keys
              } elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
                  // hash
                  // if next line is less indented or equal, then it means that the current value is null
                  if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
                      // Spec: Keys MUST be unique; first one wins.
                      // But overwriting is allowed when a merge node is used in current block.
                      if ($allowOverwrite || !isset($data[$key])) {
                          if (null !== $subTag) {
                              $data[$key] = new TaggedValue($subTag, '');
                          } else {
                              $data[$key] = null;
                          }
                      } else {
                          throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }
                  } else {
                      // remember the parsed line number here in case we need it to provide some contexts in error messages below
                      $realCurrentLineNbKey = $this->getRealCurrentLineNb();
                      $value = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock() ?? '', $flags);
                      if ('<<' === $key) {
                          // only reached for "<<: &anchor", in which case $refMatches has been filled above
                          $this->refs[$refMatches['ref']] = $value;

                          if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $value instanceof \stdClass) {
                              $value = (array) $value;
                          }

                          // the array union below is only defined for arrays
                          if (!\is_array($value)) {
                              throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $realCurrentLineNbKey + 1, $this->currentLine, $this->filename);
                          }

                          $data += $value;
                      } elseif ($allowOverwrite || !isset($data[$key])) {
                          // Spec: Keys MUST be unique; first one wins.
                          // But overwriting is allowed when a merge node is used in current block.
                          if (null !== $subTag) {
                              $data[$key] = new TaggedValue($subTag, $value);
                          } else {
                              $data[$key] = $value;
                          }
                      } else {
                          throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $realCurrentLineNbKey + 1, $this->currentLine, $this->filename);
                      }
                  }
              } else {
                  $value = $this->parseValue(rtrim($values['value']), $flags, $context);
                  // Spec: Keys MUST be unique; first one wins.
                  // But overwriting is allowed when a merge node is used in current block.
                  if ($allowOverwrite || !isset($data[$key])) {
                      $data[$key] = $value;
                  } else {
                      throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                  }
              }

              // strict check: an anchor named "0" is falsy but still has to be registered
              if (false !== $isRef) {
                  $this->refs[$isRef] = $data[$key];
                  array_pop($this->refsBeingParsed);
              }
          } else {
              // multiple documents are not supported
              if ('---' === $this->currentLine) {
                  throw new ParseException('Multiple documents are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              if (isset($this->currentLine[1]) && '?' === $this->currentLine[0] && ' ' === $this->currentLine[1]) {
                  throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              // 1-liner optionally followed by newline(s)
              if (\is_string($value) && $this->lines[0] === trim($value)) {
                  try {
                      $value = Inline::parse($this->lines[0], $flags, $this->refs);
                  } catch (ParseException $e) {
                      $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                      $e->setSnippet($this->currentLine);

                      throw $e;
                  }

                  return $value;
              }

              // try to parse the value as a multi-line string as a last resort
              if (0 === $this->currentLineNb) {
                  $previousLineWasNewline = false;
                  $previousLineWasTerminatedWithBackslash = false;
                  $value = '';

                  foreach ($this->lines as $line) {
                      // If the indentation is not consistent at offset 0, it is to be considered as a ParseError
                      if (0 === $this->offset && isset($line[0]) && ' ' === $line[0]) {
                          throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      if (false !== strpos($line, ': ')) {
                          @trigger_error('Support for mapping keys in multi-line blocks is deprecated since Symfony 4.3 and will throw a ParseException in 5.0.', E_USER_DEPRECATED);
                      }

                      if ('' === trim($line)) {
                          $value .= "\n";
                      } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
                          $value .= ' ';
                      }

                      if ('' !== trim($line) && '\\' === substr($line, -1)) {
                          $value .= ltrim(substr($line, 0, -1));
                      } elseif ('' !== trim($line)) {
                          $value .= trim($line);
                      }

                      if ('' === trim($line)) {
                          $previousLineWasNewline = true;
                          $previousLineWasTerminatedWithBackslash = false;
                      } elseif ('\\' === substr($line, -1)) {
                          $previousLineWasNewline = false;
                          $previousLineWasTerminatedWithBackslash = true;
                      } else {
                          $previousLineWasNewline = false;
                          $previousLineWasTerminatedWithBackslash = false;
                      }
                  }

                  try {
                      return Inline::parse(trim($value));
                  } catch (ParseException $e) {
                      // fall-through to the ParseException thrown below
                  }
              }

              throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }
      } while ($this->moveToNextLine());

      if (null !== $tag) {
          $data = new TaggedValue($tag, $data);
      }

      if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && !\is_object($data) && 'mapping' === $context) {
          $object = new \stdClass();

          foreach ($data as $key => $value) {
              $object->$key = $value;
          }

          $data = $object;
      }

      return empty($data) ? null : $data;
  }
  /**
   * Parses a nested block of YAML with a dedicated child parser.
   *
   * The child shares the references of this parser (by reference) and receives
   * a copy of the anchors currently being parsed.
   *
   * @param int    $offset The line offset assigned to the child parser
   * @param string $yaml   The YAML block to parse
   * @param int    $flags  A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed The parsed PHP value
   */
  private function parseBlock(int $offset, string $yaml, int $flags)
  {
      // Start from the inherited skipped lines and add the local ones located at or after the block offset.
      $linesToSkip = $this->skippedLineNumbers;

      foreach ($this->locallySkippedLineNumbers as $skippedLine) {
          if ($skippedLine >= $offset) {
              $linesToSkip[] = $skippedLine;
          }
      }

      $child = new self();
      $child->refs = &$this->refs;
      $child->refsBeingParsed = $this->refsBeingParsed;
      $child->skippedLineNumbers = $linesToSkip;
      $child->totalNumberOfLines = $this->totalNumberOfLines;
      $child->offset = $offset;

      return $child->doParse($yaml, $flags);
  }
  /**
   * Computes the current line number, taking the block offset and the skipped lines into account.
   *
   * @internal
   *
   * @return int The current line number
   */
  public function getRealCurrentLineNb(): int
  {
      $lineNumber = $this->offset + $this->currentLineNb;

      // Each skipped line at or before the running position shifts the position by one.
      foreach ($this->skippedLineNumbers as $skipped) {
          if ($lineNumber < $skipped) {
              break;
          }

          ++$lineNumber;
      }

      return $lineNumber;
  }
  /**
   * Counts the space characters the current line starts with.
   *
   * @return int The current line indentation
   */
  private function getCurrentLineIndentation(): int
  {
      // Length of the initial run of spaces; tabs are not counted.
      return strspn($this->currentLine, ' ');
  }
  /**
   * Returns the next embed block of YAML.
   *
   * The parser is moved forward over the lines that belong to the block. When the
   * next line turns out not to start a nested block, the parser is moved back and
   * null is returned.
   *
   * @param int|null $indentation The indent level at which the block is to be read, or null for default
   * @param bool     $inSequence  True if the enclosing data structure is a sequence
   *
   * @return string|null A YAML string, or null when there is no embed block
   *
   * @throws ParseException When indentation problem are detected
   */
  private function getNextEmbedBlock(?int $indentation = null, bool $inSequence = false): ?string
  {
      $oldLineIndentation = $this->getCurrentLineIndentation();

      if (!$this->moveToNextLine()) {
          return null;
      }

      if (null === $indentation) {
          $newIndent = null;
          $movements = 0;

          do {
              $EOF = false;

              // empty and comment-like lines do not influence the indentation depth
              if ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) {
                  $EOF = !$this->moveToNextLine();

                  if (!$EOF) {
                      ++$movements;
                  }
              } else {
                  $newIndent = $this->getCurrentLineIndentation();
              }
          } while (!$EOF && null === $newIndent);

          // go back to the first line of the block now that its indentation is known
          for ($i = 0; $i < $movements; ++$i) {
              $this->moveToPreviousLine();
          }

          $unindentedEmbedBlock = $this->isStringUnIndentedCollectionItem();

          if (!$this->isCurrentLineEmpty() && 0 === $newIndent && !$unindentedEmbedBlock) {
              throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }
      } else {
          $newIndent = $indentation;
      }

      // $newIndent stays null when only empty or comment lines remain; substr() needs an integer offset
      $stripLength = $newIndent ?? 0;

      $data = [];
      if ($this->getCurrentLineIndentation() >= $newIndent) {
          $data[] = substr($this->currentLine, $stripLength);
      } elseif ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) {
          $data[] = $this->currentLine;
      } else {
          $this->moveToPreviousLine();

          return null;
      }

      if ($inSequence && $oldLineIndentation === $newIndent && isset($data[0][0]) && '-' === $data[0][0]) {
          // the previous line contained a dash but no item content, this line is a sequence item with the same indentation
          // and therefore no nested list or mapping
          $this->moveToPreviousLine();

          return null;
      }

      $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem();

      while ($this->moveToNextLine()) {
          $indent = $this->getCurrentLineIndentation();

          // an unindented collection ends with the first line of the same indentation that is not a collection item
          if ($isItUnindentedCollection && !$this->isCurrentLineEmpty() && !$this->isStringUnIndentedCollectionItem() && $newIndent === $indent) {
              $this->moveToPreviousLine();
              break;
          }

          if ($this->isCurrentLineBlank()) {
              $data[] = substr($this->currentLine, $stripLength);
              continue;
          }

          if ($indent >= $newIndent) {
              $data[] = substr($this->currentLine, $stripLength);
          } elseif ($this->isCurrentLineComment()) {
              $data[] = $this->currentLine;
          } elseif (0 == $indent) {
              // an unindented line ends the block
              $this->moveToPreviousLine();
              break;
          } else {
              throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }
      }

      return implode("\n", $data);
  }
  /**
   * Advances the parser by one line.
   *
   * @return bool False when the current line is already the last one, true otherwise
   */
  private function moveToNextLine(): bool
  {
      $nextLineNb = $this->currentLineNb + 1;

      if ($nextLineNb >= \count($this->lines)) {
          return false;
      }

      $this->currentLineNb = $nextLineNb;
      $this->currentLine = $this->lines[$nextLineNb];

      return true;
  }
  /**
   * Steps the parser back by one line.
   *
   * @return bool False when the parser is on the first line (or before it), true otherwise
   */
  private function moveToPreviousLine(): bool
  {
      if ($this->currentLineNb < 1) {
          return false;
      }

      --$this->currentLineNb;
      $this->currentLine = $this->lines[$this->currentLineNb];

      return true;
  }
  /**
   * Parses a YAML value.
   *
   * Handles, in this order: aliases ("*name"), block scalars ("|" and ">",
   * optionally tagged) and inline values, which may continue on the following lines.
   *
   * @param string $value   A YAML value
   * @param int    $flags   A bit field of PARSE_* constants to customize the YAML parser behavior
   * @param string $context The parser context (either sequence or mapping)
   *
   * @return mixed A PHP value
   *
   * @throws ParseException When reference does not exist
   */
  private function parseValue(string $value, int $flags, string $context)
  {
      if (0 === strpos($value, '*')) {
          // strip the leading "*" and, if any, the trailing " #comment"
          if (false !== $pos = strpos($value, '#')) {
              $value = substr($value, 1, $pos - 2);
          } else {
              $value = substr($value, 1);
          }

          if (!\array_key_exists($value, $this->refs)) {
              // line numbers go through getRealCurrentLineNb() so that they are right inside nested blocks too
              if (false !== $pos = array_search($value, $this->refsBeingParsed, true)) {
                  throw new ParseException(sprintf('Circular reference [%s, %s] detected for reference "%s".', implode(', ', \array_slice($this->refsBeingParsed, $pos)), $value, $value), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }

          return $this->refs[$value];
      }

      if ('' !== $value && \in_array($value[0], ['!', '|', '>'], true) && self::preg_match('/^(?:'.self::TAG_PATTERN.' +)?'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
          $modifiers = $matches['modifiers'] ?? '';

          // The modifiers may be "", "+" or "-": cast before abs(), which rejects non-numeric strings on PHP 8
          $data = $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), abs((int) $modifiers));

          if ('' !== $matches['tag'] && '!' !== $matches['tag']) {
              if ('!!binary' === $matches['tag']) {
                  return Inline::evaluateBinaryScalar($data);
              }

              return new TaggedValue(substr($matches['tag'], 1), $data);
          }

          return $data;
      }

      try {
          $quotation = '' !== $value && ('"' === $value[0] || "'" === $value[0]) ? $value[0] : null;

          // do not take following lines into account when the current line is a quoted single line value
          if (null !== $quotation && self::preg_match('/^'.$quotation.'.*'.$quotation.'(\s*#.*)?$/', $value)) {
              return Inline::parse($value, $flags, $this->refs);
          }

          $lines = [];

          while ($this->moveToNextLine()) {
              // unquoted strings end before the first unindented line
              if (null === $quotation && 0 === $this->getCurrentLineIndentation()) {
                  $this->moveToPreviousLine();

                  break;
              }

              $lines[] = trim($this->currentLine);

              // quoted string values end with a line that is terminated with the quotation character
              if ('' !== $this->currentLine && substr($this->currentLine, -1) === $quotation) {
                  break;
              }
          }

          // fold the continuation lines: a blank line becomes a newline, other lines are joined with a space
          for ($i = 0, $linesCount = \count($lines), $previousLineBlank = false; $i < $linesCount; ++$i) {
              if ('' === $lines[$i]) {
                  $value .= "\n";
                  $previousLineBlank = true;
              } elseif ($previousLineBlank) {
                  $value .= $lines[$i];
                  $previousLineBlank = false;
              } else {
                  $value .= ' '.$lines[$i];
                  $previousLineBlank = false;
              }
          }

          Inline::$parsedLineNumber = $this->getRealCurrentLineNb();

          $parsedValue = Inline::parse($value, $flags, $this->refs);

          if ('mapping' === $context && \is_string($parsedValue) && '"' !== $value[0] && "'" !== $value[0] && '[' !== $value[0] && '{' !== $value[0] && '!' !== $value[0] && false !== strpos($parsedValue, ': ')) {
              throw new ParseException('A colon cannot be used in an unquoted mapping value.', $this->getRealCurrentLineNb() + 1, $value, $this->filename);
          }

          return $parsedValue;
      } catch (ParseException $e) {
          $e->setParsedLine($this->getRealCurrentLineNb() + 1);
          $e->setSnippet($this->currentLine);

          throw $e;
      }
  }
  /**
   * Reads a literal ("|") or folded (">") block scalar starting on the next line.
   *
   * @param string $style       The style indicator that was used to begin this block scalar (| or >)
   * @param string $chomping    The chomping indicator that was used to begin this block scalar (+ or -)
   * @param int    $indentation The indentation indicator that was used to begin this block scalar
   *
   * @return string The text value
   */
  private function parseBlockScalar(string $style, string $chomping = '', int $indentation = 0): string
  {
      $notEOF = $this->moveToNextLine();

      if (!$notEOF) {
          return '';
      }

      $blockLines = [];
      $isCurrentLineBlank = $this->isCurrentLineBlank();

      // leading blank lines are consumed before determining indentation
      while ($notEOF && $isCurrentLineBlank) {
          $notEOF = $this->moveToNextLine();

          // newline only if not EOF
          if ($notEOF) {
              $blockLines[] = '';
              $isCurrentLineBlank = $this->isCurrentLineBlank();
          }
      }

      // determine indentation if not specified: number of spaces the current line starts with
      if (0 === $indentation) {
          $indentation = strspn($this->currentLine, ' ');
      }

      if ($indentation > 0) {
          $pattern = sprintf('/^ {%d}(.*)$/', $indentation);

          while ($notEOF && ($isCurrentLineBlank || self::preg_match($pattern, $this->currentLine, $matches))) {
              if (!$isCurrentLineBlank) {
                  $blockLines[] = $matches[1];
              } elseif (\strlen($this->currentLine) > $indentation) {
                  // blank line longer than the indentation: keep the extra whitespace
                  $blockLines[] = substr($this->currentLine, $indentation);
              } else {
                  $blockLines[] = '';
              }

              $notEOF = $this->moveToNextLine();

              // newline only if not EOF
              if ($notEOF) {
                  $isCurrentLineBlank = $this->isCurrentLineBlank();
              }
          }
      } elseif ($notEOF) {
          $blockLines[] = '';
      }

      if ($notEOF) {
          // the current line does not belong to the block: give it back to the caller
          $blockLines[] = '';
          $this->moveToPreviousLine();
      } elseif (!$this->isCurrentLineLastLineInDocument()) {
          $blockLines[] = '';
      }

      if ('>' === $style) {
          // folded style
          $text = '';
          $previousLineIndented = false;
          $previousLineBlank = false;

          foreach ($blockLines as $i => $blockLine) {
              if ('' === $blockLine) {
                  $text .= "\n";
                  $previousLineIndented = false;
                  $previousLineBlank = true;

                  continue;
              }

              $startsWithSpace = ' ' === $blockLine[0];

              if ($startsWithSpace || $previousLineIndented) {
                  // more-indented lines, and the line right after them, keep their line break
                  $text .= "\n".$blockLine;
              } elseif ($previousLineBlank || 0 === $i) {
                  $text .= $blockLine;
              } else {
                  $text .= ' '.$blockLine;
              }

              $previousLineIndented = $startsWithSpace;
              $previousLineBlank = false;
          }
      } else {
          $text = implode("\n", $blockLines);
      }

      // deal with trailing newlines
      if ('' === $chomping) {
          return preg_replace('/\n+$/', "\n", $text);
      }

      if ('-' === $chomping) {
          return preg_replace('/\n+$/', '', $text);
      }

      return $text;
  }
  /**
   * Returns true if the next significant line is indented.
   *
   * Blank lines and comment lines are skipped while looking ahead. The line
   * cursor is moved back to where it started on every path, including when
   * the end of the input is reached before a significant line is found.
   *
   * @return bool Returns true if a following non-blank, non-comment line exists and is indented deeper than the current line, false otherwise
   */
  private function isNextLineIndented(): bool
  {
      $currentIndentation = $this->getCurrentLineIndentation();
      $movements = 0;

      do {
          $EOF = !$this->moveToNextLine();

          if (!$EOF) {
              ++$movements;
          }
          // isCurrentLineEmpty() is true for blank lines and for comment lines
      } while (!$EOF && $this->isCurrentLineEmpty());

      // no significant line ahead means "not indented"
      $ret = !$EOF && $this->getCurrentLineIndentation() > $currentIndentation;

      // undo every successful move, so that this look-ahead never leaves the
      // cursor on a trailing blank or comment line
      // (assumes a failed moveToNextLine() does not advance the cursor, which
      // is why only successful moves are counted)
      for ($i = 0; $i < $movements; ++$i) {
          $this->moveToPreviousLine();
      }

      return $ret;
  }
  /**
   * Tells whether the current line carries no content, i.e. whether it is
   * either a blank line or a comment line.
   *
   * @return bool Returns true if the current line is blank or is a comment line, false otherwise
   */
  private function isCurrentLineEmpty(): bool
  {
      if ($this->isCurrentLineBlank()) {
          return true;
      }

      return $this->isCurrentLineComment();
  }
  /**
   * Tells whether the current line is blank.
   *
   * Only space characters are ignored: a line holding a tab or any other
   * character is not blank.
   *
   * @return bool Returns true if the current line is empty or made of spaces only, false otherwise
   */
  private function isCurrentLineBlank(): bool
  {
      $withoutSpaces = trim($this->currentLine, ' ');

      return '' === $withoutSpaces;
  }
  /**
   * Tells whether the current line is a comment line.
   *
   * A comment line is a line whose first character after the leading spaces
   * is "#".
   *
   * @return bool Returns true if the current line is a comment line, false otherwise
   */
  private function isCurrentLineComment(): bool
  {
      // looking at the first character of the left-trimmed line only
      $content = ltrim($this->currentLine, ' ');

      if ('' === $content) {
          return false;
      }

      return '#' === $content[0];
  }
  /**
   * Tells whether the current line is the last line of the document.
   *
   * The position of the current line (offset plus current line number) is
   * compared with the index of the last line (total number of lines minus one).
   *
   * @return bool Returns true if the current line position has reached the last line index, false otherwise
   */
  private function isCurrentLineLastLineInDocument(): bool
  {
      $currentPosition = $this->offset + $this->currentLineNb;
      $lastPosition = $this->totalNumberOfLines - 1;

      return $currentPosition >= $lastPosition;
  }
  /**
   * Prepares a YAML string for parsing.
   *
   * Line endings are normalized to "\n", then the "%YAML" directive, the
   * leading comment lines and the start of document marker (---) are removed.
   * The end of document marker (...) is removed only when a start marker was
   * found. The offset is increased by the number of lines removed from the
   * top of the string.
   *
   * @param string $value The input YAML string
   *
   * @return string A cleaned up YAML string
   */
  private function cleanup(string $value): string
  {
      $value = str_replace(["\r\n", "\r"], "\n", $value);

      // the YAML directive takes exactly one line
      $removed = 0;
      $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $value, -1, $removed);
      $this->offset += $removed;

      // leading comments
      $withoutComments = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $removed);
      if (1 === $removed) {
          // shift the offset by the number of lines that went away
          $this->offset += substr_count($value, "\n") - substr_count($withoutComments, "\n");
          $value = $withoutComments;
      }

      // start of the document marker (---)
      $withoutStartMarker = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $removed);
      if (1 !== $removed) {
          return $value;
      }

      // shift the offset by the number of lines that went away
      $this->offset += substr_count($value, "\n") - substr_count($withoutStartMarker, "\n");

      // end of the document marker (...)
      return preg_replace('#\.\.\.\s*$#', '', $withoutStartMarker);
  }
  /**
   * Returns true if the next significant line starts an unindented collection.
   *
   * Blank lines and comment lines are skipped while looking ahead. The line
   * cursor is moved back to where it started on every path, including when
   * the end of the input is reached before a significant line is found.
   *
   * @return bool Returns true if a following non-blank, non-comment line exists, has the same indentation as the current line and is a collection item, false otherwise
   */
  private function isNextLineUnIndentedCollection(): bool
  {
      $currentIndentation = $this->getCurrentLineIndentation();
      $movements = 0;

      do {
          $EOF = !$this->moveToNextLine();

          if (!$EOF) {
              ++$movements;
          }
          // isCurrentLineEmpty() is true for blank lines and for comment lines
      } while (!$EOF && $this->isCurrentLineEmpty());

      // no significant line ahead means "no collection"
      $ret = !$EOF
          && $this->getCurrentLineIndentation() === $currentIndentation
          && $this->isStringUnIndentedCollectionItem();

      // undo every successful move, so that this look-ahead never leaves the
      // cursor on a trailing blank or comment line
      // (assumes a failed moveToNextLine() does not advance the cursor, which
      // is why only successful moves are counted)
      for ($i = 0; $i < $movements; ++$i) {
          $this->moveToPreviousLine();
      }

      return $ret;
  }
  /**
   * Tells whether the current line is an un-indented collection item.
   *
   * This is the case when the line starts with a dash followed by a space,
   * or when it is a lone dash once trailing whitespace is removed.
   *
   * @return bool Returns true if the current line is an un-indented collection item, false otherwise
   */
  private function isStringUnIndentedCollectionItem(): bool
  {
      $line = $this->currentLine;

      if (0 === strpos($line, '- ')) {
          return true;
      }

      return '-' === rtrim($line);
  }
  /**
   * A local wrapper for "preg_match" which throws a ParseException if there
   * is an internal error in the PCRE engine.
   *
   * This avoids the need to check for "false" every time PCRE is used
   * in the YAML engine.
   *
   * @param string     $pattern The regular expression
   * @param string     $subject The string the pattern is applied to
   * @param array|null $matches Receives the match results from the native function
   * @param int        $flags   Flags forwarded to the native function
   * @param int        $offset  Start offset forwarded to the native function
   *
   * @return int The result of the native function: 1 if the pattern matches, 0 otherwise
   *
   * @throws ParseException on a PCRE internal error
   *
   * @see preg_last_error()
   *
   * @internal
   */
  public static function preg_match(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): int
  {
      $ret = preg_match($pattern, $subject, $matches, $flags, $offset);

      if (false !== $ret) {
          return $ret;
      }

      // the native function failed: turn the PCRE error code into a message
      switch (preg_last_error()) {
          case PREG_INTERNAL_ERROR:
              $error = 'Internal PCRE error.';
              break;
          case PREG_BACKTRACK_LIMIT_ERROR:
              $error = 'pcre.backtrack_limit reached.';
              break;
          case PREG_RECURSION_LIMIT_ERROR:
              $error = 'pcre.recursion_limit reached.';
              break;
          case PREG_BAD_UTF8_ERROR:
              $error = 'Malformed UTF-8 data.';
              break;
          case PREG_BAD_UTF8_OFFSET_ERROR:
              $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
              break;
          case PREG_JIT_STACKLIMIT_ERROR:
              $error = 'PCRE JIT stack limit reached.';
              break;
          default:
              $error = 'Error.';
      }

      throw new ParseException($error);
  }
  /**
   * Reduces a tagged value to the name of its tag.
   *
   * When the value starts with "!", only the characters between that "!" and
   * the first space or line break are returned. This prevents values such as
   * "!foo {quz: bar}" from being considered as a mapping block. Any other
   * value, including the empty string, is returned unchanged.
   *
   * @param string $value The value to inspect
   *
   * @return string The tag name without its leading "!", or the value itself when it does not start with a tag
   */
  private function trimTag(string $value): string
  {
      // reading offset 0 of an empty string is an error, so check for it first
      if ('' === $value || '!' !== $value[0]) {
          return $value;
      }

      return ltrim(substr($value, 1, strcspn($value, " \r\n", 1)), ' ');
  }
  /**
   * Extracts the custom tag that a line consists of.
   *
   * The value is a tag line when it starts with "!" and matches the tag
   * pattern, optionally followed by spaces and a comment.
   *
   * @param string $value         The line content to inspect
   * @param int    $flags         A bit field of Yaml::PARSE_* flags
   * @param bool   $nextLineCheck Whether the next line has to be indented for the tag to be accepted
   *
   * @return string|null The tag without its leading "!", or null when the value is not a tag line or when the required indented next line is missing
   *
   * @throws ParseException when the tag starts with "!!" (built-in tag) or when Yaml::PARSE_CUSTOM_TAGS is not set
   */
  private function getLineTag(string $value, int $flags, bool $nextLineCheck = true): ?string
  {
      $startsWithBang = '' !== $value && '!' === $value[0];

      if (!$startsWithBang || 1 !== self::preg_match('/^'.self::TAG_PATTERN.' *( +#.*)?$/', $value, $matches)) {
          return null;
      }

      if ($nextLineCheck && !$this->isNextLineIndented()) {
          return null;
      }

      // strip the leading "!" of the matched tag
      $tag = substr($matches['tag'], 1);

      // a second "!" denotes a built-in tag
      if ($tag && '!' === $tag[0]) {
          throw new ParseException(sprintf('The built-in tag "!%s" is not implemented.', $tag), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
      }

      if (!(Yaml::PARSE_CUSTOM_TAGS & $flags)) {
          throw new ParseException(sprintf('Tags support is not enabled. You must use the flag "Yaml::PARSE_CUSTOM_TAGS" to use "%s".', $matches['tag']), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
      }

      return $tag;
  }
  907. }