MessageFormatter.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. <?php
  2. /**
  3. * @link http://www.yiiframework.com/
  4. * @copyright Copyright (c) 2008 Yii Software LLC
  5. * @license http://www.yiiframework.com/license/
  6. */
  7. namespace yii\i18n;
  8. use Yii;
  9. use yii\base\Component;
  10. use yii\base\NotSupportedException;
  11. /**
  12. * MessageFormatter allows formatting messages via [ICU message format](http://userguide.icu-project.org/formatparse/messages).
  13. *
  14. * This class enhances the message formatter class provided by the PHP intl extension.
  15. *
  16. * The following enhancements are provided:
  17. *
  18. * - It accepts named arguments and mixed numeric and named arguments.
  19. * - Issues no error when an insufficient number of arguments have been provided. Instead, the placeholders will not be
  20. * substituted.
  21. * - Fixes PHP 5.5 weird placeholder replacement in case no arguments are provided at all (https://bugs.php.net/bug.php?id=65920).
  22. * - Offers limited support for message formatting in case PHP intl extension is not installed.
  23. * However it is highly recommended that you install [PHP intl extension](https://secure.php.net/manual/en/book.intl.php) if you want
  24. * to use MessageFormatter features.
  25. *
  26. * The fallback implementation only supports the following message formats:
  27. * - plural formatting for English ('one' and 'other' selectors)
  28. * - select format
  29. * - simple parameters
  30. * - integer number parameters
  31. *
  32. * The fallback implementation does NOT support the ['apostrophe-friendly' syntax](https://secure.php.net/manual/en/messageformatter.formatmessage.php).
  33. * Also messages that are working with the fallback implementation are not necessarily compatible with the
  34. * PHP intl MessageFormatter so do not rely on the fallback if you are able to install intl extension somehow.
  35. *
  36. * @property string $errorCode Code of the last error. This property is read-only.
  37. * @property string $errorMessage Description of the last error. This property is read-only.
  38. *
  39. * @author Alexander Makarov <sam@rmcreative.ru>
  40. * @author Carsten Brandt <mail@cebe.cc>
  41. * @since 2.0
  42. */
  class MessageFormatter extends Component
  {
      /**
       * @var int code of the last error, `0` after a successful operation.
       */
      private $_errorCode = 0;
      /**
       * @var string description of the last error, empty after a successful operation.
       */
      private $_errorMessage = '';


      /**
       * Get the error code from the last operation.
       * @link https://secure.php.net/manual/en/messageformatter.geterrorcode.php
       * @return int Code of the last error. It is reset to `0` at the start of [[format()]] and [[parse()]].
       */
      public function getErrorCode()
      {
          return $this->_errorCode;
      }

      /**
       * Get the error text from the last operation.
       * @link https://secure.php.net/manual/en/messageformatter.geterrormessage.php
       * @return string Description of the last error. It is reset to an empty string at the start of
       * [[format()]] and [[parse()]].
       */
      public function getErrorMessage()
      {
          return $this->_errorMessage;
      }

      /**
       * Formats a message via [ICU message format](http://userguide.icu-project.org/formatparse/messages).
       *
       * It uses the PHP intl extension's [MessageFormatter](https://secure.php.net/manual/en/class.messageformatter.php)
       * and works around some issues.
       * If PHP intl is not installed a fallback will be used that supports a subset of the ICU message format.
       *
       * @param string $pattern The pattern string to insert parameters into.
       * @param array $params The array of name value pairs to insert into the format string.
       * @param string $language The locale to use for formatting locale-dependent parts
       * @return string|false The formatted pattern string or `false` if an error occurred.
       * On failure the details are available via [[getErrorCode()]] and [[getErrorMessage()]].
       */
      public function format($pattern, $params, $language)
      {
          $this->_errorCode = 0;
          $this->_errorMessage = '';

          // nothing to substitute: return the pattern untouched
          if ($params === []) {
              return $pattern;
          }

          if (!class_exists('MessageFormatter', false)) {
              return $this->fallbackFormat($pattern, $params, $language);
          }

          // replace named arguments (https://github.com/yiisoft/yii2/issues/9678)
          $newParams = [];
          $pattern = $this->replaceNamedArguments($pattern, $params, $newParams);
          if ($pattern === false) {
              // the pattern could not be tokenized; do not hand `false` to intl as a pattern
              $this->_errorCode = -1;
              $this->_errorMessage = 'Message pattern is invalid.';

              return false;
          }

          $formatter = $this->createFormatter($language, $pattern);
          if ($formatter === null) {
              return false;
          }

          $result = $formatter->format($newParams);
          if ($result === false) {
              $this->_errorCode = $formatter->getErrorCode();
              $this->_errorMessage = $formatter->getErrorMessage();

              return false;
          }

          return $result;
      }

      /**
       * Parses an input string according to an [ICU message format](http://userguide.icu-project.org/formatparse/messages) pattern.
       *
       * It uses the PHP intl extension's [MessageFormatter::parse()](https://secure.php.net/manual/en/messageformatter.parsemessage.php)
       * and adds support for named arguments.
       * Usage of this method requires PHP intl extension to be installed.
       *
       * @param string $pattern The pattern to use for parsing the message.
       * @param string $message The message to parse, conforming to the pattern.
       * @param string $language The locale to use for formatting locale-dependent parts
       * @return array|bool An array containing items extracted (indexed by the placeholder names used in
       * the pattern), or `FALSE` on error.
       * @throws \yii\base\NotSupportedException when PHP intl extension is not installed.
       */
      public function parse($pattern, $message, $language)
      {
          $this->_errorCode = 0;
          $this->_errorMessage = '';

          if (!class_exists('MessageFormatter', false)) {
              throw new NotSupportedException('You have to install PHP intl extension to use this feature.');
          }

          // replace named arguments
          if (($tokens = self::tokenizePattern($pattern)) === false) {
              $this->_errorCode = -1;
              $this->_errorMessage = 'Message pattern is invalid.';

              return false;
          }

          // placeholder name => numeric index, in order of first appearance
          $map = [];
          foreach ($tokens as $i => $token) {
              if (!is_array($token)) {
                  continue;
              }
              $param = trim($token[0]);
              if (!isset($map[$param])) {
                  $map[$param] = count($map);
              }
              $token[0] = $map[$param];
              $tokens[$i] = '{' . implode(',', $token) . '}';
          }
          $pattern = implode('', $tokens);
          // numeric index => placeholder name, used to translate the parse result back
          $map = array_flip($map);

          // same guarded construction as in format(), so that an invalid pattern results in `false`
          // instead of an exception escaping from this method
          $formatter = $this->createFormatter($language, $pattern);
          if ($formatter === null) {
              return false;
          }

          $result = $formatter->parse($message);
          if ($result === false) {
              $this->_errorCode = $formatter->getErrorCode();
              $this->_errorMessage = $formatter->getErrorMessage();

              return false;
          }

          $values = [];
          foreach ($result as $key => $value) {
              $values[$map[$key]] = $value;
          }

          return $values;
      }

      /**
       * Creates an intl MessageFormatter and records error details if the pattern is rejected.
       *
       * @param string $language The locale passed to the intl formatter.
       * @param string $pattern The pattern, already converted to numeric placeholders.
       * @return \MessageFormatter|null The formatter, or `null` on failure. In that case the error code and
       * error message of this component have been set.
       */
      private function createFormatter($language, $pattern)
      {
          try {
              $formatter = new \MessageFormatter($language, $pattern);
          } catch (\Exception $e) {
              // IntlException is thrown since PHP 7 (it extends Exception), a plain Exception is thrown by HHVM
              $this->_errorCode = $e->getCode();
              $this->_errorMessage = 'Message pattern is invalid: ' . $e->getMessage();

              return null;
          }

          if ($formatter === null) {
              // formatter may be null in PHP 5.x
              $this->_errorCode = intl_get_error_code();
              $this->_errorMessage = 'Message pattern is invalid: ' . intl_get_error_message();

              return null;
          }

          return $formatter;
      }

      /**
       * Replace named placeholders with numeric placeholders and quote unused.
       *
       * Placeholders whose name is not a key of `$givenParams` are wrapped in apostrophes so that they are
       * treated as literal text. The bodies of `plural` and `select` placeholders are processed recursively.
       *
       * @param string $pattern The pattern string to replace things into.
       * @param array $givenParams The array of values to insert into the format string.
       * @param array $resultingParams Modified array of parameters: receives the values of the parameters
       * that are actually used by the pattern, keyed by their numeric index.
       * @param array $map Placeholder name => numeric index. Shared between recursive calls.
       * @return string|false The pattern string with placeholders replaced, or `false` if the pattern is invalid.
       */
      private function replaceNamedArguments($pattern, $givenParams, &$resultingParams = [], &$map = [])
      {
          if (($tokens = self::tokenizePattern($pattern)) === false) {
              return false;
          }

          foreach ($tokens as $i => $token) {
              if (!is_array($token)) {
                  continue;
              }

              $param = trim($token[0]);
              if (array_key_exists($param, $givenParams)) {
                  // if param is given, replace it with a number
                  if (!isset($map[$param])) {
                      $map[$param] = count($map);
                      // make sure only used params are passed to format method
                      $resultingParams[$map[$param]] = $givenParams[$param];
                  }
                  $token[0] = $map[$param];
                  $quote = '';
              } else {
                  // quote unused token
                  $quote = "'";
              }

              $type = isset($token[1]) ? trim($token[1]) : 'none';
              // replace plural and select format recursively
              if ($type === 'plural' || $type === 'select') {
                  if (!isset($token[2])) {
                      return false;
                  }
                  if (($subtokens = self::tokenizePattern($token[2])) === false) {
                      return false;
                  }
                  // sub-tokens come in pairs: selector text at $k, message token at $k + 1
                  $c = count($subtokens);
                  for ($k = 0; $k + 1 < $c; $k += 2) {
                      if (is_array($subtokens[$k]) || !is_array($subtokens[$k + 1])) {
                          return false;
                      }
                      $subpattern = $this->replaceNamedArguments(implode(',', $subtokens[$k + 1]), $givenParams, $resultingParams, $map);
                      if ($subpattern === false) {
                          // do not silently turn an invalid nested message into an empty one
                          return false;
                      }
                      $subtokens[$k + 1] = $quote . '{' . $quote . $subpattern . $quote . '}' . $quote;
                  }
                  $token[2] = implode('', $subtokens);
              }

              $tokens[$i] = $quote . '{' . $quote . implode(',', $token) . $quote . '}' . $quote;
          }

          return implode('', $tokens);
      }

      /**
       * Fallback implementation for MessageFormatter::formatMessage.
       * @param string $pattern The pattern string to insert things into.
       * @param array $args The array of values to insert into the format string
       * @param string $locale The locale to use for formatting locale-dependent parts
       * @return false|string The formatted pattern string or `false` if an error occurred
       */
      protected function fallbackFormat($pattern, $args, $locale)
      {
          if (($tokens = self::tokenizePattern($pattern)) === false) {
              $this->_errorCode = -1;
              $this->_errorMessage = 'Message pattern is invalid.';

              return false;
          }

          foreach ($tokens as $i => $token) {
              if (!is_array($token)) {
                  // plain text between placeholders is kept as is
                  continue;
              }
              if (($tokens[$i] = $this->parseToken($token, $args, $locale)) === false) {
                  $this->_errorCode = -1;
                  $this->_errorMessage = 'Message pattern is invalid.';

                  return false;
              }
          }

          return implode('', $tokens);
      }

      /**
       * Tokenizes a pattern by separating normal text from replaceable patterns.
       *
       * The result alternates between plain text (strings) and placeholders (arrays). A placeholder array
       * holds the text between its outermost braces split on commas into at most three parts.
       * Apostrophe quoting is not interpreted here: every brace counts.
       *
       * @param string $pattern pattern to tokenize
       * @return array|bool array of tokens or false on failure (unbalanced braces)
       */
      private static function tokenizePattern($pattern)
      {
          $charset = Yii::$app ? Yii::$app->charset : 'UTF-8';
          $depth = 1;
          if (($start = $pos = mb_strpos($pattern, '{', 0, $charset)) === false) {
              // no placeholder at all: the whole pattern is a single text token
              return [$pattern];
          }

          $tokens = [mb_substr($pattern, 0, $pos, $charset)];
          while (true) {
              $open = mb_strpos($pattern, '{', $pos + 1, $charset);
              $close = mb_strpos($pattern, '}', $pos + 1, $charset);
              if ($open === false && $close === false) {
                  break;
              }
              if ($open === false) {
                  // no further opening brace: place it at the end of the string so a closing brace sorts first
                  $open = mb_strlen($pattern, $charset);
              }
              if ($close > $open) {
                  $depth++;
                  $pos = $open;
              } else {
                  $depth--;
                  $pos = $close;
              }
              if ($depth === 0) {
                  // an outermost placeholder has just been closed: emit it and the text that follows it
                  $tokens[] = explode(',', mb_substr($pattern, $start + 1, $pos - $start - 1, $charset), 3);
                  $start = $pos + 1;
                  $tokens[] = mb_substr($pattern, $start, $open - $start, $charset);
                  $start = $open;
              }

              if ($depth !== 0 && ($open === false || $close === false)) {
                  break;
              }
          }

          if ($depth !== 0) {
              return false;
          }

          return $tokens;
      }

      /**
       * Parses a token.
       *
       * This is the formatting step of the fallback implementation: it renders one placeholder using the
       * given arguments. A placeholder for which `isset($args[$name])` is false (missing or `null`) is
       * returned unchanged, including its braces.
       *
       * @param array $token the token to parse
       * @param array $args arguments to replace
       * @param string $locale the locale
       * @return bool|string parsed token or false on failure
       * @throws \yii\base\NotSupportedException when unsupported formatting is used.
       */
      private function parseToken($token, $args, $locale)
      {
          // parsing pattern based on ICU grammar:
          // http://icu-project.org/apiref/icu4c/classMessageFormat.html#details
          $charset = Yii::$app ? Yii::$app->charset : 'UTF-8';
          $param = trim($token[0]);
          if (!isset($args[$param])) {
              return '{' . implode(',', $token) . '}';
          }
          $arg = $args[$param];

          $type = isset($token[1]) ? trim($token[1]) : 'none';
          switch ($type) {
              case 'date':
              case 'time':
              case 'spellout':
              case 'ordinal':
              case 'duration':
              case 'choice':
              case 'selectordinal':
                  throw new NotSupportedException("Message format '$type' is not supported. You have to install PHP intl extension to use this feature.");
              case 'number':
                  $format = isset($token[2]) ? trim($token[2]) : null;
                  if (is_numeric($arg) && ($format === null || $format === 'integer')) {
                      $text = trim((string) $arg);
                      if ($format === null && ($pos = strpos($text, '.')) !== false) {
                          // Keep decimals of unknown length verbatim. Only the truncated integer part is
                          // grouped: number_format() on the full value would round it (2.5 => "3") and
                          // the result would become "3.5".
                          $sign = strncmp($text, '-', 1) === 0 ? '-' : '';
                          $integerPart = number_format(abs((float) substr($text, 0, $pos)));

                          return $sign . $integerPart . '.' . substr($text, $pos + 1);
                      }

                      return number_format($arg);
                  }
                  throw new NotSupportedException("Message format 'number' is only supported for integer values. You have to install PHP intl extension to use this feature.");
              case 'none':
                  return $arg;
              case 'select':
                  /* http://icu-project.org/apiref/icu4c/classicu_1_1SelectFormat.html
                  selectStyle = (selector '{' message '}')+
                  */
                  if (!isset($token[2])) {
                      return false;
                  }
                  if (($select = self::tokenizePattern($token[2])) === false) {
                      return false;
                  }
                  // Keywords are matched against the string value of the argument. A loose `==` would
                  // let integer 0 match any non-numeric keyword on PHP < 8.
                  $needle = null;
                  if (is_scalar($arg) || (is_object($arg) && method_exists($arg, '__toString'))) {
                      $needle = (string) $arg;
                  }
                  // sub-tokens come in pairs: selector text at $i, message token at $i + 1
                  $c = count($select);
                  $message = false;
                  for ($i = 0; $i + 1 < $c; $i += 2) {
                      if (is_array($select[$i]) || !is_array($select[$i + 1])) {
                          return false;
                      }
                      $selector = trim($select[$i]);
                      // 'other' is only used as long as nothing has been selected yet
                      if (($message === false && $selector === 'other') || ($needle !== null && $selector === $needle)) {
                          $message = implode(',', $select[$i + 1]);
                      }
                  }
                  if ($message !== false) {
                      return $this->fallbackFormat($message, $args, $locale);
                  }
                  break;
              case 'plural':
                  /* http://icu-project.org/apiref/icu4c/classicu_1_1PluralFormat.html
                  pluralStyle = [offsetValue] (selector '{' message '}')+
                  offsetValue = "offset:" number
                  selector = explicitValue | keyword
                  explicitValue = '=' number  // adjacent, no white space in between
                  keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
                  message: see MessageFormat
                  */
                  if (!isset($token[2])) {
                      return false;
                  }
                  if (($plural = self::tokenizePattern($token[2])) === false) {
                      return false;
                  }
                  // sub-tokens come in pairs: selector text at $i, message token at $i + 1
                  $c = count($plural);
                  $message = false;
                  $offset = 0;
                  for ($i = 0; $i + 1 < $c; $i += 2) {
                      if (is_array($plural[$i]) || !is_array($plural[$i + 1])) {
                          return false;
                      }
                      $selector = trim($plural[$i]);
                      // an "offset:N" prefix may only precede the very first selector
                      if ($i === 0 && strncmp($selector, 'offset:', 7) === 0) {
                          $normalized = str_replace(["\n", "\r", "\t"], ' ', $selector);
                          $pos = mb_strpos($normalized, ' ', 7, $charset);
                          $offset = (int) trim(mb_substr($selector, 7, $pos - 7, $charset));
                          $selector = trim(mb_substr($selector, $pos + 1, mb_strlen($selector, $charset), $charset));
                      }
                      // strncmp() is safe for an empty selector, unlike $selector[0]
                      $isExplicit = strncmp($selector, '=', 1) === 0;
                      if (($message === false && $selector === 'other')
                          // explicit values are compared numerically so that numeric strings match as well
                          || ($isExplicit && is_numeric($arg) && (int) mb_substr($selector, 1, mb_strlen($selector, $charset), $charset) == $arg)
                          || ($selector === 'one' && $arg - $offset == 1)
                      ) {
                          $message = implode(',', str_replace('#', $arg - $offset, $plural[$i + 1]));
                      }
                  }
                  if ($message !== false) {
                      return $this->fallbackFormat($message, $args, $locale);
                  }
                  break;
          }

          return false;
      }
  }