BaseStringHelper.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. <?php
  2. /**
  3. * @link http://www.yiiframework.com/
  4. * @copyright Copyright (c) 2008 Yii Software LLC
  5. * @license http://www.yiiframework.com/license/
  6. */
  7. namespace yii\helpers;
  8. use Yii;
  9. /**
  10. * BaseStringHelper provides concrete implementation for [[StringHelper]].
  11. *
  12. * Do not use BaseStringHelper. Use [[StringHelper]] instead.
  13. *
  14. * @author Qiang Xue <qiang.xue@gmail.com>
  15. * @author Alex Makarov <sam@rmcreative.ru>
  16. * @since 2.0
  17. */
  18. class BaseStringHelper
  19. {
  20. /**
  21. * Returns the number of bytes in the given string.
  22. * This method ensures the string is treated as a byte array by using `mb_strlen()`.
  23. * @param string $string the string being measured for length
  24. * @return int the number of bytes in the given string.
  25. */
  26. public static function byteLength($string)
  27. {
  28. return mb_strlen($string, '8bit');
  29. }
  30. /**
  31. * Returns the portion of string specified by the start and length parameters.
  32. * This method ensures the string is treated as a byte array by using `mb_substr()`.
  33. * @param string $string the input string. Must be one character or longer.
  34. * @param int $start the starting position
  35. * @param int $length the desired portion length. If not specified or `null`, there will be
  36. * no limit on length i.e. the output will be until the end of the string.
  37. * @return string the extracted part of string, or FALSE on failure or an empty string.
  38. * @see https://secure.php.net/manual/en/function.substr.php
  39. */
  40. public static function byteSubstr($string, $start, $length = null)
  41. {
  42. return mb_substr($string, $start, $length === null ? mb_strlen($string, '8bit') : $length, '8bit');
  43. }
  44. /**
  45. * Returns the trailing name component of a path.
  46. * This method is similar to the php function `basename()` except that it will
  47. * treat both \ and / as directory separators, independent of the operating system.
  48. * This method was mainly created to work on php namespaces. When working with real
  49. * file paths, php's `basename()` should work fine for you.
  50. * Note: this method is not aware of the actual filesystem, or path components such as "..".
  51. *
  52. * @param string $path A path string.
  53. * @param string $suffix If the name component ends in suffix this will also be cut off.
  54. * @return string the trailing name component of the given path.
  55. * @see https://secure.php.net/manual/en/function.basename.php
  56. */
  57. public static function basename($path, $suffix = '')
  58. {
  59. if (($len = mb_strlen($suffix)) > 0 && mb_substr($path, -$len) === $suffix) {
  60. $path = mb_substr($path, 0, -$len);
  61. }
  62. $path = rtrim(str_replace('\\', '/', $path), '/\\');
  63. if (($pos = mb_strrpos($path, '/')) !== false) {
  64. return mb_substr($path, $pos + 1);
  65. }
  66. return $path;
  67. }
  68. /**
  69. * Returns parent directory's path.
  70. * This method is similar to `dirname()` except that it will treat
  71. * both \ and / as directory separators, independent of the operating system.
  72. *
  73. * @param string $path A path string.
  74. * @return string the parent directory's path.
  75. * @see https://secure.php.net/manual/en/function.basename.php
  76. */
  77. public static function dirname($path)
  78. {
  79. $pos = mb_strrpos(str_replace('\\', '/', $path), '/');
  80. if ($pos !== false) {
  81. return mb_substr($path, 0, $pos);
  82. }
  83. return '';
  84. }
  85. /**
  86. * Truncates a string to the number of characters specified.
  87. *
  88. * @param string $string The string to truncate.
  89. * @param int $length How many characters from original string to include into truncated string.
  90. * @param string $suffix String to append to the end of truncated string.
  91. * @param string $encoding The charset to use, defaults to charset currently used by application.
  92. * @param bool $asHtml Whether to treat the string being truncated as HTML and preserve proper HTML tags.
  93. * This parameter is available since version 2.0.1.
  94. * @return string the truncated string.
  95. */
  96. public static function truncate($string, $length, $suffix = '...', $encoding = null, $asHtml = false)
  97. {
  98. if ($encoding === null) {
  99. $encoding = Yii::$app ? Yii::$app->charset : 'UTF-8';
  100. }
  101. if ($asHtml) {
  102. return static::truncateHtml($string, $length, $suffix, $encoding);
  103. }
  104. if (mb_strlen($string, $encoding) > $length) {
  105. return rtrim(mb_substr($string, 0, $length, $encoding)) . $suffix;
  106. }
  107. return $string;
  108. }
  109. /**
  110. * Truncates a string to the number of words specified.
  111. *
  112. * @param string $string The string to truncate.
  113. * @param int $count How many words from original string to include into truncated string.
  114. * @param string $suffix String to append to the end of truncated string.
  115. * @param bool $asHtml Whether to treat the string being truncated as HTML and preserve proper HTML tags.
  116. * This parameter is available since version 2.0.1.
  117. * @return string the truncated string.
  118. */
  119. public static function truncateWords($string, $count, $suffix = '...', $asHtml = false)
  120. {
  121. if ($asHtml) {
  122. return static::truncateHtml($string, $count, $suffix);
  123. }
  124. $words = preg_split('/(\s+)/u', trim($string), null, PREG_SPLIT_DELIM_CAPTURE);
  125. if (count($words) / 2 > $count) {
  126. return implode('', array_slice($words, 0, ($count * 2) - 1)) . $suffix;
  127. }
  128. return $string;
  129. }
  130. /**
  131. * Truncate a string while preserving the HTML.
  132. *
  133. * @param string $string The string to truncate
  134. * @param int $count
  135. * @param string $suffix String to append to the end of the truncated string.
  136. * @param string|bool $encoding
  137. * @return string
  138. * @since 2.0.1
  139. */
  140. protected static function truncateHtml($string, $count, $suffix, $encoding = false)
  141. {
  142. $config = \HTMLPurifier_Config::create(null);
  143. if (Yii::$app !== null) {
  144. $config->set('Cache.SerializerPath', Yii::$app->getRuntimePath());
  145. }
  146. $lexer = \HTMLPurifier_Lexer::create($config);
  147. $tokens = $lexer->tokenizeHTML($string, $config, new \HTMLPurifier_Context());
  148. $openTokens = [];
  149. $totalCount = 0;
  150. $depth = 0;
  151. $truncated = [];
  152. foreach ($tokens as $token) {
  153. if ($token instanceof \HTMLPurifier_Token_Start) { //Tag begins
  154. $openTokens[$depth] = $token->name;
  155. $truncated[] = $token;
  156. ++$depth;
  157. } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) { //Text
  158. if (false === $encoding) {
  159. preg_match('/^(\s*)/um', $token->data, $prefixSpace) ?: $prefixSpace = ['', ''];
  160. $token->data = $prefixSpace[1] . self::truncateWords(ltrim($token->data), $count - $totalCount, '');
  161. $currentCount = self::countWords($token->data);
  162. } else {
  163. $token->data = self::truncate($token->data, $count - $totalCount, '', $encoding);
  164. $currentCount = mb_strlen($token->data, $encoding);
  165. }
  166. $totalCount += $currentCount;
  167. $truncated[] = $token;
  168. } elseif ($token instanceof \HTMLPurifier_Token_End) { //Tag ends
  169. if ($token->name === $openTokens[$depth - 1]) {
  170. --$depth;
  171. unset($openTokens[$depth]);
  172. $truncated[] = $token;
  173. }
  174. } elseif ($token instanceof \HTMLPurifier_Token_Empty) { //Self contained tags, i.e. <img/> etc.
  175. $truncated[] = $token;
  176. }
  177. if ($totalCount >= $count) {
  178. if (0 < count($openTokens)) {
  179. krsort($openTokens);
  180. foreach ($openTokens as $name) {
  181. $truncated[] = new \HTMLPurifier_Token_End($name);
  182. }
  183. }
  184. break;
  185. }
  186. }
  187. $context = new \HTMLPurifier_Context();
  188. $generator = new \HTMLPurifier_Generator($config, $context);
  189. return $generator->generateFromTokens($truncated) . ($totalCount >= $count ? $suffix : '');
  190. }
  191. /**
  192. * Check if given string starts with specified substring.
  193. * Binary and multibyte safe.
  194. *
  195. * @param string $string Input string
  196. * @param string $with Part to search inside the $string
  197. * @param bool $caseSensitive Case sensitive search. Default is true. When case sensitive is enabled, $with must exactly match the starting of the string in order to get a true value.
  198. * @return bool Returns true if first input starts with second input, false otherwise
  199. */
  200. public static function startsWith($string, $with, $caseSensitive = true)
  201. {
  202. if (!$bytes = static::byteLength($with)) {
  203. return true;
  204. }
  205. if ($caseSensitive) {
  206. return strncmp($string, $with, $bytes) === 0;
  207. }
  208. $encoding = Yii::$app ? Yii::$app->charset : 'UTF-8';
  209. return mb_strtolower(mb_substr($string, 0, $bytes, '8bit'), $encoding) === mb_strtolower($with, $encoding);
  210. }
  211. /**
  212. * Check if given string ends with specified substring.
  213. * Binary and multibyte safe.
  214. *
  215. * @param string $string Input string to check
  216. * @param string $with Part to search inside of the $string.
  217. * @param bool $caseSensitive Case sensitive search. Default is true. When case sensitive is enabled, $with must exactly match the ending of the string in order to get a true value.
  218. * @return bool Returns true if first input ends with second input, false otherwise
  219. */
  220. public static function endsWith($string, $with, $caseSensitive = true)
  221. {
  222. if (!$bytes = static::byteLength($with)) {
  223. return true;
  224. }
  225. if ($caseSensitive) {
  226. // Warning check, see https://secure.php.net/manual/en/function.substr-compare.php#refsect1-function.substr-compare-returnvalues
  227. if (static::byteLength($string) < $bytes) {
  228. return false;
  229. }
  230. return substr_compare($string, $with, -$bytes, $bytes) === 0;
  231. }
  232. $encoding = Yii::$app ? Yii::$app->charset : 'UTF-8';
  233. return mb_strtolower(mb_substr($string, -$bytes, mb_strlen($string, '8bit'), '8bit'), $encoding) === mb_strtolower($with, $encoding);
  234. }
  235. /**
  236. * Explodes string into array, optionally trims values and skips empty ones.
  237. *
  238. * @param string $string String to be exploded.
  239. * @param string $delimiter Delimiter. Default is ','.
  240. * @param mixed $trim Whether to trim each element. Can be:
  241. * - boolean - to trim normally;
  242. * - string - custom characters to trim. Will be passed as a second argument to `trim()` function.
  243. * - callable - will be called for each value instead of trim. Takes the only argument - value.
  244. * @param bool $skipEmpty Whether to skip empty strings between delimiters. Default is false.
  245. * @return array
  246. * @since 2.0.4
  247. */
  248. public static function explode($string, $delimiter = ',', $trim = true, $skipEmpty = false)
  249. {
  250. $result = explode($delimiter, $string);
  251. if ($trim !== false) {
  252. if ($trim === true) {
  253. $trim = 'trim';
  254. } elseif (!is_callable($trim)) {
  255. $trim = function ($v) use ($trim) {
  256. return trim($v, $trim);
  257. };
  258. }
  259. $result = array_map($trim, $result);
  260. }
  261. if ($skipEmpty) {
  262. // Wrapped with array_values to make array keys sequential after empty values removing
  263. $result = array_values(array_filter($result, function ($value) {
  264. return $value !== '';
  265. }));
  266. }
  267. return $result;
  268. }
  269. /**
  270. * Counts words in a string.
  271. * @since 2.0.8
  272. *
  273. * @param string $string
  274. * @return int
  275. */
  276. public static function countWords($string)
  277. {
  278. return count(preg_split('/\s+/u', $string, null, PREG_SPLIT_NO_EMPTY));
  279. }
  280. /**
  281. * Returns string representation of number value with replaced commas to dots, if decimal point
  282. * of current locale is comma.
  283. * @param int|float|string $value
  284. * @return string
  285. * @since 2.0.11
  286. */
  287. public static function normalizeNumber($value)
  288. {
  289. $value = (string)$value;
  290. $localeInfo = localeconv();
  291. $decimalSeparator = isset($localeInfo['decimal_point']) ? $localeInfo['decimal_point'] : null;
  292. if ($decimalSeparator !== null && $decimalSeparator !== '.') {
  293. $value = str_replace($decimalSeparator, '.', $value);
  294. }
  295. return $value;
  296. }
  297. /**
  298. * Encodes string into "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
  299. *
  300. * > Note: Base 64 padding `=` may be at the end of the returned string.
  301. * > `=` is not transparent to URL encoding.
  302. *
  303. * @see https://tools.ietf.org/html/rfc4648#page-7
  304. * @param string $input the string to encode.
  305. * @return string encoded string.
  306. * @since 2.0.12
  307. */
  308. public static function base64UrlEncode($input)
  309. {
  310. return strtr(base64_encode($input), '+/', '-_');
  311. }
  312. /**
  313. * Decodes "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
  314. *
  315. * @see https://tools.ietf.org/html/rfc4648#page-7
  316. * @param string $input encoded string.
  317. * @return string decoded string.
  318. * @since 2.0.12
  319. */
  320. public static function base64UrlDecode($input)
  321. {
  322. return base64_decode(strtr($input, '-_', '+/'));
  323. }
  324. /**
  325. * Safely casts a float to string independent of the current locale.
  326. *
  327. * The decimal separator will always be `.`.
  328. * @param float|int $number a floating point number or integer.
  329. * @return string the string representation of the number.
  330. * @since 2.0.13
  331. */
  332. public static function floatToString($number)
  333. {
  334. // . and , are the only decimal separators known in ICU data,
  335. // so its safe to call str_replace here
  336. return str_replace(',', '.', (string) $number);
  337. }
  338. /**
  339. * Checks if the passed string would match the given shell wildcard pattern.
  340. * This function emulates [[fnmatch()]], which may be unavailable at certain environment, using PCRE.
  341. * @param string $pattern the shell wildcard pattern.
  342. * @param string $string the tested string.
  343. * @param array $options options for matching. Valid options are:
  344. *
  345. * - caseSensitive: bool, whether pattern should be case sensitive. Defaults to `true`.
  346. * - escape: bool, whether backslash escaping is enabled. Defaults to `true`.
  347. * - filePath: bool, whether slashes in string only matches slashes in the given pattern. Defaults to `false`.
  348. *
  349. * @return bool whether the string matches pattern or not.
  350. * @since 2.0.14
  351. */
  352. public static function matchWildcard($pattern, $string, $options = [])
  353. {
  354. if ($pattern === '*' && empty($options['filePath'])) {
  355. return true;
  356. }
  357. $replacements = [
  358. '\\\\\\\\' => '\\\\',
  359. '\\\\\\*' => '[*]',
  360. '\\\\\\?' => '[?]',
  361. '\*' => '.*',
  362. '\?' => '.',
  363. '\[\!' => '[^',
  364. '\[' => '[',
  365. '\]' => ']',
  366. '\-' => '-',
  367. ];
  368. if (isset($options['escape']) && !$options['escape']) {
  369. unset($replacements['\\\\\\\\']);
  370. unset($replacements['\\\\\\*']);
  371. unset($replacements['\\\\\\?']);
  372. }
  373. if (!empty($options['filePath'])) {
  374. $replacements['\*'] = '[^/\\\\]*';
  375. $replacements['\?'] = '[^/\\\\]';
  376. }
  377. $pattern = strtr(preg_quote($pattern, '#'), $replacements);
  378. $pattern = '#^' . $pattern . '$#us';
  379. if (isset($options['caseSensitive']) && !$options['caseSensitive']) {
  380. $pattern .= 'i';
  381. }
  382. return preg_match($pattern, $string) === 1;
  383. }
  384. /**
  385. * This method provides a unicode-safe implementation of built-in PHP function `ucfirst()`.
  386. *
  387. * @param string $string the string to be proceeded
  388. * @param string $encoding Optional, defaults to "UTF-8"
  389. * @return string
  390. * @see https://secure.php.net/manual/en/function.ucfirst.php
  391. * @since 2.0.16
  392. */
  393. public static function mb_ucfirst($string, $encoding = 'UTF-8')
  394. {
  395. $firstChar = mb_substr($string, 0, 1, $encoding);
  396. $rest = mb_substr($string, 1, null, $encoding);
  397. return mb_strtoupper($firstChar, $encoding) . $rest;
  398. }
  399. /**
  400. * This method provides a unicode-safe implementation of built-in PHP function `ucwords()`.
  401. *
  402. * @param string $string the string to be proceeded
  403. * @param string $encoding Optional, defaults to "UTF-8"
  404. * @return string
  405. * @see https://secure.php.net/manual/en/function.ucwords.php
  406. * @since 2.0.16
  407. */
  408. public static function mb_ucwords($string, $encoding = 'UTF-8')
  409. {
  410. $words = preg_split("/\s/u", $string, -1, PREG_SPLIT_NO_EMPTY);
  411. $titelized = array_map(function ($word) use ($encoding) {
  412. return static::mb_ucfirst($word, $encoding);
  413. }, $words);
  414. return implode(' ', $titelized);
  415. }
  416. }