ByteString.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String;
  11. use Symfony\Component\String\Exception\ExceptionInterface;
  12. use Symfony\Component\String\Exception\InvalidArgumentException;
  13. use Symfony\Component\String\Exception\RuntimeException;
  14. /**
  15. * Represents a binary-safe string of bytes.
  16. *
  17. * @author Nicolas Grekas <p@tchwork.com>
  18. * @author Hugo Hamon <hugohamon@neuf.fr>
  19. *
  20. * @throws ExceptionInterface
  21. */
  22. class ByteString extends AbstractString
  23. {
  /**
   * Default alphabet used by fromRandom() when the caller passes none.
   *
   * Contains the digits 1-9 and the ASCII letters, minus "0", "I", "O" and "l"
   * (presumably left out because they are easy to confuse with each other).
   */
  private const ALPHABET_ALPHANUMERIC = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz';
  /**
   * Wraps the given raw bytes; an empty string is used when no argument is passed.
   */
  public function __construct(string $string = '')
  {
      $this->string = $string;
  }
  29. /*
  30. * The following method was derived from code of the Hack Standard Library (v4.40 - 2020-05-03)
  31. *
  32. * https://github.com/hhvm/hsl/blob/80a42c02f036f72a42f0415e80d6b847f4bf62d5/src/random/private.php#L16
  33. *
  34. * Code subject to the MIT license (https://github.com/hhvm/hsl/blob/master/LICENSE).
  35. *
  36. * Copyright (c) 2004-2020, Facebook, Inc. (https://www.facebook.com/)
  37. */
  /**
   * Builds a random string of $length characters picked from $alphabet.
   *
   * Bytes from random_bytes() are unpacked into groups of ceil(log2(alphabet size)) bits.
   * A group whose value is not a valid index into the alphabet is thrown away and another
   * one is drawn, so no modulo reduction is ever applied to the random data.
   *
   * @param int         $length   Number of characters to produce; must be strictly positive
   * @param string|null $alphabet Bytes to pick from; self::ALPHABET_ALPHANUMERIC when null
   *
   * @throws InvalidArgumentException When $length is not strictly positive or when the alphabet
   *                                  holds fewer than two bytes
   */
  public static function fromRandom(int $length = 16, ?string $alphabet = null): self
  {
      if ($length <= 0) {
          throw new InvalidArgumentException(sprintf('A strictly positive length is expected, "%d" given.', $length));
      }

      $alphabet = $alphabet ?? self::ALPHABET_ALPHANUMERIC;
      $alphabetSize = \strlen($alphabet);

      // Skip log() for alphabets of 0 or 1 bytes: log(0) is -INF, which must not be cast to int.
      $bits = $alphabetSize > 1 ? (int) ceil(log($alphabetSize, 2.0)) : 0;
      if ($bits <= 0 || $bits > 56) {
          throw new InvalidArgumentException('The length of the alphabet must be in the [2^1, 2^56] range.');
      }

      // Selects the $bits lowest bits of the unpacked data; invariant across the loops below.
      $mask = (1 << $bits) - 1;
      $ret = '';

      while ($length > 0) {
          // Ask for twice the bits strictly needed to leave room for rejected groups.
          $urandomLength = (int) ceil(2 * $length * $bits / 8.0);
          $data = random_bytes($urandomLength);
          $unpackedData = 0;
          $unpackedBits = 0;

          for ($i = 0; $i < $urandomLength && $length > 0; ++$i) {
              // Unpack 8 bits
              $unpackedData = ($unpackedData << 8) | \ord($data[$i]);
              $unpackedBits += 8;

              // While we have enough bits to select a character from the alphabet, keep
              // consuming the random data
              for (; $unpackedBits >= $bits && $length > 0; $unpackedBits -= $bits) {
                  $index = $unpackedData & $mask;
                  $unpackedData >>= $bits;

                  // Unfortunately, the alphabet size is not necessarily a power of two.
                  // Worst case, it is 2^k + 1, which means we need (k+1) bits and we
                  // have around a 50% chance of missing as k gets larger
                  if ($index < $alphabetSize) {
                      $ret .= $alphabet[$index];
                      --$length;
                  }
              }
          }
      }

      return new static($ret);
  }
  /**
   * Returns the numeric value of the byte found at $offset.
   *
   * @return int[] A one-element array holding the byte value, or an empty array
   *               when no byte exists at that offset
   */
  public function bytesAt(int $offset): array
  {
      $byte = $this->string[$offset] ?? '';

      if ('' === $byte) {
          return [];
      }

      return [\ord($byte)];
  }
  /**
   * Returns a copy of this string with the given suffixes concatenated at its end, in order.
   */
  public function append(string ...$suffix): parent
  {
      $appended = clone $this;

      if (1 >= \count($suffix)) {
          // Zero or one suffix: no need to go through implode().
          $appended->string .= $suffix[0] ?? '';
      } else {
          $appended->string .= implode('', $suffix);
      }

      return $appended;
  }
  /**
   * Returns a camelCased copy of this string.
   *
   * Every run of bytes other than ASCII letters, digits and 0x7F-0xFF acts as a word
   * separator; words are capitalized and glued together, then the first byte is lower-cased.
   */
  public function camel(): parent
  {
      $spaced = preg_replace('/[^a-zA-Z0-9\x7f-\xff]++/', ' ', $this->string);
      $glued = str_replace(' ', '', ucwords($spaced));

      $camel = clone $this;
      $camel->string = lcfirst($glued);

      return $camel;
  }
  /**
   * Cuts this string into consecutive pieces of at most $length bytes.
   *
   * @return static[] One instance per piece; an empty array for an empty string
   *
   * @throws InvalidArgumentException When $length is lower than 1
   */
  public function chunk(int $length = 1): array
  {
      if ($length < 1) {
          throw new InvalidArgumentException('The chunk length must be greater than zero.');
      }

      if ('' === $this->string) {
          return [];
      }

      // Reusable carrier: its content is overwritten for each piece, then cloned.
      $carrier = clone $this;
      $pieces = [];

      foreach (str_split($this->string, $length) as $piece) {
          $carrier->string = $piece;
          $pieces[] = clone $carrier;
      }

      return $pieces;
  }
  /**
   * Tells whether this string ends with $suffix.
   *
   * Arrays and Traversable values are handed over to the parent implementation; any other
   * value is cast to string. An empty suffix never matches. The comparison is
   * case-insensitive when $this->ignoreCase is set.
   *
   * @param string|iterable|AbstractString $suffix
   */
  public function endsWith($suffix): bool
  {
      if (!$suffix instanceof parent) {
          if (\is_array($suffix) || $suffix instanceof \Traversable) {
              return parent::endsWith($suffix);
          }

          $needle = (string) $suffix;
      } else {
          $needle = $suffix->string;
      }

      $needleLength = \strlen($needle);

      if (0 === $needleLength || \strlen($this->string) < $needleLength) {
          return false;
      }

      return 0 === substr_compare($this->string, $needle, -$needleLength, null, $this->ignoreCase);
  }
  /**
   * Tells whether this string holds the same bytes as $string.
   *
   * Arrays and Traversable values are handed over to the parent implementation; any other
   * value is cast to string. When $this->ignoreCase is set and the other string is not
   * empty, the comparison is done with strcasecmp().
   *
   * @param string|iterable|AbstractString $string
   */
  public function equalsTo($string): bool
  {
      if (!$string instanceof parent) {
          if (\is_array($string) || $string instanceof \Traversable) {
              return parent::equalsTo($string);
          }

          $other = (string) $string;
      } else {
          $other = $string->string;
      }

      if ($this->ignoreCase && '' !== $other) {
          return 0 === strcasecmp($other, $this->string);
      }

      return $other === $this->string;
  }
  /**
   * Returns a case-folded copy of this string; for bytes this is plain strtolower().
   */
  public function folded(): parent
  {
      $folded = clone $this;
      $folded->string = strtolower($folded->string);

      return $folded;
  }
  /**
   * Returns the byte position of the first occurrence of $needle, searching from $offset.
   *
   * Arrays and Traversable values are handed over to the parent implementation; any other
   * value is cast to string. The search is case-insensitive when $this->ignoreCase is set.
   *
   * @param string|iterable|AbstractString $needle
   *
   * @return int|null The position, or null when the needle is empty or not found
   */
  public function indexOf($needle, int $offset = 0): ?int
  {
      if (!$needle instanceof parent) {
          if (\is_array($needle) || $needle instanceof \Traversable) {
              return parent::indexOf($needle, $offset);
          }

          $search = (string) $needle;
      } else {
          $search = $needle->string;
      }

      if ('' === $search) {
          return null;
      }

      if ($this->ignoreCase) {
          $position = stripos($this->string, $search, $offset);
      } else {
          $position = strpos($this->string, $search, $offset);
      }

      return false === $position ? null : $position;
  }
  /**
   * Returns the byte position of the last occurrence of $needle.
   *
   * Arrays and Traversable values are handed over to the parent implementation; any other
   * value is cast to string. $offset is forwarded as-is to strrpos()/strripos(); the
   * case-insensitive variant is used when $this->ignoreCase is set.
   *
   * @param string|iterable|AbstractString $needle
   *
   * @return int|null The position, or null when the needle is empty or not found
   */
  public function indexOfLast($needle, int $offset = 0): ?int
  {
      if (!$needle instanceof parent) {
          if (\is_array($needle) || $needle instanceof \Traversable) {
              return parent::indexOfLast($needle, $offset);
          }

          $search = (string) $needle;
      } else {
          $search = $needle->string;
      }

      if ('' === $search) {
          return null;
      }

      if ($this->ignoreCase) {
          $position = strripos($this->string, $search, $offset);
      } else {
          $position = strrpos($this->string, $search, $offset);
      }

      return false === $position ? null : $position;
  }
  /**
   * Tells whether the bytes form valid UTF-8; an empty string counts as valid.
   */
  public function isUtf8(): bool
  {
      if ('' === $this->string) {
          return true;
      }

      // An empty pattern with the "u" modifier only matches when the subject is valid UTF-8.
      return 1 === preg_match('//u', $this->string);
  }
  /**
   * Joins $strings using this string as the glue and returns the result as a new instance.
   *
   * @param array       $strings  Pieces to join
   * @param string|null $lastGlue When not null and there is more than one piece, placed
   *                              before the last piece instead of the regular glue
   */
  public function join(array $strings, ?string $lastGlue = null): parent
  {
      $joined = clone $this;
      $tail = '';

      if (null !== $lastGlue && 1 < \count($strings)) {
          // Take the last piece out so that implode() does not put the regular glue before it.
          $tail = $lastGlue.array_pop($strings);
      }

      $joined->string = implode($this->string, $strings).$tail;

      return $joined;
  }
  /**
   * Returns the length of this string, counted in bytes.
   */
  public function length(): int
  {
      return \strlen($this->string);
  }
  /**
   * Returns a copy of this string passed through strtolower().
   */
  public function lower(): parent
  {
      $lowered = clone $this;
      $lowered->string = strtolower($lowered->string);

      return $lowered;
  }
  /**
   * Runs $regexp against this string and returns the captured groups.
   *
   * preg_match_all() is used when $flags contains PREG_PATTERN_ORDER or PREG_SET_ORDER,
   * preg_match() otherwise. PREG_UNMATCHED_AS_NULL is always added to the flags, and the
   * "i" modifier is appended to the pattern when $this->ignoreCase is set.
   *
   * @throws InvalidArgumentException When the preg call triggers a PHP error
   * @throws RuntimeException         When the preg call returns false
   */
  public function match(string $regexp, int $flags = 0, int $offset = 0): array
  {
      $wantsAll = 0 !== ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags);
      $pregFunction = $wantsAll ? 'preg_match_all' : 'preg_match';

      if ($this->ignoreCase) {
          $regexp .= 'i';
      }

      // Turn any PHP error raised by the preg call into an exception.
      set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

      try {
          $result = $pregFunction($regexp, $this->string, $matches, $flags | \PREG_UNMATCHED_AS_NULL, $offset);

          if (false === $result) {
              $errorCode = preg_last_error();

              // Look up the PREG_*_ERROR constant name that matches the error code.
              foreach (get_defined_constants(true)['pcre'] as $name => $value) {
                  if ($errorCode === $value && '_ERROR' === substr($name, -6)) {
                      throw new RuntimeException('Matching failed with '.$name.'.');
                  }
              }

              throw new RuntimeException('Matching failed with unknown error code.');
          }
      } finally {
          restore_error_handler();
      }

      return $matches;
  }
  /**
   * Returns a copy of this string padded on both sides with $padStr up to $length bytes.
   */
  public function padBoth(int $length, string $padStr = ' '): parent
  {
      $padded = clone $this;
      $padded->string = str_pad($this->string, $length, $padStr, \STR_PAD_BOTH);

      return $padded;
  }
  /**
   * Returns a copy of this string padded on the right with $padStr up to $length bytes.
   */
  public function padEnd(int $length, string $padStr = ' '): parent
  {
      $padded = clone $this;
      $padded->string = str_pad($this->string, $length, $padStr, \STR_PAD_RIGHT);

      return $padded;
  }
  /**
   * Returns a copy of this string padded on the left with $padStr up to $length bytes.
   */
  public function padStart(int $length, string $padStr = ' '): parent
  {
      $padded = clone $this;
      $padded->string = str_pad($this->string, $length, $padStr, \STR_PAD_LEFT);

      return $padded;
  }
  /**
   * Returns a copy of this string with the given prefixes concatenated in front of it, in order.
   */
  public function prepend(string ...$prefix): parent
  {
      $prepended = clone $this;

      if (1 >= \count($prefix)) {
          // Zero or one prefix: no need to go through implode().
          $head = $prefix[0] ?? '';
      } else {
          $head = implode('', $prefix);
      }

      $prepended->string = $head.$prepended->string;

      return $prepended;
  }
  /**
   * Returns a copy of this string where every occurrence of $from is replaced by $to.
   *
   * An empty $from leaves the content untouched. The search is case-insensitive when
   * $this->ignoreCase is set.
   */
  public function replace(string $from, string $to): parent
  {
      $replaced = clone $this;

      if ('' === $from) {
          return $replaced;
      }

      if ($this->ignoreCase) {
          $replaced->string = str_ireplace($from, $to, $this->string);
      } else {
          $replaced->string = str_replace($from, $to, $this->string);
      }

      return $replaced;
  }
  /**
   * Returns a copy of this string where the matches of $fromRegexp are replaced by $to.
   *
   * $to is used as a callback when it is a Closure or a callable array, and as a
   * replacement string otherwise. The "i" modifier is appended to the pattern when
   * $this->ignoreCase is set.
   *
   * @param string|callable $to
   *
   * @throws \TypeError               When $to is an array that is not callable
   * @throws InvalidArgumentException When the preg call triggers a PHP error
   * @throws RuntimeException         When the preg call returns null
   */
  public function replaceMatches(string $fromRegexp, $to): parent
  {
      if ($this->ignoreCase) {
          $fromRegexp .= 'i';
      }

      if (\is_array($to) && !\is_callable($to)) {
          throw new \TypeError(sprintf('Argument 2 passed to "%s::replaceMatches()" must be callable, array given.', static::class));
      }

      $useCallback = \is_array($to) || $to instanceof \Closure;
      $pregFunction = $useCallback ? 'preg_replace_callback' : 'preg_replace';

      // Turn any PHP error raised by the preg call into an exception.
      set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

      try {
          $result = $pregFunction($fromRegexp, $to, $this->string);

          if (null === $result) {
              $errorCode = preg_last_error();

              // Look up the PREG_*_ERROR constant name that matches the error code.
              foreach (get_defined_constants(true)['pcre'] as $name => $value) {
                  if ($errorCode === $value && '_ERROR' === substr($name, -6)) {
                      throw new RuntimeException('Matching failed with '.$name.'.');
                  }
              }

              throw new RuntimeException('Matching failed with unknown error code.');
          }
      } finally {
          restore_error_handler();
      }

      $replaced = clone $this;
      $replaced->string = $result;

      return $replaced;
  }
  /**
   * Returns a copy of this string with its bytes in reverse order.
   */
  public function reverse(): parent
  {
      $reversed = clone $this;
      $reversed->string = strrev($reversed->string);

      return $reversed;
  }
  /**
   * Returns the part of this string that starts at byte $start, as a new instance.
   *
   * @param int      $start  Start offset, forwarded to substr()
   * @param int|null $length Maximum number of bytes to keep; null means "up to the end"
   */
  public function slice(int $start = 0, ?int $length = null): parent
  {
      $sliced = clone $this;
      // The cast covers PHP versions where substr() returns false instead of ''.
      $sliced->string = (string) substr($this->string, $start, $length ?? \PHP_INT_MAX);

      return $sliced;
  }
  /**
   * Returns a snake_cased copy of this string.
   *
   * The string is first camel-cased and title-cased, then an underscore is inserted at
   * each lower-to-upper (or acronym-to-word) boundary before lower-casing everything.
   */
  public function snake(): parent
  {
      $snake = $this->camel()->title();

      $boundaries = ['/([A-Z]+)([A-Z][a-z])/', '/([a-z\d])([A-Z])/'];
      $underscored = preg_replace($boundaries, '\1_\2', $snake->string);

      $snake->string = strtolower($underscored);

      return $snake;
  }
  /**
   * Returns a copy of this string where the bytes starting at $start are replaced by $replacement.
   *
   * @param string   $replacement Bytes to insert
   * @param int      $start       Start offset, forwarded to substr_replace()
   * @param int|null $length      Number of bytes to replace; null means "up to the end"
   */
  public function splice(string $replacement, int $start = 0, ?int $length = null): parent
  {
      $spliced = clone $this;
      $spliced->string = substr_replace($this->string, $replacement, $start, $length ?? \PHP_INT_MAX);

      return $spliced;
  }
  /**
   * Splits this string around $delimiter and returns one instance per piece.
   *
   * When $flags is given, the call is handed over to the parent implementation. Otherwise
   * explode() is used, or a case-insensitive preg_split() on the quoted delimiter when
   * $this->ignoreCase is set.
   *
   * @param string   $delimiter Non-empty separator
   * @param int|null $limit     Maximum number of pieces; null means no limit
   * @param int|null $flags     Flags forwarded to the parent implementation
   *
   * @return static[]
   *
   * @throws InvalidArgumentException When $limit is lower than 1 or $delimiter is empty
   */
  public function split(string $delimiter, ?int $limit = null, ?int $flags = null): array
  {
      if (1 > $limit = $limit ?? \PHP_INT_MAX) {
          throw new InvalidArgumentException('Split limit must be a positive integer.');
      }

      if ('' === $delimiter) {
          throw new InvalidArgumentException('Split delimiter is empty.');
      }

      if (null !== $flags) {
          return parent::split($delimiter, $limit, $flags);
      }

      $str = clone $this;

      $chunks = $this->ignoreCase
          ? preg_split('{'.preg_quote($delimiter).'}iD', $this->string, $limit)
          : explode($delimiter, $this->string, $limit);

      // Replace each raw piece by an instance; iterating by key avoids leaving a
      // dangling reference to the last element of the returned array.
      foreach ($chunks as $i => $chunk) {
          $str->string = $chunk;
          $chunks[$i] = clone $str;
      }

      return $chunks;
  }
  /**
   * Tells whether this string starts with $prefix.
   *
   * Arrays and Traversable values are handed over to the parent implementation; any other
   * value is cast to string, in the same way endsWith(), equalsTo() and indexOf() treat
   * their argument. An empty prefix never matches. The comparison is case-insensitive
   * when $this->ignoreCase is set.
   *
   * @param string|iterable|AbstractString $prefix
   */
  public function startsWith($prefix): bool
  {
      if ($prefix instanceof parent) {
          $prefix = $prefix->string;
      } elseif (\is_array($prefix) || $prefix instanceof \Traversable) {
          return parent::startsWith($prefix);
      } else {
          $prefix = (string) $prefix;
      }

      if ('' === $prefix) {
          return false;
      }

      $prefixLength = \strlen($prefix);

      if ($this->ignoreCase) {
          return 0 === strncasecmp($this->string, $prefix, $prefixLength);
      }

      return 0 === strncmp($this->string, $prefix, $prefixLength);
  }
  /**
   * Returns a copy of this string with its first byte upper-cased, or the first byte of
   * every word when $allWords is true.
   */
  public function title(bool $allWords = false): parent
  {
      $titled = clone $this;

      if ($allWords) {
          $titled->string = ucwords($titled->string);
      } else {
          $titled->string = ucfirst($titled->string);
      }

      return $titled;
  }
  /**
   * Converts these bytes to a UnicodeString, going through toCodePointString().
   *
   * @param string|null $fromEncoding Encoding of the current bytes, forwarded to toCodePointString()
   */
  public function toUnicodeString(?string $fromEncoding = null): UnicodeString
  {
      $codePoints = $this->toCodePointString($fromEncoding);

      return new UnicodeString($codePoints->string);
  }
  /**
   * Converts these bytes to a UTF-8 CodePointString.
   *
   * When $fromEncoding is null or one of the UTF-8 aliases and the bytes are already valid
   * UTF-8, they are used unchanged. Otherwise they are converted from $fromEncoding, or
   * from Windows-1252 when no encoding was given.
   *
   * @param string|null $fromEncoding Encoding of the current bytes
   *
   * @throws InvalidArgumentException When the bytes are not valid in the source encoding, or
   *                                  when the conversion triggers a PHP error
   */
  public function toCodePointString(?string $fromEncoding = null): CodePointString
  {
      $u = new CodePointString();

      if (\in_array($fromEncoding, [null, 'utf8', 'utf-8', 'UTF8', 'UTF-8'], true) && preg_match('//u', $this->string)) {
          $u->string = $this->string;

          return $u;
      }

      $encoding = $fromEncoding ?? 'Windows-1252';

      // Turn any PHP error raised below into an exception.
      set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

      try {
          try {
              $validEncoding = false !== mb_detect_encoding($this->string, $encoding, true);
          } catch (InvalidArgumentException $e) {
              // The mbstring check raised an error: fall back to iconv when it is available.
              if (!\function_exists('iconv')) {
                  throw $e;
              }

              $u->string = iconv($encoding, 'UTF-8', $this->string);

              return $u;
          }
      } finally {
          restore_error_handler();
      }

      if (!$validEncoding) {
          throw new InvalidArgumentException(sprintf('Invalid "%s" string.', $encoding));
      }

      $u->string = mb_convert_encoding($this->string, 'UTF-8', $encoding);

      return $u;
  }
  /**
   * Returns a copy of this string with the bytes listed in $chars stripped from both ends.
   */
  public function trim(string $chars = " \t\n\r\0\x0B\x0C"): parent
  {
      $trimmed = clone $this;
      $trimmed->string = trim($trimmed->string, $chars);

      return $trimmed;
  }
  /**
   * Returns a copy of this string with the bytes listed in $chars stripped from its end.
   */
  public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C"): parent
  {
      $trimmed = clone $this;
      $trimmed->string = rtrim($trimmed->string, $chars);

      return $trimmed;
  }
  /**
   * Returns a copy of this string with the bytes listed in $chars stripped from its start.
   */
  public function trimStart(string $chars = " \t\n\r\0\x0B\x0C"): parent
  {
      $trimmed = clone $this;
      $trimmed->string = ltrim($trimmed->string, $chars);

      return $trimmed;
  }
  /**
   * Returns a copy of this string passed through strtoupper().
   */
  public function upper(): parent
  {
      $uppered = clone $this;
      $uppered->string = strtoupper($uppered->string);

      return $uppered;
  }
  /**
   * Returns the display width of this string, as computed by CodePointString::width().
   *
   * When the bytes are not valid UTF-8, every byte in the 0x80-0xFF range is replaced by
   * "?" before measuring.
   */
  public function width(bool $ignoreAnsiDecoration = true): int
  {
      $measurable = $this->string;

      if (!preg_match('//u', $measurable)) {
          $measurable = preg_replace('/[\x80-\xFF]/', '?', $measurable);
      }

      return (new CodePointString($measurable))->width($ignoreAnsiDecoration);
  }
  396. }