ByteString.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String;
  11. use Symfony\Component\String\Exception\ExceptionInterface;
  12. use Symfony\Component\String\Exception\InvalidArgumentException;
  13. use Symfony\Component\String\Exception\RuntimeException;
  14. /**
  15. * Represents a binary-safe string of bytes.
  16. *
  17. * @author Nicolas Grekas <p@tchwork.com>
  18. * @author Hugo Hamon <hugohamon@neuf.fr>
  19. *
  20. * @throws ExceptionInterface
  21. */
  22. class ByteString extends AbstractString
  23. {
  24. private const ALPHABET_ALPHANUMERIC = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz';
  /**
   * Wraps the given bytes without any validation or conversion.
   *
   * @param string $string Raw bytes to hold; defaults to the empty string
   */
  public function __construct(string $string = '')
  {
      $this->string = $string;
  }
  29. /*
  30. * The following method was derived from code of the Hack Standard Library (v4.40 - 2020-05-03)
  31. *
  32. * https://github.com/hhvm/hsl/blob/80a42c02f036f72a42f0415e80d6b847f4bf62d5/src/random/private.php#L16
  33. *
  34. * Code subject to the MIT license (https://github.com/hhvm/hsl/blob/master/LICENSE).
  35. *
  36. * Copyright (c) 2004-2020, Facebook, Inc. (https://www.facebook.com/)
  37. */
  /**
   * Builds a random string of $length characters picked from $alphabet.
   *
   * Bytes from random_bytes() are consumed $bits at a time; any index that
   * falls outside the alphabet is discarded (rejection sampling) instead of
   * being wrapped around, so no modulo bias is introduced.
   *
   * @param int         $length   Number of characters to produce; must be strictly positive
   * @param string|null $alphabet Bytes to pick from; self::ALPHABET_ALPHANUMERIC is used when null
   *
   * @throws InvalidArgumentException When $length is not strictly positive or when the
   *                                  alphabet is shorter than 2 bytes
   */
  public static function fromRandom(int $length = 16, ?string $alphabet = null): self
  {
      if ($length <= 0) {
          throw new InvalidArgumentException(sprintf('A strictly positive length is expected, "%d" given.', $length));
      }

      $alphabet ??= self::ALPHABET_ALPHANUMERIC;
      $alphabetSize = \strlen($alphabet);

      // Bits needed to address every character; sizes 0 and 1 are rejected
      // up-front so log() is never evaluated on them.
      $bits = $alphabetSize < 2 ? 0 : (int) ceil(log($alphabetSize, 2.0));
      if ($bits <= 0 || $bits > 56) {
          throw new InvalidArgumentException('The length of the alphabet must be in the [2^1, 2^56] range.');
      }

      $ret = '';
      while ($length > 0) {
          // Ask for twice the strictly required amount to absorb rejected indexes.
          $urandomLength = (int) ceil(2 * $length * $bits / 8.0);
          $data = random_bytes($urandomLength);
          $unpackedData = 0;
          $unpackedBits = 0;

          for ($i = 0; $i < $urandomLength && $length > 0; ++$i) {
              // Unpack 8 bits
              $unpackedData = ($unpackedData << 8) | \ord($data[$i]);
              $unpackedBits += 8;

              // While we have enough bits to select a character from the alphabet, keep
              // consuming the random data
              for (; $unpackedBits >= $bits && $length > 0; $unpackedBits -= $bits) {
                  $index = ($unpackedData & ((1 << $bits) - 1));
                  $unpackedData >>= $bits;

                  // Unfortunately, the alphabet size is not necessarily a power of two.
                  // Worst case, it is 2^k + 1, which means we need (k+1) bits and we
                  // have around a 50% chance of missing as k gets larger
                  if ($index < $alphabetSize) {
                      $ret .= $alphabet[$index];
                      --$length;
                  }
              }
          }
      }

      return new static($ret);
  }
  /**
   * Returns the byte value found at the given offset.
   *
   * @return int[] A one-element list holding the ord() of the byte, or an
   *               empty list when no byte exists at $offset
   */
  public function bytesAt(int $offset): array
  {
      $byte = $this->string[$offset] ?? '';

      if ('' === $byte) {
          return [];
      }

      return [\ord($byte)];
  }
  /**
   * Returns a copy with the given suffixes concatenated at the end.
   *
   * @param string ...$suffix Pieces to add, in order
   */
  public function append(string ...$suffix): static
  {
      // A single piece is read directly; implode() is only used for several.
      if (\count($suffix) > 1) {
          $addition = implode('', $suffix);
      } else {
          $addition = $suffix[0] ?? '';
      }

      $copy = clone $this;
      $copy->string .= $addition;

      return $copy;
  }
  /**
   * Returns a camelCase copy of the string.
   *
   * Runs of bytes outside a-z, A-Z, 0-9 and \x7f-\xff act as word
   * separators: each following word is capitalised, separators are removed
   * and the very first character is lower-cased.
   */
  public function camel(): static
  {
      $spaced = preg_replace('/[^a-zA-Z0-9\x7f-\xff]++/', ' ', $this->string);
      $capitalized = ucwords($spaced);
      $joined = str_replace(' ', '', $capitalized);

      $copy = clone $this;
      $copy->string = lcfirst($joined);

      return $copy;
  }
  /**
   * Splits the string into pieces of at most $length bytes.
   *
   * @param int $length Size of each piece; must be at least 1
   *
   * @return static[] One object per piece; empty when the string is empty
   *
   * @throws InvalidArgumentException When $length is lower than 1
   */
  public function chunk(int $length = 1): array
  {
      if ($length < 1) {
          throw new InvalidArgumentException('The chunk length must be greater than zero.');
      }

      if ('' === $this->string) {
          return [];
      }

      // One prototype is cloned per piece after its content is swapped in.
      $prototype = clone $this;
      $pieces = [];

      foreach (str_split($this->string, $length) as $part) {
          $prototype->string = $part;
          $pieces[] = clone $prototype;
      }

      return $pieces;
  }
  /**
   * Tells whether the string ends with the given suffix.
   *
   * Non-string, non-AbstractString arguments are delegated to the parent
   * implementation. An empty suffix never matches. The comparison is
   * case-insensitive when the ignoreCase flag is set.
   */
  public function endsWith(string|iterable|AbstractString $suffix): bool
  {
      if ($suffix instanceof AbstractString) {
          $suffix = $suffix->string;
      } elseif (!\is_string($suffix)) {
          return parent::endsWith($suffix);
      }

      if ('' === $suffix) {
          return false;
      }

      $suffixLength = \strlen($suffix);
      if (\strlen($this->string) < $suffixLength) {
          return false;
      }

      return 0 === substr_compare($this->string, $suffix, -$suffixLength, null, $this->ignoreCase);
  }
  /**
   * Tells whether the string is equal to the given one.
   *
   * Non-string, non-AbstractString arguments are delegated to the parent
   * implementation. When the ignoreCase flag is set and the argument is not
   * empty, strcasecmp() is used; otherwise a strict comparison is performed.
   */
  public function equalsTo(string|iterable|AbstractString $string): bool
  {
      if ($string instanceof AbstractString) {
          $string = $string->string;
      } elseif (!\is_string($string)) {
          return parent::equalsTo($string);
      }

      $caseInsensitive = '' !== $string && $this->ignoreCase;

      return $caseInsensitive
          ? 0 === strcasecmp($string, $this->string)
          : $string === $this->string;
  }
  /**
   * Returns a copy whose content went through strtolower().
   */
  public function folded(): static
  {
      $copy = clone $this;
      $copy->string = strtolower($this->string);

      return $copy;
  }
  /**
   * Returns the byte position of the first occurrence of $needle.
   *
   * Non-string, non-AbstractString needles are delegated to the parent
   * implementation. The search uses stripos() when the ignoreCase flag is
   * set and strpos() otherwise, both starting at $offset.
   *
   * @return int|null The position, or null when the needle is empty or not found
   */
  public function indexOf(string|iterable|AbstractString $needle, int $offset = 0): ?int
  {
      if ($needle instanceof AbstractString) {
          $needle = $needle->string;
      } elseif (!\is_string($needle)) {
          return parent::indexOf($needle, $offset);
      }

      if ('' === $needle) {
          return null;
      }

      if ($this->ignoreCase) {
          $position = stripos($this->string, $needle, $offset);
      } else {
          $position = strpos($this->string, $needle, $offset);
      }

      if (false === $position) {
          return null;
      }

      return $position;
  }
  /**
   * Returns the byte position of the last occurrence of $needle.
   *
   * Non-string, non-AbstractString needles are delegated to the parent
   * implementation. The search uses strripos() when the ignoreCase flag is
   * set and strrpos() otherwise; $offset is forwarded to them unchanged.
   *
   * @return int|null The position, or null when the needle is empty or not found
   */
  public function indexOfLast(string|iterable|AbstractString $needle, int $offset = 0): ?int
  {
      if ($needle instanceof AbstractString) {
          $needle = $needle->string;
      } elseif (!\is_string($needle)) {
          return parent::indexOfLast($needle, $offset);
      }

      if ('' === $needle) {
          return null;
      }

      if ($this->ignoreCase) {
          $position = strripos($this->string, $needle, $offset);
      } else {
          $position = strrpos($this->string, $needle, $offset);
      }

      if (false === $position) {
          return null;
      }

      return $position;
  }
  /**
   * Tells whether the bytes form valid UTF-8.
   *
   * The empty string is reported as valid; otherwise validity is decided by
   * matching an empty pattern with the "u" modifier.
   */
  public function isUtf8(): bool
  {
      if ('' === $this->string) {
          return true;
      }

      return (bool) preg_match('//u', $this->string);
  }
  /**
   * Joins the given strings, using the current string as glue.
   *
   * @param array       $strings  Pieces to join
   * @param string|null $lastGlue When not null and at least two pieces are given,
   *                              placed before the last piece in place of the
   *                              regular glue
   */
  public function join(array $strings, ?string $lastGlue = null): static
  {
      $tail = '';
      if (null !== $lastGlue && \count($strings) > 1) {
          // The last piece is taken out so implode() only glues the others.
          $tail = $lastGlue.array_pop($strings);
      }

      $str = clone $this;
      $str->string = implode($this->string, $strings).$tail;

      return $str;
  }
  /**
   * Returns the number of bytes held, as reported by strlen().
   */
  public function length(): int
  {
      $byteCount = \strlen($this->string);

      return $byteCount;
  }
  /**
   * Returns a lower-cased copy, computed with strtolower().
   */
  public function lower(): static
  {
      $copy = clone $this;
      $copy->string = strtolower($this->string);

      return $copy;
  }
  /**
   * Matches the string against a regular expression.
   *
   * preg_match_all() is used when $flags contains PREG_PATTERN_ORDER or
   * PREG_SET_ORDER, preg_match() otherwise. PREG_UNMATCHED_AS_NULL is always
   * added to the flags, and the "i" modifier is appended to the pattern when
   * the ignoreCase flag is set.
   *
   * @return array The matches as filled in by the PCRE function
   *
   * @throws InvalidArgumentException When PHP raises an error while matching
   * @throws RuntimeException         When the PCRE function returns false
   */
  public function match(string $regexp, int $flags = 0, int $offset = 0): array
  {
      $wantsAll = 0 !== ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags);
      $matcher = $wantsAll ? 'preg_match_all' : 'preg_match';

      if ($this->ignoreCase) {
          $regexp .= 'i';
      }

      // Turn PHP warnings (e.g. a malformed pattern) into exceptions for the call.
      set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

      try {
          $outcome = $matcher($regexp, $this->string, $matches, $flags | \PREG_UNMATCHED_AS_NULL, $offset);

          if (false === $outcome) {
              $errorCode = preg_last_error();

              // Find the PREG_*_ERROR constant name matching the error code.
              foreach (get_defined_constants(true)['pcre'] as $name => $value) {
                  if ($errorCode !== $value || '_ERROR' !== substr($name, -6)) {
                      continue;
                  }

                  throw new RuntimeException('Matching failed with '.$name.'.');
              }

              throw new RuntimeException('Matching failed with unknown error code.');
          }
      } finally {
          restore_error_handler();
      }

      return $matches;
  }
  /**
   * Returns a copy padded on both sides with str_pad() and STR_PAD_BOTH.
   *
   * @param int    $length Target length passed to str_pad()
   * @param string $padStr Padding string
   */
  public function padBoth(int $length, string $padStr = ' '): static
  {
      $padded = str_pad($this->string, $length, $padStr, \STR_PAD_BOTH);

      $copy = clone $this;
      $copy->string = $padded;

      return $copy;
  }
  /**
   * Returns a copy padded at the end with str_pad() and STR_PAD_RIGHT.
   *
   * @param int    $length Target length passed to str_pad()
   * @param string $padStr Padding string
   */
  public function padEnd(int $length, string $padStr = ' '): static
  {
      $padded = str_pad($this->string, $length, $padStr, \STR_PAD_RIGHT);

      $copy = clone $this;
      $copy->string = $padded;

      return $copy;
  }
  /**
   * Returns a copy padded at the start with str_pad() and STR_PAD_LEFT.
   *
   * @param int    $length Target length passed to str_pad()
   * @param string $padStr Padding string
   */
  public function padStart(int $length, string $padStr = ' '): static
  {
      $padded = str_pad($this->string, $length, $padStr, \STR_PAD_LEFT);

      $copy = clone $this;
      $copy->string = $padded;

      return $copy;
  }
  /**
   * Returns a copy with the given prefixes concatenated at the start.
   *
   * @param string ...$prefix Pieces to add, in order
   */
  public function prepend(string ...$prefix): static
  {
      // A single piece is read directly; implode() is only used for several.
      if (\count($prefix) > 1) {
          $addition = implode('', $prefix);
      } else {
          $addition = $prefix[0] ?? '';
      }

      $copy = clone $this;
      $copy->string = $addition.$copy->string;

      return $copy;
  }
  /**
   * Returns a copy where every occurrence of $from is replaced by $to.
   *
   * An empty $from leaves the content untouched. str_ireplace() is used when
   * the ignoreCase flag is set, str_replace() otherwise.
   */
  public function replace(string $from, string $to): static
  {
      $copy = clone $this;

      if ('' === $from) {
          return $copy;
      }

      if ($this->ignoreCase) {
          $copy->string = str_ireplace($from, $to, $this->string);
      } else {
          $copy->string = str_replace($from, $to, $this->string);
      }

      return $copy;
  }
  /**
   * Returns a copy where the matches of a regular expression are replaced.
   *
   * A string $to is always used as a preg_replace() replacement pattern, even
   * when it happens to name a function. Any other accepted value (array
   * callable, closure, invokable object) is run through
   * preg_replace_callback(). The "i" modifier is appended to the pattern when
   * the ignoreCase flag is set.
   *
   * @param string          $fromRegexp Pattern to search for
   * @param string|callable $to         Replacement string or callback
   *
   * @throws InvalidArgumentException When PHP raises an error while replacing
   * @throws RuntimeException         When the PCRE function returns null
   */
  public function replaceMatches(string $fromRegexp, string|callable $to): static
  {
      if ($this->ignoreCase) {
          $fromRegexp .= 'i';
      }

      // Given the parameter type, every non-string value is a callable; this
      // also covers invokable objects, which are neither arrays nor closures.
      $replace = \is_string($to) ? 'preg_replace' : 'preg_replace_callback';

      // Turn PHP warnings (e.g. a malformed pattern) into exceptions for the call.
      set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

      try {
          if (null === $string = $replace($fromRegexp, $to, $this->string)) {
              $lastError = preg_last_error();

              // Find the PREG_*_ERROR constant name matching the error code.
              foreach (get_defined_constants(true)['pcre'] as $k => $v) {
                  if ($lastError === $v && '_ERROR' === substr($k, -6)) {
                      throw new RuntimeException('Matching failed with '.$k.'.');
                  }
              }

              throw new RuntimeException('Matching failed with unknown error code.');
          }
      } finally {
          restore_error_handler();
      }

      $str = clone $this;
      $str->string = $string;

      return $str;
  }
  /**
   * Returns a copy with the bytes in reverse order, computed with strrev().
   */
  public function reverse(): static
  {
      $copy = clone $this;
      $copy->string = strrev($this->string);

      return $copy;
  }
  /**
   * Returns the part of the string selected by $start and $length.
   *
   * Both values are forwarded to substr(), so they count bytes.
   *
   * @param int      $start  Offset passed to substr()
   * @param int|null $length Length passed to substr(); null extends the slice
   *                         to the end of the string
   */
  public function slice(int $start = 0, ?int $length = null): static
  {
      $str = clone $this;
      // substr() always returns a string on PHP 8, so no cast is needed.
      $str->string = substr($this->string, $start, $length ?? \PHP_INT_MAX);

      return $str;
  }
  /**
   * Returns a snake_case copy of the string.
   *
   * The string is first run through camel() and title(); an underscore is
   * then inserted at each lower-to-upper or acronym-to-word boundary and the
   * result is lower-cased.
   */
  public function snake(): static
  {
      $result = $this->camel()->title();

      $boundaries = ['/([A-Z]+)([A-Z][a-z])/', '/([a-z\d])([A-Z])/'];
      $underscored = preg_replace($boundaries, '\1_\2', $result->string);

      $result->string = strtolower($underscored);

      return $result;
  }
  /**
   * Returns a copy where a part of the string is replaced by $replacement.
   *
   * The arguments are forwarded to substr_replace(), so $start and $length
   * count bytes.
   *
   * @param string   $replacement Bytes to put in place of the selected part
   * @param int      $start       Offset passed to substr_replace()
   * @param int|null $length      Length passed to substr_replace(); null
   *                              replaces up to the end of the string
   */
  public function splice(string $replacement, int $start = 0, ?int $length = null): static
  {
      $str = clone $this;
      $str->string = substr_replace($this->string, $replacement, $start, $length ?? \PHP_INT_MAX);

      return $str;
  }
  /**
   * Splits the string on $delimiter.
   *
   * When $flags is given the call is delegated to the parent implementation.
   * Otherwise explode() is used, or a quoted case-insensitive pattern with
   * preg_split() when the ignoreCase flag is set.
   *
   * @param string   $delimiter Boundary to split on; must not be empty
   * @param int|null $limit     Maximum number of pieces; null means no limit
   * @param int|null $flags     Flags forwarded to the parent implementation
   *
   * @return static[] One object per piece
   *
   * @throws InvalidArgumentException When $limit is lower than 1 or $delimiter is empty
   */
  public function split(string $delimiter, ?int $limit = null, ?int $flags = null): array
  {
      $limit ??= \PHP_INT_MAX;
      if (1 > $limit) {
          throw new InvalidArgumentException('Split limit must be a positive integer.');
      }

      if ('' === $delimiter) {
          throw new InvalidArgumentException('Split delimiter is empty.');
      }

      if (null !== $flags) {
          return parent::split($delimiter, $limit, $flags);
      }

      $parts = $this->ignoreCase
          ? preg_split('{'.preg_quote($delimiter).'}iD', $this->string, $limit)
          : explode($delimiter, $this->string, $limit);

      // Fill a fresh list rather than rewriting $parts by reference, so no
      // reference slot is left behind in the returned array.
      $str = clone $this;
      $chunks = [];
      foreach ($parts as $part) {
          $str->string = $part;
          $chunks[] = clone $str;
      }

      return $chunks;
  }
  /**
   * Tells whether the string starts with the given prefix.
   *
   * Non-string, non-AbstractString arguments are delegated to the parent
   * implementation. An empty prefix never matches. strncasecmp() is used
   * when the ignoreCase flag is set, strncmp() otherwise.
   */
  public function startsWith(string|iterable|AbstractString $prefix): bool
  {
      if ($prefix instanceof AbstractString) {
          $prefix = $prefix->string;
      } elseif (!\is_string($prefix)) {
          return parent::startsWith($prefix);
      }

      if ('' === $prefix) {
          return false;
      }

      $prefixLength = \strlen($prefix);

      if ($this->ignoreCase) {
          return 0 === strncasecmp($this->string, $prefix, $prefixLength);
      }

      return 0 === strncmp($this->string, $prefix, $prefixLength);
  }
  /**
   * Returns a copy with the first character upper-cased.
   *
   * @param bool $allWords When true, ucwords() is applied instead of ucfirst()
   */
  public function title(bool $allWords = false): static
  {
      $copy = clone $this;

      if ($allWords) {
          $copy->string = ucwords($this->string);
      } else {
          $copy->string = ucfirst($this->string);
      }

      return $copy;
  }
  /**
   * Converts the bytes to a UnicodeString.
   *
   * The conversion itself is done by toCodePointString(); its result is
   * wrapped in a new UnicodeString.
   *
   * @param string|null $fromEncoding Source encoding, forwarded to toCodePointString()
   */
  public function toUnicodeString(?string $fromEncoding = null): UnicodeString
  {
      $codePoints = $this->toCodePointString($fromEncoding);

      return new UnicodeString($codePoints->string);
  }
  /**
   * Converts the bytes to a CodePointString holding UTF-8.
   *
   * When $fromEncoding is null or one of the UTF-8 spellings and the bytes
   * are already valid UTF-8, they are used as-is. Otherwise they are
   * converted from $fromEncoding, or from Windows-1252 when it is null.
   *
   * @param string|null $fromEncoding Source encoding of the bytes
   *
   * @throws InvalidArgumentException When the bytes are not valid in the source
   *                                  encoding, or when PHP raises an error
   *                                  during detection and iconv is unavailable
   */
  public function toCodePointString(?string $fromEncoding = null): CodePointString
  {
      $u = new CodePointString();

      if (\in_array($fromEncoding, [null, 'utf8', 'utf-8', 'UTF8', 'UTF-8'], true) && preg_match('//u', $this->string)) {
          $u->string = $this->string;

          return $u;
      }

      // Resolved once here; the UTF-8 check above needs the raw argument.
      $encoding = $fromEncoding ?? 'Windows-1252';

      // Turn PHP warnings into exceptions for the detection / iconv calls.
      set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

      try {
          try {
              $validEncoding = false !== mb_detect_encoding($this->string, $encoding, true);
          } catch (InvalidArgumentException $e) {
              // Detection raised an error: fall back to iconv when it exists.
              if (!\function_exists('iconv')) {
                  throw $e;
              }

              $u->string = iconv($encoding, 'UTF-8', $this->string);

              return $u;
          }
      } finally {
          restore_error_handler();
      }

      if (!$validEncoding) {
          throw new InvalidArgumentException(sprintf('Invalid "%s" string.', $encoding));
      }

      $u->string = mb_convert_encoding($this->string, 'UTF-8', $encoding);

      return $u;
  }
  /**
   * Returns a copy with the given bytes stripped from both ends using trim().
   *
   * @param string $chars Bytes to strip
   */
  public function trim(string $chars = " \t\n\r\0\x0B\x0C"): static
  {
      $copy = clone $this;
      $copy->string = trim($this->string, $chars);

      return $copy;
  }
  /**
   * Returns a copy with the given bytes stripped from the end using rtrim().
   *
   * @param string $chars Bytes to strip
   */
  public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C"): static
  {
      $copy = clone $this;
      $copy->string = rtrim($this->string, $chars);

      return $copy;
  }
  /**
   * Returns a copy with the given bytes stripped from the start using ltrim().
   *
   * @param string $chars Bytes to strip
   */
  public function trimStart(string $chars = " \t\n\r\0\x0B\x0C"): static
  {
      $copy = clone $this;
      $copy->string = ltrim($this->string, $chars);

      return $copy;
  }
  /**
   * Returns an upper-cased copy, computed with strtoupper().
   */
  public function upper(): static
  {
      $copy = clone $this;
      $copy->string = strtoupper($this->string);

      return $copy;
  }
  /**
   * Returns the display width of the string.
   *
   * When the bytes are not valid UTF-8, every byte in the \x80-\xFF range is
   * replaced by "?" first. The measurement is delegated to
   * CodePointString::width().
   *
   * @param bool $ignoreAnsiDecoration Forwarded to CodePointString::width()
   */
  public function width(bool $ignoreAnsiDecoration = true): int
  {
      if (preg_match('//u', $this->string)) {
          $measurable = $this->string;
      } else {
          $measurable = preg_replace('/[\x80-\xFF]/', '?', $this->string);
      }

      $codePoints = new CodePointString($measurable);

      return $codePoints->width($ignoreAnsiDecoration);
  }
  381. }