2022-09-13 13:13:11 +00:00
|
|
|
<?php
|
|
|
|
// WString.php
|
|
|
|
// Created: 2021-06-22
|
2023-07-05 01:28:33 +00:00
|
|
|
// Updated: 2023-07-05
|
2022-09-13 13:13:11 +00:00
|
|
|
|
|
|
|
namespace Index;
|
|
|
|
|
|
|
|
use Traversable;
|
|
|
|
use InvalidArgumentException;
|
|
|
|
use ValueError;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Provides an immutable multi-byte string with arrow methods.
|
|
|
|
*/
|
|
|
|
final class WString implements IString {
|
|
|
|
use XStringTrait;
|
|
|
|
|
|
|
|
// Encoding that $defaultEncoding starts with and is reset to by setDefaultEncoding('').
public const DEFAULT_ENCODING = 'utf-8';
|
|
|
|
|
|
|
|
// Encoding used by the constructor when no explicit encoding is passed.
private static $defaultEncoding = self::DEFAULT_ENCODING;
|
|
|
|
|
|
|
|
// Raw bytes of the string, validated against $encoding in the constructor.
private string $value;
|
|
|
|
// Encoding name as returned by mb_preferred_mime_name() in the constructor.
private string $encoding;
|
|
|
|
|
|
|
|
/**
 * Creates a string from raw bytes in a given encoding.
 *
 * @param string $value Raw bytes of the string.
 * @param ?string $encoding Encoding of $value, null to use the class default encoding.
 * @throws InvalidArgumentException If $value is not valid in the selected encoding.
 */
public function __construct(string $value = '', ?string $encoding = null) {
    if($encoding === null)
        $encoding = self::$defaultEncoding;

    $isValid = mb_check_encoding($value, $encoding);
    if(!$isValid)
        throw new InvalidArgumentException('$value is not a valid value for the selected encoding.');

    $this->value = $value;
    // store the normalised name so later encoding comparisons are consistent
    $this->encoding = mb_preferred_mime_name($encoding);
}
|
|
|
|
|
|
|
|
/**
 * Gets the length of the string, counted in characters of its encoding.
 *
 * @return int Amount of characters.
 */
public function getLength(): int {
    $characters = mb_strlen($this->value, $this->encoding);
    return $characters;
}
|
|
|
|
|
|
|
|
/**
 * Gets the size of the string in raw bytes, ignoring its encoding.
 *
 * @return int Amount of raw bytes.
 */
public function getByteCount(): int {
    $bytes = strlen($this->value);
    return $bytes;
}
|
|
|
|
|
|
|
|
/**
 * Checks whether the string has no content.
 *
 * @return bool true if the string is empty, false if not.
 */
public function isEmpty(): bool {
    // zero bytes means zero characters in any encoding, so no decoding is needed
    $hasContent = $this->value !== '';
    return !$hasContent;
}
|
|
|
|
|
|
|
|
/**
 * Finds the character offset of the first occurrence of a string.
 *
 * @param IString|string $text String to look for; it is cast to this string's encoding first.
 * @param int $offset Character offset to start searching from.
 * @return int Character offset of the match, or -1 if there is none.
 */
public function indexOf(IString|string $text, int $offset = 0): int {
    $needle = (string)self::castEncoding($text, $this->encoding);
    $found = mb_strpos($this->value, $needle, $offset, $this->encoding);
    return $found === false ? -1 : $found;
}
|
|
|
|
|
|
|
|
/**
 * Checks whether the string contains another string.
 *
 * A WString needle is converted to this string's encoding before searching, and the
 * search is done per character rather than per byte so that a match cannot start in
 * the middle of a multi-byte character.
 * Plain PHP strings and other IString implementations are used as-is.
 *
 * @param IString|string $text String to look for.
 * @return bool true if $text occurs in this string, false if not.
 */
public function contains(IString|string $text): bool {
    if($text instanceof WString)
        $text = $text->convertEncoding($this->encoding);

    return mb_strpos($this->value, (string)$text, 0, $this->encoding) !== false;
}
|
|
|
|
|
|
|
|
/**
 * Extracts part of the string as a new instance in the same encoding.
 *
 * Offset and length are passed to mb_substr unchanged and follow its semantics.
 *
 * @param int $offset Character offset to start at.
 * @param int|null $length Amount of characters to take, null for the remainder.
 * @return IString The extracted part.
 */
public function substring(int $offset, int|null $length = null): IString {
    $part = mb_substr($this->value, $offset, $length, $this->encoding);
    return new WString($part, $this->encoding);
}
|
|
|
|
|
|
|
|
/**
 * Checks whether the string begins with another string.
 *
 * @param IString|string $text Expected prefix; it is cast to this string's encoding first.
 * @return bool true if this string starts with $text, false if not.
 */
public function startsWith(IString|string $text): bool {
    $prefix = self::castEncoding($text, $this->encoding);
    $head = $this->substring(0, $prefix->getLength());
    return $head->equals($prefix);
}
|
|
|
|
|
|
|
|
/**
 * Checks whether the string ends with another string.
 *
 * @param IString|string $text Expected suffix; it is cast to this string's encoding first.
 * @return bool true if this string ends with $text, false if not.
 */
public function endsWith(IString|string $text): bool {
    $text = self::castEncoding($text, $this->encoding);
    $length = $text->getLength();

    // Every string ends with the empty string. Without this guard the offset below
    // would be -0 === 0, which selects the whole string instead of an empty tail.
    if($length === 0)
        return true;

    return $this->substring(-$length)->equals($text);
}
|
|
|
|
|
|
|
|
/**
 * Replaces every occurrence of a string with another string.
 *
 * Both arguments are cast to this string's encoding. The value is split on the
 * quoted search string with mb_split and glued back together with the replacement.
 *
 * @param IString|string $search String to look for.
 * @param IString|string $replace String to put in its place.
 * @return IString The resulting string.
 */
public function replace(IString|string $search, IString|string $replace): IString {
    $needle = (string)self::castEncoding($search, $this->encoding);
    $glue = (string)self::castEncoding($replace, $this->encoding);

    $pattern = preg_quote($needle);
    $pieces = self::doRegex(fn() => mb_split($pattern, $this->value));

    return new WString(implode($glue, $pieces), $this->encoding);
}
|
|
|
|
|
|
|
|
/**
 * Creates a new string with another string added to the end.
 *
 * @param IString|string $string String to add; it is cast to this string's encoding first.
 * @return IString The combined string.
 */
public function append(IString|string $string): IString {
    $suffix = self::castEncoding($string, $this->encoding)->value;
    return new WString($this->value . $suffix, $this->encoding);
}
|
|
|
|
|
|
|
|
/**
 * Creates a new string with another string added to the front.
 *
 * @param IString|string $string String to add; it is cast to this string's encoding first.
 * @return IString The combined string.
 */
public function prepend(IString|string $string): IString {
    $prefix = self::castEncoding($string, $this->encoding)->value;
    return new WString($prefix . $this->value, $this->encoding);
}
|
|
|
|
|
|
|
|
/**
 * Splits the string on a separator.
 *
 * A WString separator is converted to this string's encoding first, so that a
 * separator held in a different encoding still matches. Plain PHP strings and other
 * IString implementations are used as-is.
 *
 * @param IString|string $separator Separator to split on.
 * @param int $limit Limit passed to mb_split.
 * @return array The pieces, mapped to WString instances in this string's encoding.
 */
public function split(IString|string $separator, int $limit = PHP_INT_MAX): array {
    if($separator instanceof WString)
        $separator = $separator->convertEncoding($this->encoding);

    $pattern = preg_quote((string)$separator);

    return XArray::select(
        self::doRegex(fn() => mb_split($pattern, $this->value, $limit)),
        fn($str) => new WString($str, $this->encoding)
    );
}
|
|
|
|
|
|
|
|
/**
 * Cuts the string into pieces of a fixed amount of characters.
 *
 * @param int $chunkSize Amount of characters per piece, passed to mb_str_split.
 * @return array The pieces, mapped to WString instances in this string's encoding.
 */
public function chunk(int $chunkSize): array {
    $pieces = mb_str_split($this->value, $chunkSize, $this->encoding);
    $wrap = fn($piece) => new WString($piece, $this->encoding);
    return XArray::select($pieces, $wrap);
}
|
|
|
|
|
|
|
|
/**
 * Shared implementation of trim(), trimStart() and trimEnd().
 *
 * @param IString|string $characters Set of characters to strip.
 * @param int $flags Bit mask: 0x01 converts $characters to this string's encoding,
 *                   0x02 strips from the start, 0x04 strips from the end.
 * @return IString The trimmed string.
 */
private function trimInternal(IString|string $characters, int $flags): IString {
    if($flags & 0x01) {
        if($characters instanceof WString)
            $characters = (string)$characters->convertEncoding($this->encoding);
        else
            // anything that is not a WString is treated as ASCII
            $characters = mb_convert_encoding((string)$characters, $this->encoding, 'ascii');
    } else
        $characters = (string)$characters;

    // computed once; the length cannot change while trimming
    $length = $this->getLength();
    $start = 0;
    $end = $length - 1;

    $isTrimmable = fn(int $index): bool
        => mb_strpos($characters, $this[$index], 0, $this->encoding) !== false;

    if($flags & 0x02)
        while($start < $length && $isTrimmable($start))
            ++$start;

    // Runs down to $start (inclusive) so that index 0 is also examined; stopping at
    // $start keeps the resulting length from going negative.
    if($flags & 0x04)
        while($end >= $start && $isTrimmable($end))
            --$end;

    return $this->substring($start, $end - $start + 1);
}
|
|
|
|
|
|
|
|
/**
 * Strips characters from both ends of the string.
 *
 * @param IString|string $characters Set of characters to strip.
 * @param bool $convertChars Whether $characters should be converted to this string's encoding.
 * @return IString The trimmed string.
 */
public function trim(IString|string $characters = IString::TRIM_CHARS, bool $convertChars = true): IString {
    // 0x02 | 0x04 selects both ends, 0x01 additionally requests character conversion
    $flags = $convertChars ? 0x07 : 0x06;
    return $this->trimInternal($characters, $flags);
}
|
|
|
|
|
|
|
|
/**
 * Strips characters from the start of the string.
 *
 * @param IString|string $characters Set of characters to strip.
 * @param bool $convertChars Whether $characters should be converted to this string's encoding.
 * @return IString The trimmed string.
 */
public function trimStart(IString|string $characters = IString::TRIM_CHARS, bool $convertChars = true): IString {
    // 0x02 selects the start, 0x01 additionally requests character conversion
    $flags = $convertChars ? 0x03 : 0x02;
    return $this->trimInternal($characters, $flags);
}
|
|
|
|
|
|
|
|
/**
 * Strips characters from the end of the string.
 *
 * @param IString|string $characters Set of characters to strip.
 * @param bool $convertChars Whether $characters should be converted to this string's encoding.
 * @return IString The trimmed string.
 */
public function trimEnd(IString|string $characters = IString::TRIM_CHARS, bool $convertChars = true): IString {
    // 0x04 selects the end, 0x01 additionally requests character conversion
    $flags = $convertChars ? 0x05 : 0x04;
    return $this->trimInternal($characters, $flags);
}
|
|
|
|
|
|
|
|
/**
 * Creates a lowercase copy of the string.
 *
 * @return IString The lowercased string, in the same encoding.
 */
public function toLower(): IString {
    $lowered = mb_strtolower($this->value, $this->encoding);
    return new WString($lowered, $this->encoding);
}
|
|
|
|
|
|
|
|
/**
 * Creates an uppercase copy of the string.
 *
 * @return IString The uppercased string, in the same encoding.
 */
public function toUpper(): IString {
    $raised = mb_strtoupper($this->value, $this->encoding);
    return new WString($raised, $this->encoding);
}
|
|
|
|
|
|
|
|
/**
 * Creates a copy of the string with its characters in reverse order.
 *
 * Characters are obtained by iterating over this object.
 *
 * @return IString The reversed string, in the same encoding.
 */
public function reverse(): IString {
    $reversed = '';

    // putting each character in front of what was collected so far flips the order
    foreach($this as $char)
        $reversed = $char . $reversed;

    return new WString($reversed, $this->encoding);
}
|
|
|
|
|
2023-07-05 01:28:33 +00:00
|
|
|
/**
 * Counts the amount of distinct characters in the string.
 *
 * @return int Amount of unique characters.
 */
public function countUnique(): int {
    // Characters are used as array keys so each lookup is constant time instead of
    // a linear in_array scan per character.
    // NOTE(review): assumes iteration yields plain string characters - confirm against StringIterator.
    $seen = [];

    foreach($this as $char)
        $seen[$char] = true;

    return count($seen);
}
|
|
|
|
|
2022-09-13 13:13:11 +00:00
|
|
|
/**
 * Gets the raw bytes of the string, in its own encoding.
 *
 * @return string Raw value.
 */
public function __toString(): string {
    $raw = $this->value;
    return $raw;
}
|
|
|
|
|
|
|
|
/**
 * Creates an AString from this string.
 *
 * @param bool $convert Whether the value should be converted to ASCII first.
 *                      When false the raw bytes are passed on untouched.
 * @return AString The resulting string.
 */
public function toAString(bool $convert = true): AString {
    $value = $this->value;

    // conversion is only needed when the string is NOT already ASCII
    if($convert && !self::sameEncoding('ascii', $this->encoding))
        $value = mb_convert_encoding($value, 'ascii', $this->encoding);

    return new AString($value);
}
|
|
|
|
|
|
|
|
/**
 * Gets the name of the encoding of this string, as stored by the constructor.
 *
 * @return string Encoding name.
 */
public function getEncoding(): string {
    $name = $this->encoding;
    return $name;
}
|
|
|
|
|
|
|
|
/**
 * Converts the string to another encoding.
 *
 * @param string $encoding Target encoding.
 * @return WString This instance if the encoding already matches, a converted copy otherwise.
 */
public function convertEncoding(string $encoding): WString {
    if(!self::sameEncoding($encoding, $this->encoding))
        return new WString(
            mb_convert_encoding($this->value, $encoding, $this->encoding),
            $encoding
        );

    return $this;
}
|
|
|
|
|
|
|
|
/**
 * Checks if a character offset lies within the string.
 *
 * Prefer isset($string[$offset]) over calling $string->offsetExists($offset) directly.
 *
 * @see https://www.php.net/manual/en/arrayaccess.offsetexists.php
 * @param int $offset Character offset; it is cast to int.
 * @return bool true if it exists, false if not.
 */
public function offsetExists(mixed $offset): bool {
    $index = (int)$offset;

    if($index < 0)
        return false;

    return $index < $this->getLength();
}
|
|
|
|
|
|
|
|
/**
 * Gets the character at an offset in the string.
 *
 * Prefer $string[$offset] over calling $string->offsetGet($offset) directly.
 *
 * @see https://www.php.net/manual/en/arrayaccess.offsetget.php
 * @param int $offset Character offset; it is cast to int.
 * @return string Character at that offset, as returned by mb_substr.
 */
public function offsetGet(mixed $offset): mixed {
    $index = (int)$offset;
    $character = mb_substr($this->value, $index, 1, $this->encoding);
    return $character;
}
|
|
|
|
|
|
|
|
/**
 * Creates an iterator over this string.
 *
 * @return StringIterator A new iterator wrapping this instance.
 */
public function getIterator(): Traversable {
    $iterator = new StringIterator($this);
    return $iterator;
}
|
|
|
|
|
|
|
|
/**
 * Provides the data that represents this string in JSON, which is its raw value.
 *
 * @see https://www.php.net/manual/en/jsonserializable.jsonserialize.php
 * @return mixed Data to be passed to json_encode.
 */
public function jsonSerialize(): mixed {
    $payload = $this->value;
    return $payload;
}
|
|
|
|
|
|
|
|
/**
 * Provides the raw value of this string for bencode serialisation.
 *
 * NOTE(review): presumably consumed by a bencode encoder elsewhere in the project - confirm.
 *
 * @return mixed The raw string value.
 */
public function bencodeSerialise(): mixed {
    $payload = $this->value;
    return $payload;
}
|
|
|
|
|
|
|
|
/**
 * Gets the state of this object for serialisation.
 *
 * @return array Encoding name at index 0, raw value at index 1.
 */
public function __serialize(): array {
    $state = [];
    $state[] = $this->encoding;
    $state[] = $this->value;
    return $state;
}
|
|
|
|
|
|
|
|
/**
 * Restores the state of this object from serialised data.
 *
 * No validation of the value against the encoding is performed here.
 *
 * @param array $serialized Encoding name at index 0, raw value at index 1.
 */
public function __unserialize(array $serialized): void {
    $this->encoding = $serialized[0];
    $this->value = $serialized[1];
}
|
|
|
|
|
|
|
|
/**
 * Checks whether this string has the same value as another.
 *
 * @param mixed $other An instance of IString or a PHP string.
 * @return bool true if compare() reports no difference, false if not.
 */
public function equals(mixed $other): bool {
    $difference = $this->compare($other);
    return $difference === 0;
}
|
|
|
|
|
|
|
|
/**
 * Compares this string against another.
 *
 * The other value is cast to this string's encoding, after which the raw bytes of
 * both are compared with strcmp.
 *
 * @param mixed $other An instance of IString or a PHP string.
 * @return int The strcmp result: negative, zero or positive.
 */
public function compare(mixed $other): int {
    $converted = self::castEncoding($other, $this->encoding);
    $theirs = (string)$converted;
    return strcmp($this->value, $theirs);
}
|
|
|
|
|
|
|
|
/**
 * Creates a new WString instance with the same value and encoding.
 *
 * Given the immutable nature of this object this method is somewhat pointless,
 * but calling it is preferable to calling ->substring(0) to get a copy.
 *
 * @return WString A new identical instance of WString.
 */
public function clone(): mixed {
    $copy = new WString($this->value, $this->encoding);
    return $copy;
}
|
|
|
|
|
|
|
|
/**
 * Runs a callback with the mbstring regex encoding set to this string's encoding.
 *
 * The previous regex encoding is restored afterwards, including when the callback throws.
 *
 * @param callable $callback Code to run; it is called without arguments.
 * @return mixed Whatever the callback returned.
 */
private function doRegex(callable $callback): mixed {
    $previous = self::getRegexEncoding();
    self::setRegexEncoding($this->encoding);

    try {
        return $callback();
    } finally {
        // the regex encoding is process-wide state, never leave it modified
        self::setRegexEncoding($previous);
    }
}
|
|
|
|
|
|
|
|
/**
 * Checks whether two encoding names refer to the same encoding.
 *
 * Names are compared by their preferred MIME names.
 *
 * @param string $name1 First encoding name.
 * @param string $name2 Second encoding name.
 * @return bool true if both resolve to the same preferred MIME name, false if not.
 */
private static function sameEncoding(string $name1, string $name2): bool {
    $first = mb_preferred_mime_name($name1);
    $second = mb_preferred_mime_name($name2);
    return $first === $second;
}
|
|
|
|
|
|
|
|
/**
 * Joins an iterable object together with a separator to create a string.
 *
 * The separator is cast to the desired encoding. Elements that are WString
 * instances are converted to the desired encoding as well; every other element is
 * cast to a PHP string and used as-is.
 *
 * @param iterable $source Source object.
 * @param IString|string $separator Separator to use as glue.
 * @param ?string $encoding Desired encoding, null for Index default.
 * @return WString Resulting string.
 */
public static function join(iterable $source, IString|string $separator = '', ?string $encoding = null): WString {
    $encoding ??= self::getDefaultEncoding();
    $separator = (string)self::castEncoding($separator, $encoding);

    $parts = [];
    foreach($source as $value)
        // without conversion, parts in another encoding would be glued in as raw bytes
        $parts[] = $value instanceof WString
            ? (string)$value->convertEncoding($encoding)
            : (string)$value;

    return new WString(implode($separator, $parts), $encoding);
}
|
|
|
|
|
|
|
|
/**
 * Gets the encoding used for new instances when none is specified.
 *
 * @return string Default encoding name.
 */
public static function getDefaultEncoding(): string {
    $current = self::$defaultEncoding;
    return $current;
}
|
|
|
|
|
|
|
|
/**
 * Sets the encoding used for new instances when none is specified.
 *
 * An empty argument resets the default to DEFAULT_ENCODING. Names that mbstring
 * rejects are ignored and leave the current default untouched.
 *
 * @param string $encoding Encoding name, empty to reset.
 */
public static function setDefaultEncoding(string $encoding = ''): void {
    if(empty($encoding)) {
        self::$defaultEncoding = self::DEFAULT_ENCODING;
        return;
    }

    try {
        $aliases = mb_encoding_aliases($encoding);
    } catch(ValueError $ex) {
        // unknown encoding name, keep the current default
        return;
    }

    if($aliases !== false)
        self::$defaultEncoding = $encoding;
}
|
|
|
|
|
|
|
|
/**
 * Gets the current mbstring internal encoding.
 *
 * @return string Encoding name as reported by mb_internal_encoding().
 */
public static function getInternalEncoding(): string {
    $current = mb_internal_encoding();
    return $current;
}
|
|
|
|
|
|
|
|
/**
 * Sets the mbstring internal encoding.
 *
 * The result of mb_internal_encoding() is not inspected.
 *
 * @param string $encoding Encoding name.
 */
public static function setInternalEncoding(string $encoding): void {
    mb_internal_encoding($encoding);
}
|
|
|
|
|
|
|
|
/**
 * Gets the current mbstring regex encoding.
 *
 * @return string Encoding name as reported by mb_regex_encoding().
 */
public static function getRegexEncoding(): string {
    $current = mb_regex_encoding();
    return $current;
}
|
|
|
|
|
|
|
|
/**
 * Sets the mbstring regex encoding.
 *
 * The result of mb_regex_encoding() is not inspected.
 *
 * @param string $encoding Encoding name.
 */
public static function setRegexEncoding(string $encoding): void {
    mb_regex_encoding($encoding);
}
|
|
|
|
|
|
|
|
/**
 * Wraps a raw string, treating it as being in the mbstring internal encoding.
 *
 * NOTE(review): presumably meant for request data - confirm against callers.
 *
 * @param string $raw Raw string.
 * @return WString The wrapped string.
 */
public static function fromRequest(string $raw): WString {
    $encoding = self::getInternalEncoding();
    return new WString($raw, $encoding);
}
|
|
|
|
|
|
|
|
/**
 * Gets a shared empty string instance.
 *
 * The instance is created on first use, with whatever default encoding applies at
 * that moment, and handed out again on every later call.
 *
 * @return WString An empty string.
 */
public static function empty(): WString {
    static $instance = null;

    if($instance === null)
        $instance = new WString('');

    return $instance;
}
|
|
|
|
|
|
|
|
/**
 * Turns a value into a WString.
 *
 * WString instances are returned untouched, AString instances are asked for their
 * WString form, and anything else is cast to a PHP string and treated as ASCII.
 *
 * @param mixed $string Value to cast.
 * @return WString The resulting string.
 */
public static function cast(mixed $string): WString {
    if($string instanceof WString)
        return $string;

    return $string instanceof AString
        ? $string->toWString('ascii', false)
        : new WString((string)$string, 'ascii');
}
|
|
|
|
|
|
|
|
/**
 * Turns a value into a WString in a specific encoding.
 *
 * @param mixed $string Value to cast.
 * @param string $encoding Desired encoding.
 * @return WString The resulting string, converted to $encoding.
 */
public static function castEncoding(mixed $string, string $encoding): WString {
    $wide = self::cast($string);
    return $wide->convertEncoding($encoding);
}
|
|
|
|
}
|