index/src/WString.php

382 lines
12 KiB
PHP

<?php
// WString.php
// Created: 2021-06-22
// Updated: 2022-02-27
namespace Index;
use Traversable;
use InvalidArgumentException;
use ValueError;
/**
 * Provides an immutable multi-byte string with arrow methods.
 *
 * The raw bytes are stored together with the name of the encoding they are in.
 * Offsets and lengths are counted in characters of that encoding unless a method says otherwise.
 */
final class WString implements IString {
    use XStringTrait;

    public const DEFAULT_ENCODING = 'utf-8';

    private static string $defaultEncoding = self::DEFAULT_ENCODING;

    private string $value;
    private string $encoding;

    /**
     * @param string $value Raw bytes of the string, already encoded in $encoding.
     * @param ?string $encoding Name of the encoding of $value, null for the current default encoding.
     * @throws InvalidArgumentException If $value is not valid in the selected encoding.
     */
    public function __construct(string $value = '', ?string $encoding = null) {
        $encoding ??= self::$defaultEncoding;

        if(!mb_check_encoding($value, $encoding))
            throw new InvalidArgumentException('$value is not a valid value for the selected encoding.');

        $this->value = $value;
        $this->encoding = self::canonicalEncodingName($encoding);
    }

    /**
     * Returns the amount of characters the string contains.
     *
     * @return int Amount of characters in the encoding of this string.
     */
    public function getLength(): int {
        return mb_strlen($this->value, $this->encoding);
    }

    /**
     * Returns the amount of bytes the string contains.
     *
     * @return int Amount of raw bytes.
     */
    public function getByteCount(): int {
        return strlen($this->value);
    }

    /**
     * Checks whether the string contains no data at all.
     *
     * @return bool true if the string is empty, false if not.
     */
    public function isEmpty(): bool {
        // an empty string is an empty string so this should be fine regardless of encoding
        return $this->value === '';
    }

    /**
     * Finds the character offset of the first occurrence of a string.
     *
     * @param IString|string $text String to look for, converted to the encoding of this string first.
     * @param int $offset Character offset to start searching from.
     * @return int Character offset of the first occurrence, -1 if it does not occur.
     */
    public function indexOf(IString|string $text, int $offset = 0): int {
        $text = (string)self::castEncoding($text, $this->encoding);
        $pos = mb_strpos($this->value, $text, $offset, $this->encoding);
        return $pos === false ? -1 : $pos;
    }

    /**
     * Checks whether a string occurs anywhere within this string.
     *
     * @param IString|string $text String to look for.
     * @return bool true if $text occurs in this string, false if not.
     */
    public function contains(IString|string $text): bool {
        // Only a WString knows its own encoding; anything else is used as the bytes it already is.
        $needle = $text instanceof WString
            ? $text->convertEncoding($this->encoding)->value
            : (string)$text;

        // mb_strpos matches on character boundaries, a plain byte search could match halfway through a character.
        return mb_strpos($this->value, $needle, 0, $this->encoding) !== false;
    }

    /**
     * Extracts a part of the string.
     *
     * @param int $offset Character offset to start at, passed to mb_substr as-is.
     * @param ?int $length Amount of characters to take, null for everything after $offset.
     * @return IString The extracted part as a new WString in the same encoding.
     */
    public function substring(int $offset, int|null $length = null): IString {
        return new WString(mb_substr($this->value, $offset, $length, $this->encoding), $this->encoding);
    }

    /**
     * Checks whether this string begins with a given string.
     *
     * @param IString|string $text Expected prefix.
     * @return bool true if this string starts with $text, false if not.
     */
    public function startsWith(IString|string $text): bool {
        $text = self::castEncoding($text, $this->encoding);
        return $this->substring(0, $text->getLength())->equals($text);
    }

    /**
     * Checks whether this string ends with a given string.
     *
     * @param IString|string $text Expected suffix.
     * @return bool true if this string ends with $text, false if not.
     */
    public function endsWith(IString|string $text): bool {
        $text = self::castEncoding($text, $this->encoding);
        $length = $text->getLength();

        // substring(-0) would return the entire string, so the empty suffix needs its own answer.
        // Every string ends with the empty string, same as startsWith.
        if($length === 0)
            return true;

        return $this->substring(-$length)->equals($text);
    }

    /**
     * Replaces every occurrence of a string with another string.
     *
     * @param IString|string $search String to look for.
     * @param IString|string $replace String to put in its place.
     * @return IString A new string with the replacements applied.
     */
    public function replace(IString|string $search, IString|string $replace): IString {
        $search = (string)self::castEncoding($search, $this->encoding);
        $replace = (string)self::castEncoding($replace, $this->encoding);

        // the search string is quoted so mb_split treats it as literal text
        $pattern = preg_quote($search);
        $parts = $this->doRegex(fn() => mb_split($pattern, $this->value));

        return new WString(implode($replace, $parts), $this->encoding);
    }

    /**
     * Creates a new string with another string added to the end.
     *
     * @param IString|string $string String to add.
     * @return IString The combined string.
     */
    public function append(IString|string $string): IString {
        $string = self::castEncoding($string, $this->encoding);
        return new WString($this->value . $string->value, $this->encoding);
    }

    /**
     * Creates a new string with another string added to the start.
     *
     * @param IString|string $string String to add.
     * @return IString The combined string.
     */
    public function prepend(IString|string $string): IString {
        $string = self::castEncoding($string, $this->encoding);
        return new WString($string->value . $this->value, $this->encoding);
    }

    /**
     * Splits the string on a separator.
     *
     * @param IString|string $separator Literal separator text.
     * @param int $limit Maximum amount of parts, passed to mb_split.
     * @return array Parts of the string, each mapped to a WString in the encoding of this string.
     */
    public function split(IString|string $separator, int $limit = PHP_INT_MAX): array {
        // A WString separator may be in another encoding, bring it in line before searching for it.
        $separator = $separator instanceof WString
            ? $separator->convertEncoding($this->encoding)->value
            : (string)$separator;

        $pattern = preg_quote($separator);
        $parts = $this->doRegex(fn() => mb_split($pattern, $this->value, $limit));

        return XArray::select($parts, fn($str) => new WString($str, $this->encoding));
    }

    /**
     * Splits the string into pieces of a fixed amount of characters.
     *
     * @param int $chunkSize Amount of characters per piece.
     * @return array Pieces of the string, each mapped to a WString in the encoding of this string.
     */
    public function chunk(int $chunkSize): array {
        return XArray::select(
            mb_str_split($this->value, $chunkSize, $this->encoding),
            fn($str) => new WString($str, $this->encoding)
        );
    }

    /**
     * Shared implementation of trim, trimStart and trimEnd.
     *
     * @param IString|string $characters Set of characters to strip.
     * @param int $flags Bit field: 0x01 convert $characters to the encoding of this string, 0x02 trim the start, 0x04 trim the end.
     * @return IString The trimmed string.
     */
    private function trimInternal(IString|string $characters, int $flags): IString {
        if($flags & 0x01) {
            if($characters instanceof WString)
                $characters = (string)$characters->convertEncoding($this->encoding);
            else
                $characters = mb_convert_encoding((string)$characters, $this->encoding, 'ascii');
        } else
            $characters = (string)$characters;

        // measured once, the string cannot change while trimming
        $length = $this->getLength();
        $start = 0;
        $end = $length - 1;

        if($flags & 0x02)
            while($start < $length && mb_strpos($characters, $this[$start], 0, $this->encoding) !== false)
                ++$start;

        // Runs down to $start inclusive so the first character is tested as well,
        // otherwise a string made up of nothing but trimmable characters keeps one of them.
        if($flags & 0x04)
            while($end >= $start && mb_strpos($characters, $this[$end], 0, $this->encoding) !== false)
                --$end;

        return $this->substring($start, $end - $start + 1);
    }

    /**
     * Strips characters from both ends of the string.
     *
     * @param IString|string $characters Set of characters to strip.
     * @param bool $convertChars Whether $characters should be converted to the encoding of this string.
     * @return IString The trimmed string.
     */
    public function trim(IString|string $characters = IString::TRIM_CHARS, bool $convertChars = true): IString {
        $flags = 0x06; // start and end
        if($convertChars)
            $flags |= 0x01;
        return $this->trimInternal($characters, $flags);
    }

    /**
     * Strips characters from the start of the string.
     *
     * @param IString|string $characters Set of characters to strip.
     * @param bool $convertChars Whether $characters should be converted to the encoding of this string.
     * @return IString The trimmed string.
     */
    public function trimStart(IString|string $characters = IString::TRIM_CHARS, bool $convertChars = true): IString {
        $flags = 0x02; // start only
        if($convertChars)
            $flags |= 0x01;
        return $this->trimInternal($characters, $flags);
    }

    /**
     * Strips characters from the end of the string.
     *
     * @param IString|string $characters Set of characters to strip.
     * @param bool $convertChars Whether $characters should be converted to the encoding of this string.
     * @return IString The trimmed string.
     */
    public function trimEnd(IString|string $characters = IString::TRIM_CHARS, bool $convertChars = true): IString {
        $flags = 0x04; // end only
        if($convertChars)
            $flags |= 0x01;
        return $this->trimInternal($characters, $flags);
    }

    /**
     * Creates a lowercase version of the string.
     *
     * @return IString The lowercase string.
     */
    public function toLower(): IString {
        return new WString(mb_strtolower($this->value, $this->encoding), $this->encoding);
    }

    /**
     * Creates an uppercase version of the string.
     *
     * @return IString The uppercase string.
     */
    public function toUpper(): IString {
        return new WString(mb_strtoupper($this->value, $this->encoding), $this->encoding);
    }

    /**
     * Creates a version of the string with its elements in reverse order.
     *
     * @return IString The reversed string.
     */
    public function reverse(): IString {
        $chars = [];
        foreach($this as $char)
            $chars[] = $char;

        return new WString(implode(array_reverse($chars)), $this->encoding);
    }

    /**
     * Returns the raw bytes of the string, in the encoding of this string.
     *
     * @return string Raw string value.
     */
    public function __toString(): string {
        return $this->value;
    }

    /**
     * Creates an AString from this string.
     *
     * @param bool $convert Whether the value should be converted to ASCII first, if it is not ASCII already.
     * @return AString The resulting AString.
     */
    public function toAString(bool $convert = true): AString {
        $value = $this->value;

        // conversion is only needed when the string is NOT already ASCII
        if($convert && !self::sameEncoding('ascii', $this->encoding))
            $value = mb_convert_encoding($value, 'ascii', $this->encoding);

        return new AString($value);
    }

    /**
     * Returns the name of the encoding of this string.
     *
     * @return string Encoding name as stored by the constructor.
     */
    public function getEncoding(): string {
        return $this->encoding;
    }

    /**
     * Converts the string to another encoding.
     *
     * @param string $encoding Name of the target encoding.
     * @return WString This same instance if the encoding already matches, a converted copy otherwise.
     */
    public function convertEncoding(string $encoding): WString {
        if(self::sameEncoding($encoding, $this->encoding))
            return $this;

        $value = mb_convert_encoding($this->value, $encoding, $this->encoding);
        return new WString($value, $encoding);
    }

    /**
     * Checks if an offset exists in the string.
     *
     * You should call isset($string[$offset]) instead of $string->offsetExists($offset).
     *
     * @see https://www.php.net/manual/en/arrayaccess.offsetexists.php
     * @param int $offset Character offset.
     * @return bool true if it exists, false if not.
     */
    public function offsetExists(mixed $offset): bool {
        $offset = (int)$offset;
        return $offset >= 0 && $offset < $this->getLength();
    }

    /**
     * Gets an offset from the string.
     *
     * You should do $string[$offset] instead of $string->offsetGet($offset).
     *
     * @see https://www.php.net/manual/en/arrayaccess.offsetget.php
     * @param int $offset Character offset.
     * @return string Character at that offset.
     */
    public function offsetGet(mixed $offset): mixed {
        return mb_substr($this->value, (int)$offset, 1, $this->encoding);
    }

    /**
     * Gets an iterator object for this string.
     *
     * @return StringIterator An iterator for this string.
     */
    public function getIterator(): Traversable {
        return new StringIterator($this);
    }

    /**
     * Returns the data which should be serialized as json.
     *
     * @see https://www.php.net/manual/en/jsonserializable.jsonserialize.php
     * @return mixed Data to be passed to json_encode.
     */
    public function jsonSerialize(): mixed {
        return $this->value;
    }

    /**
     * Returns the data which should be serialised as bencode.
     *
     * @return mixed The raw string value.
     */
    public function bencodeSerialise(): mixed {
        return $this->value;
    }

    /**
     * Gets a serialized representation of this object.
     *
     * @return array Serialized data, the encoding name followed by the raw value.
     */
    public function __serialize(): array {
        return [$this->encoding, $this->value];
    }

    /**
     * Reconstructs an object from a serialized string.
     *
     * The data is taken as-is, the value is not checked against the encoding.
     *
     * @param array $serialized Serialized data, in the layout produced by __serialize.
     */
    public function __unserialize(array $serialized): void {
        [$this->encoding, $this->value] = $serialized;
    }

    /**
     * Checks whether this string is identical to another.
     *
     * @param mixed $other An instance of IString or a PHP string.
     * @return bool true if the strings have the same value, false if not.
     */
    public function equals(mixed $other): bool {
        return $this->compare($other) === 0;
    }

    /**
     * Compares this string to another.
     *
     * The other string is converted to the encoding of this string, after which the raw bytes are compared.
     *
     * @param mixed $other An instance of IString or a PHP string.
     * @return int The result of strcmp: negative if this string sorts first, positive if it sorts last, 0 if equal.
     */
    public function compare(mixed $other): int {
        $other = self::castEncoding($other, $this->encoding);
        return strcmp($this->value, (string)$other);
    }

    /**
     * Creates a new identical WString instance.
     *
     * This method is somewhat pointless given the immutable nature of this object,
     * but it is preferable to people calling ->substring(0) to get a copy.
     *
     * @return WString A new identical instance of WString.
     */
    public function clone(): mixed {
        return new WString($this->value, $this->encoding);
    }

    /**
     * Runs a callback with the mbstring regex encoding set to the encoding of this string.
     *
     * @param callable $callback Code to run, called without arguments.
     * @return mixed Whatever $callback returned.
     */
    private function doRegex(callable $callback): mixed {
        $previous = self::getRegexEncoding();
        self::setRegexEncoding($this->encoding);

        try {
            return $callback();
        } finally {
            // the regex encoding is global state, it has to be put back even if the callback throws
            self::setRegexEncoding($previous);
        }
    }

    /**
     * Resolves an encoding name to the name used for storing and comparing it.
     *
     * @param string $encoding Encoding name or alias.
     * @return string The preferred MIME name if mbstring has one, $encoding unchanged otherwise.
     */
    private static function canonicalEncodingName(string $encoding): string {
        // mb_preferred_mime_name returns false for encodings that have no MIME name
        $name = mb_preferred_mime_name($encoding);
        return is_string($name) && $name !== '' ? $name : $encoding;
    }

    /**
     * Checks whether two encoding names refer to the same encoding.
     *
     * @param string $name1 First encoding name.
     * @param string $name2 Second encoding name.
     * @return bool true if both resolve to the same name, false if not.
     */
    private static function sameEncoding(string $name1, string $name2): bool {
        return strcasecmp(self::canonicalEncodingName($name1), self::canonicalEncodingName($name2)) === 0;
    }

    /**
     * Joins an iterable object together with a separator to create a string.
     *
     * WString elements are converted to the target encoding, every other element is cast to a string as-is.
     *
     * @param iterable $source Source object.
     * @param IString|string $separator Separator to use as glue.
     * @param ?string $encoding Desired encoding, null for Index default.
     * @return WString Resulting string.
     */
    public static function join(iterable $source, IString|string $separator = '', ?string $encoding = null): WString {
        $encoding ??= self::getDefaultEncoding();
        $separator = self::castEncoding($separator, $encoding);

        $parts = [];
        foreach($source as $value)
            $parts[] = $value instanceof WString
                ? $value->convertEncoding($encoding)->value
                : (string)$value;

        return new WString(implode($separator->value, $parts), $encoding);
    }

    /**
     * Gets the encoding used when none is given explicitly.
     *
     * @return string Name of the default encoding.
     */
    public static function getDefaultEncoding(): string {
        return self::$defaultEncoding;
    }

    /**
     * Sets the encoding used when none is given explicitly.
     *
     * An encoding name mbstring does not recognise is ignored and leaves the current default in place.
     *
     * @param string $encoding Name of the new default encoding, empty to go back to DEFAULT_ENCODING.
     */
    public static function setDefaultEncoding(string $encoding = ''): void {
        if(empty($encoding)) {
            self::$defaultEncoding = self::DEFAULT_ENCODING;
            return;
        }

        try {
            if(mb_encoding_aliases($encoding) !== false)
                self::$defaultEncoding = $encoding;
        } catch(ValueError) {
            // unknown encoding name: deliberately ignored, the previous default stays active
        }
    }

    /**
     * Gets the mbstring internal encoding.
     *
     * @return string Name of the internal encoding.
     */
    public static function getInternalEncoding(): string {
        return mb_internal_encoding();
    }

    /**
     * Sets the mbstring internal encoding.
     *
     * @param string $encoding Name of the new internal encoding.
     */
    public static function setInternalEncoding(string $encoding): void {
        mb_internal_encoding($encoding);
    }

    /**
     * Gets the mbstring regex encoding.
     *
     * @return string Name of the regex encoding.
     */
    public static function getRegexEncoding(): string {
        return mb_regex_encoding();
    }

    /**
     * Sets the mbstring regex encoding.
     *
     * @param string $encoding Name of the new regex encoding.
     */
    public static function setRegexEncoding(string $encoding): void {
        mb_regex_encoding($encoding);
    }

    /**
     * Creates a WString from a raw string, using the mbstring internal encoding.
     *
     * @param string $raw Raw string.
     * @return WString The string tagged with the internal encoding.
     */
    public static function fromRequest(string $raw): WString {
        return new WString($raw, self::getInternalEncoding());
    }

    /**
     * Returns a reusable empty string instance.
     *
     * The instance is created on first use, with whatever the default encoding is at that moment.
     *
     * @return WString An empty string.
     */
    public static function empty(): WString {
        static $empty = null;
        $empty ??= new WString('');
        return $empty;
    }

    /**
     * Turns a value into a WString.
     *
     * @param mixed $string A WString (returned as-is), an AString, or anything that can be cast to a string.
     * @return WString The resulting string; values other than WString are created with the 'ascii' encoding.
     */
    public static function cast(mixed $string): WString {
        if($string instanceof WString)
            return $string;
        if($string instanceof AString)
            return $string->toWString('ascii', false);

        return new WString((string)$string, 'ascii');
    }

    /**
     * Turns a value into a WString in a specific encoding.
     *
     * @param mixed $string Value to cast, see cast.
     * @param string $encoding Name of the desired encoding.
     * @return WString The resulting string in $encoding.
     */
    public static function castEncoding(mixed $string, string $encoding): WString {
        return self::cast($string)->convertEncoding($encoding);
    }
}