2022-09-13 13:13:11 +00:00
|
|
|
<?php
|
|
|
|
// WString.php
|
|
|
|
// Created: 2021-06-22
|
2024-08-03 20:27:50 +00:00
|
|
|
// Updated: 2024-08-03
|
2022-09-13 13:13:11 +00:00
|
|
|
|
|
|
|
namespace Index;
|
|
|
|
|
2024-01-04 02:07:43 +00:00
|
|
|
use Stringable;
|
2022-09-13 13:13:11 +00:00
|
|
|
|
|
|
|
/**
 * Provides various helper methods for multibyte strings.
 *
 * All methods are static. Wherever a Stringable is accepted it is cast to string before use.
 */
final class WString {
    /**
     * Default characters for ::trim, ::trimStart and ::trimEnd.
     *
     * The literal is encoded in the character set named by TRIM_CHARS_CHARSET.
     *
     * @var string
     */
    public const TRIM_CHARS = "\0\t\n\v\f\r \u{85}\u{a0}\u{1680}\u{180e}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200a}\u{200b}\u{200c}\u{200d}\u{2028}\u{2029}\u{202f}\u{205f}\u{2060}\u{3000}\u{feff}";

    /** Character set assumed for the trim character list when the caller does not name one. */
    private const TRIM_CHARS_CHARSET = 'UTF-8';

    /** Flag: strip characters from the start of the string. */
    private const TRIM_START = 0x01;

    /** Flag: strip characters from the end of the string. */
    private const TRIM_END = 0x02;

    /** Flag: strip characters from both ends of the string. */
    private const TRIM_ALL = self::TRIM_START | self::TRIM_END;

    /**
     * Checks if a multibyte string starts with a given substring.
     *
     * @param Stringable|string $haystack String to search in.
     * @param Stringable|string $needle Substring to search for in the haystack.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return bool true if haystack begins with needle, false otherwise.
     */
    public static function startsWith(Stringable|string $haystack, Stringable|string $needle, ?string $encoding = null): bool {
        return mb_strpos((string)$haystack, (string)$needle, encoding: $encoding) === 0;
    }

    /**
     * Checks if a multibyte string ends with a given substring.
     *
     * An empty needle always matches, which mirrors ::startsWith.
     *
     * @param Stringable|string $haystack String to search in.
     * @param Stringable|string $needle Substring to search for in the haystack.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return bool true if haystack ends with needle, false otherwise.
     */
    public static function endsWith(Stringable|string $haystack, Stringable|string $needle, ?string $encoding = null): bool {
        $haystack = (string)$haystack;
        $needle = (string)$needle;

        // A negative offset of -0 is just 0, which would select the whole haystack
        // instead of an empty tail, so the empty needle has to be handled up front.
        $needleLength = mb_strlen($needle, $encoding);
        if($needleLength === 0)
            return true;

        return mb_substr($haystack, -$needleLength, encoding: $encoding) === $needle;
    }

    /**
     * Shared implementation of ::trim, ::trimStart and ::trimEnd.
     *
     * @param Stringable|string $string Input string.
     * @param string $chars Characters to strip, encoded as $charsEncoding.
     * @param ?string $encoding Character encoding of $string. null for mb_internal_encoding value.
     * @param ?string $charsEncoding Character encoding of $chars. null for TRIM_CHARS_CHARSET.
     * @param int $flags Combination of TRIM_START and TRIM_END selecting which ends are stripped.
     * @return string Trimmed string, in the same encoding as the input.
     */
    private static function trimInternal(Stringable|string $string, string $chars, ?string $encoding, ?string $charsEncoding, int $flags): string {
        $encoding = $encoding === null ? mb_internal_encoding() : mb_preferred_mime_name($encoding);
        if(!is_string($encoding)) $encoding = 'UTF-8';

        $charsEncoding = $charsEncoding === null ? self::TRIM_CHARS_CHARSET : mb_preferred_mime_name($charsEncoding);
        if(!is_string($charsEncoding)) $charsEncoding = 'UTF-8';

        // The trim characters have to be expressed in the encoding of the input string
        // before the two can be compared character by character.
        if(strcasecmp($encoding, $charsEncoding) !== 0) {
            $questionMarkCharsEnc = mb_convert_encoding('?', $charsEncoding, 'utf-8');
            if(!is_string($questionMarkCharsEnc)) $questionMarkCharsEnc = '?';

            $questionMarkStrEnc = mb_convert_encoding('?', $encoding, 'utf-8');
            if(!is_string($questionMarkStrEnc)) $questionMarkStrEnc = '?';

            // Remember whether the caller asked for '?' explicitly, before conversion can introduce more.
            $hasQuestionMark = mb_strpos($chars, $questionMarkCharsEnc, encoding: $charsEncoding) !== false;

            $converted = mb_convert_encoding($chars, $encoding, $charsEncoding);
            $chars = is_string($converted) ? $converted : '';

            // Characters that do not exist in the target encoding come out of the conversion as the
            // substitute character. Unless the caller listed '?' themselves, those placeholders must
            // not end up in the trim set, otherwise literal question marks would be stripped.
            // NOTE(review): this assumes mb_substitute_character is left at its default of '?'.
            if(!$hasQuestionMark)
                $chars = implode(array_filter(
                    mb_str_split($chars, encoding: $encoding),
                    fn(string $char) => $char !== $questionMarkStrEnc
                ));
        }

        // Lookup table keyed by character so membership tests do not rescan the list.
        $trimSet = array_flip(mb_str_split($chars, encoding: $encoding));

        $string = (string)$string;
        $split = mb_str_split($string, encoding: $encoding);
        $length = mb_strlen($string, $encoding);

        $start = 0;
        $end = $length - 1;

        if(($flags & self::TRIM_START) !== 0)
            while($start < $length && isset($trimSet[$split[$start]]))
                ++$start;

        // Walk back as far as $start (inclusive) so that a string made up solely of
        // trim characters collapses to an empty string, including its first character.
        if(($flags & self::TRIM_END) !== 0)
            while($end >= $start && isset($trimSet[$split[$end]]))
                --$end;

        return mb_substr($string, $start, $end - $start + 1, $encoding);
    }

    /**
     * Strip whitespace (or other characters) from the start and end of a multibyte string.
     *
     * @param Stringable|string $string Input string.
     * @param string $chars Characters to strip. List all characters you want. .. operator from trim is not supported.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @param ?string $charsEncoding Character encoding of $chars. null for UTF-8.
     * @return string Trimmed string.
     */
    public static function trim(Stringable|string $string, string $chars = self::TRIM_CHARS, ?string $encoding = null, ?string $charsEncoding = null): string {
        return self::trimInternal($string, $chars, $encoding, $charsEncoding, self::TRIM_ALL);
    }

    /**
     * Strip whitespace (or other characters) from the start of a multibyte string.
     *
     * @param Stringable|string $string Input string.
     * @param string $chars Characters to strip. List all characters you want. .. operator from ltrim is not supported.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @param ?string $charsEncoding Character encoding of $chars. null for UTF-8.
     * @return string Trimmed string.
     */
    public static function trimStart(Stringable|string $string, string $chars = self::TRIM_CHARS, ?string $encoding = null, ?string $charsEncoding = null): string {
        return self::trimInternal($string, $chars, $encoding, $charsEncoding, self::TRIM_START);
    }

    /**
     * Strip whitespace (or other characters) from the end of a multibyte string.
     *
     * @param Stringable|string $string Input string.
     * @param string $chars Characters to strip. List all characters you want. .. operator from rtrim is not supported.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @param ?string $charsEncoding Character encoding of $chars. null for UTF-8.
     * @return string Trimmed string.
     */
    public static function trimEnd(Stringable|string $string, string $chars = self::TRIM_CHARS, ?string $encoding = null, ?string $charsEncoding = null): string {
        return self::trimInternal($string, $chars, $encoding, $charsEncoding, self::TRIM_END);
    }

    /**
     * Reverses a multibyte string.
     *
     * The string is reversed character by character as split by mb_str_split.
     *
     * @param Stringable|string $string String to reverse.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return string Reversed string.
     */
    public static function reverse(Stringable|string $string, ?string $encoding = null): string {
        return implode(array_reverse(mb_str_split((string)$string, encoding: $encoding)));
    }

    /**
     * Counts unique characters in a string.
     *
     * @param Stringable|string $string String to count unique characters of.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return int Unique character count.
     */
    public static function countUnique(Stringable|string $string, ?string $encoding = null): int {
        return count(array_unique(mb_str_split((string)$string, encoding: $encoding)));
    }

    /**
     * Check if a multibyte string is null or whitespace.
     *
     * Whitespace means the characters listed in TRIM_CHARS; an empty string also counts.
     *
     * @param Stringable|string|null $string String to check for whitespace.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return bool true if the string is whitespace, false if not.
     */
    public static function nullOrWhitespace(Stringable|string|null $string, ?string $encoding = null): bool {
        return $string === null || self::trim((string)$string, encoding: $encoding) === '';
    }
}
|