index/src/WString.php

180 lines
7.7 KiB
PHP
Raw Normal View History

2022-09-13 13:13:11 +00:00
<?php
// WString.php
// Created: 2021-06-22
// Updated: 2024-08-03
2022-09-13 13:13:11 +00:00
namespace Index;
use Stringable;
2022-09-13 13:13:11 +00:00
/**
 * Provides various helper methods for multibyte strings.
 *
 * All methods are static and are built on the mbstring extension. Inputs may be
 * plain strings or Stringable objects; the latter are cast to string first.
 */
final class WString {
    /**
     * Default characters for ::trim, ::trimStart and ::trimEnd.
     *
     * Written as a UTF-8 literal (see TRIM_CHARS_CHARSET). Contains NUL, the ASCII
     * whitespace characters and a set of Unicode space, separator and zero-width
     * code points.
     *
     * @var string
     */
    public const TRIM_CHARS = "\0\t\n\v\f\r \u{85}\u{a0}\u{1680}\u{180e}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200a}\u{200b}\u{200c}\u{200d}\u{2028}\u{2029}\u{202f}\u{205f}\u{2060}\u{3000}\u{feff}";

    /** Encoding assumed for the trim character list when the caller passes none. */
    private const TRIM_CHARS_CHARSET = 'UTF-8';

    /** Flag: strip characters from the start of the string. */
    private const TRIM_START = 0x01;

    /** Flag: strip characters from the end of the string. */
    private const TRIM_END = 0x02;

    /** Flag combination: strip from both ends. */
    private const TRIM_ALL = self::TRIM_START | self::TRIM_END;

    /**
     * Checks if a multibyte string starts with a given substring.
     *
     * @param Stringable|string $haystack String to search in.
     * @param Stringable|string $needle Substring to search for in the haystack.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return bool true if haystack begins with needle, false otherwise.
     */
    public static function startsWith(Stringable|string $haystack, Stringable|string $needle, ?string $encoding = null): bool {
        return mb_strpos((string)$haystack, (string)$needle, encoding: $encoding) === 0;
    }

    /**
     * Checks if a multibyte string ends with a given substring.
     *
     * An empty needle always matches, mirroring ::startsWith and str_ends_with.
     *
     * @param Stringable|string $haystack String to search in.
     * @param Stringable|string $needle Substring to search for in the haystack.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return bool true if haystack ends with needle, false otherwise.
     */
    public static function endsWith(Stringable|string $haystack, Stringable|string $needle, ?string $encoding = null): bool {
        $haystack = (string)$haystack;
        $needle = (string)$needle;

        // A zero-length needle would make the offset below -0 === 0, which selects the
        // whole haystack instead of an empty tail, so handle it explicitly.
        if($needle === '')
            return true;

        $needleLength = mb_strlen($needle, $encoding);

        return mb_substr($haystack, -$needleLength, encoding: $encoding) === $needle;
    }

    /**
     * Resolves an optional encoding name to the name used for comparing and converting.
     *
     * @param ?string $encoding Encoding name supplied by the caller, or null.
     * @param string $default Encoding to use when none was supplied.
     * @return string Preferred MIME name of the encoding, or the given name if it has none.
     */
    private static function resolveEncoding(?string $encoding, string $default): string {
        if($encoding === null)
            return $default;

        // mb_preferred_mime_name returns false for valid encodings without a MIME name;
        // keep the caller's name in that case rather than silently switching encoding.
        $mimeName = mb_preferred_mime_name($encoding);

        return $mimeName === false ? $encoding : $mimeName;
    }

    /**
     * Converts a trim character list to the encoding of the string being trimmed.
     *
     * Each character is converted on its own and then converted back; characters that
     * do not survive the round trip (i.e. have no representation in the target encoding
     * and would be replaced by a substitute such as "?") are dropped, so they cannot
     * cause unrelated characters to be trimmed. Duplicates are removed.
     *
     * @param string $chars Characters to strip, encoded in $charsEncoding.
     * @param string $encoding Target encoding (encoding of the string being trimmed).
     * @param string $charsEncoding Encoding of $chars.
     * @return string Character list encoded in $encoding.
     */
    private static function convertTrimChars(string $chars, string $encoding, string $charsEncoding): string {
        $converted = [];

        foreach(mb_str_split($chars, 1, $charsEncoding) as $char) {
            $target = mb_convert_encoding($char, $encoding, $charsEncoding);
            if($target === false || $target === '')
                continue;

            if(mb_convert_encoding($target, $charsEncoding, $encoding) !== $char)
                continue;

            if(!in_array($target, $converted, true))
                $converted[] = $target;
        }

        return implode($converted);
    }

    /**
     * Shared implementation of ::trim, ::trimStart and ::trimEnd.
     *
     * @param Stringable|string $string Input string.
     * @param string $chars Characters to strip, listed individually.
     * @param ?string $encoding Encoding of $string. null for mb_internal_encoding value.
     * @param ?string $charsEncoding Encoding of $chars. null for UTF-8.
     * @param int $flags Combination of TRIM_START and TRIM_END selecting which ends to strip.
     * @return string Trimmed string.
     */
    private static function trimInternal(Stringable|string $string, string $chars, ?string $encoding, ?string $charsEncoding, int $flags): string {
        $encoding = self::resolveEncoding($encoding, mb_internal_encoding());
        $charsEncoding = self::resolveEncoding($charsEncoding, self::TRIM_CHARS_CHARSET);

        // The character list has to be in the same encoding as the string before the
        // two can be compared character by character.
        if(strcasecmp($encoding, $charsEncoding) !== 0)
            $chars = self::convertTrimChars($chars, $encoding, $charsEncoding);

        $string = (string)$string;
        $split = mb_str_split($string, 1, $encoding);

        $first = 0;
        $last = count($split) - 1;

        if(($flags & self::TRIM_START) !== 0)
            while($first <= $last && mb_strpos($chars, $split[$first], encoding: $encoding) !== false)
                ++$first;

        // Walk back all the way to $first (inclusive) so that a string consisting solely
        // of trimmable characters is reduced to an empty string.
        if(($flags & self::TRIM_END) !== 0)
            while($last >= $first && mb_strpos($chars, $split[$last], encoding: $encoding) !== false)
                --$last;

        if($last < $first)
            return '';

        return mb_substr($string, $first, $last - $first + 1, $encoding);
    }

    /**
     * Strip whitespace (or other characters) from the start and end of a multibyte string.
     *
     * @param Stringable|string $string Input string.
     * @param string $chars Characters to strip. List all characters you want. .. operator from trim is not supported.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @param ?string $charsEncoding Character encoding of $chars. null for UTF-8.
     * @return string Trimmed string.
     */
    public static function trim(Stringable|string $string, string $chars = self::TRIM_CHARS, ?string $encoding = null, ?string $charsEncoding = null): string {
        return self::trimInternal($string, $chars, $encoding, $charsEncoding, self::TRIM_ALL);
    }

    /**
     * Strip whitespace (or other characters) from the start of a multibyte string.
     *
     * @param Stringable|string $string Input string.
     * @param string $chars Characters to strip. List all characters you want. .. operator from ltrim is not supported.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @param ?string $charsEncoding Character encoding of $chars. null for UTF-8.
     * @return string Trimmed string.
     */
    public static function trimStart(Stringable|string $string, string $chars = self::TRIM_CHARS, ?string $encoding = null, ?string $charsEncoding = null): string {
        return self::trimInternal($string, $chars, $encoding, $charsEncoding, self::TRIM_START);
    }

    /**
     * Strip whitespace (or other characters) from the end of a multibyte string.
     *
     * @param Stringable|string $string Input string.
     * @param string $chars Characters to strip. List all characters you want. .. operator from rtrim is not supported.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @param ?string $charsEncoding Character encoding of $chars. null for UTF-8.
     * @return string Trimmed string.
     */
    public static function trimEnd(Stringable|string $string, string $chars = self::TRIM_CHARS, ?string $encoding = null, ?string $charsEncoding = null): string {
        return self::trimInternal($string, $chars, $encoding, $charsEncoding, self::TRIM_END);
    }

    /**
     * Reverses a multibyte string.
     *
     * @param Stringable|string $string String to reverse.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return string Reversed string.
     */
    public static function reverse(Stringable|string $string, ?string $encoding = null): string {
        return implode(array_reverse(mb_str_split((string)$string, encoding: $encoding)));
    }

    /**
     * Counts unique characters in a string.
     *
     * @param Stringable|string $string String to count unique characters of.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return int Unique character count.
     */
    public static function countUnique(Stringable|string $string, ?string $encoding = null): int {
        $chars = mb_str_split((string)$string, encoding: $encoding);

        // SORT_STRING keeps the comparison purely string based, matching a strict in_array check.
        return count(array_unique($chars, SORT_STRING));
    }

    /**
     * Check if a multibyte string is null or whitespace.
     *
     * Whitespace here means the characters listed in TRIM_CHARS; an empty string also counts.
     *
     * @param Stringable|string|null $string String to check for whitespace.
     * @param ?string $encoding String character encoding. null for mb_internal_encoding value.
     * @return bool true if the string is whitespace, false if not.
     */
    public static function nullOrWhitespace(Stringable|string|null $string, ?string $encoding = null): bool {
        return $string === null || self::trim((string)$string, encoding: $encoding) === '';
    }
}