From 584b4c5946123a9defe8caafc073c6add923d9e5 Mon Sep 17 00:00:00 2001 From: flashwave Date: Thu, 24 Dec 2020 03:25:10 +0000 Subject: [PATCH] wip datetime --- lib/FWIF/FWIF.php | 226 +++++++++++++++++++++++++++------- lib/FWIF/FWIFDecodeStream.php | 13 +- public/index.php | 2 +- src/Dummy/DummyPackage.php | 4 + startup.php | 2 + 5 files changed, 199 insertions(+), 48 deletions(-) diff --git a/lib/FWIF/FWIF.php b/lib/FWIF/FWIF.php index bb97148..e040004 100644 --- a/lib/FWIF/FWIF.php +++ b/lib/FWIF/FWIF.php @@ -1,20 +1,26 @@ 'Null', @@ -23,54 +29,76 @@ class FWIF { self::TYPE_STRING => 'String', self::TYPE_ARRAY => 'Array', self::TYPE_OBJECT => 'Object', + self::TYPE_BUFFER => 'Buffer', + self::TYPE_DATETIME => 'DateTime', ]; + private const UTF8 = '%^(?:' // https://www.w3.org/International/questions/qa-forms-utf-8.en + . '[\x09\x0A\x0D\x20-\x7E]' // ASCII + . '|[\xC2-\xDF][\x80-\xBF]' // non-overlong 2-byte + . '|\xE0[\xA0-\xBF][\x80-\xBF]' // excluding overlongs + . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' // straight 3-byte + . '|\xED[\x80-\x9F][\x80-\xBF]' // excluding surrogates + . '|\xF0[\x90-\xBF][\x80-\xBF]{2}' // planes 1-3 + . '|[\xF1-\xF3][\x80-\xBF]{3}' // planes 4-15 + . '|\xF4[\x80-\x8F][\x80-\xBF]{2}' // plane 16 + . ')*$%xs'; + private static function isAssocArray($array): bool { if(!is_array($array) || $array === []) return false; return array_keys($array) !== range(0, count($array) - 1); } - private static function detectType($data): int { + // apparently this is faster than mb_check_encoding($string, 'utf-8'); + // on PHP 7.1 on Windows at least, perhaps investigate this later + private static function isUTF8String(string $string): bool { + return preg_match(self::UTF8, $string) === 1; + } + + private static function detectType($data, int $flags): int { if(is_null($data)) return self::TYPE_NULL; if(is_int($data)) return self::TYPE_INTEGER; if(is_float($data)) return self::TYPE_FLOAT; - if(is_string($data)) // Should this check if a string is valid UTF-8 and swap over to TYPE_BUFFER? - return self::TYPE_STRING; - if(is_object($data) || self::isAssocArray($data)) + if(is_string($data)) + return self::isUTF8String($data) ? self::TYPE_STRING : self::TYPE_BUFFER; + if(is_object($data) || self::isAssocArray($data)) { + if($data instanceof DateTimeInterface) + return self::TYPE_DATETIME; return self::TYPE_OBJECT; + } if(is_array($data)) return self::TYPE_ARRAY; throw new FWIFUnsupportedTypeException(gettype($data)); } - public static function encode($data): string { + public static function encode($data, int $flags = self::DEFAULT): string { if($data instanceof FWIFSerializable) $data = $data->fwifSerialize(); - $type = self::detectType($data); - return chr($type) . self::{'encode' . self::CODECS[$type]}($data); + $type = self::detectType($data, $flags); + return chr($type) . self::{'encode' . self::CODECS[$type]}($data, $flags); } - public static function decode(string $data) { - return self::decodeInternal(new FWIFDecodeStream($data)); + public static function decode(string $data, int $flags = self::DEFAULT) { + return self::decodeInternal(new FWIFDecodeStream($data), $flags); } - private static function decodeInternal(FWIFDecodeStream $data) { + private static function decodeInternal(FWIFDecodeStream $data, int $flags) { $type = $data->readByte(); if(!array_key_exists($type, self::CODECS)) { $hexType = dechex($type); $hexPos = dechex($data->getPosition()); throw new FWIFUnsupportedTypeException("Unsupported type {$type} (0x{$hexType}) at position {$data->getPosition()} (0x{$hexPos})"); } - return self::{'decode' . self::CODECS[$type]}($data); + return self::{'decode' . self::CODECS[$type]}($data, $flags); } - private static function encodeNull($data): string { return ''; } - private static function decodeNull(FWIFDecodeStream $data) { return null; } + private static function encodeNull($data, int $flags): string { return ''; } + private static function decodeNull(FWIFDecodeStream $data, int $flags) { return null; } - private static function encodeInteger(int $number): string { + private static function encodeInteger(int $number, int $flags): string { $packed = ''; $more = 1; $negative = $number < 0; $size = PHP_INT_SIZE * 8; while($more) { $byte = $number & 0x7F; @@ -85,7 +113,7 @@ class FWIF { } return $packed; } - private static function decodeInteger(FWIFDecodeStream $data): int { + private static function decodeInteger(FWIFDecodeStream $data, int $flags): int { $number = 0; $shift = 0; $o = 0; $size = PHP_INT_SIZE * 8; do { $byte = $data->readByte(); @@ -97,66 +125,172 @@ class FWIF { return $number; } - private static function encodeFloat(float $number): string { + private static function encodeFloat(float $number, int $flags): string { return pack('E', $number); } - private static function decodeFloat(FWIFDecodeStream $data): float { - $packed = ''; for($i = 0; $i < 8; ++$i) $packed .= chr($data->readByte()); - return unpack('E', $packed)[1]; + private static function decodeFloat(FWIFDecodeStream $data, int $flags): float { + return unpack('E', $data->readString(8))[1]; } - private static function encodeString(string $string): string { + private static function encodeString(string $string, int $flags): string { $packed = ''; $string = unpack('C*', mb_convert_encoding($string, 'utf-8')); foreach($string as $char) $packed .= chr($char); - return $packed . chr(self::TYPE_TRAILER); + return $packed . chr(self::TRAILER); } - private static function decodeAsciiString(FWIFDecodeStream $data): string { + private static function decodeAsciiString(FWIFDecodeStream $data, int $flags): string { $string = ''; for(;;) { $byte = $data->readByte(); - if($byte === self::TYPE_TRAILER) + if($byte === self::TRAILER) break; $string .= chr($byte); } return $string; } - private static function decodeString(FWIFDecodeStream $data): string { // This should decode based on the utf-8 spec rather than just - return mb_convert_encoding(self::decodeAsciiString($data), 'utf-8'); // grabbing the FF terminated string representation. + private static function decodeString(FWIFDecodeStream $data, int $flags): string { // This should decode based on the utf-8 spec rather than just + return mb_convert_encoding(self::decodeAsciiString($data, $flags), 'utf-8'); // grabbing the FF terminated string representation. } - private static function encodeArray(array $array): string { + private static function encodeArray(array $array, int $flags): string { $packed = ''; foreach($array as $value) - $packed .= self::encode($value); - return $packed . chr(self::TYPE_TRAILER); + $packed .= self::encode($value, $flags); + return $packed . chr(self::TRAILER); } - private static function decodeArray(FWIFDecodeStream $data): array { + private static function decodeArray(FWIFDecodeStream $data, int $flags): array { $array = []; for(;;) { - if($data->readByte() === self::TYPE_TRAILER) + if($data->readByte() === self::TRAILER) break; $data->stepBack(); - $array[] = self::decodeInternal($data); + $array[] = self::decodeInternal($data, $flags); } return $array; } - private static function encodeObject($object): string { + private static function encodeObject($object, int $flags): string { $packed = ''; $array = (array)$object; foreach($array as $name => $value) - $packed .= $name . chr(self::TYPE_TRAILER) . self::encode($value); - return $packed . chr(self::TYPE_TRAILER); + $packed .= $name . chr(self::TRAILER) . self::encode($value, $flags); + return $packed . chr(self::TRAILER); } - private static function decodeObject(FWIFDecodeStream $data): object { + private static function decodeObject(FWIFDecodeStream $data, int $flags): object { $array = []; for(;;) { - if($data->readByte() === self::TYPE_TRAILER) + if($data->readByte() === self::TRAILER) break; $data->stepBack(); - $name = self::decodeAsciiString($data); - $array[$name] = self::decodeInternal($data); + $name = self::decodeAsciiString($data, $flags); + $array[$name] = self::decodeInternal($data, $flags); } return (object)$array; } + + private static function encodeBuffer(string $buffer, int $flags): string { + return self::encodeInteger(strlen($buffer), $flags) . $buffer; + } + private static function decodeBuffer(FWIFDecodeStream $data, int $flags): string { + return $data->readString(self::decodeInteger($data, $flags)); + } + + private const DATETIME_FLAG_TIME = 0x01; + private const DATETIME_FLAG_MILLI = 0x02; + private const DATETIME_FLAG_MASK = 0x03; + + private const DATETIME_YEAR_SIGN = 0x800000; + private const DATETIME_YEAR_MASK = 0x3FFF; + private const DATETIME_YEAR_SHIFT = 9; + + private const DATETIME_MONTH_MASK = 0x0F; + private const DATETIME_MONTH_SHIFT = 5; + + private const DATETIME_DAY_MASK = 0x1F; + + private const DATETIME_HOUR_MASK = 0x1F; + private const DATETIME_HOUR_SHIFT = 12; + + private const DATETIME_MINS_MASK = 0x3F; + private const DATETIME_MINS_SHIFT = 6; + + private const DATETIME_SECS_MASK = 0x3F; + + private const DATETIME_MILLI_MASK = 0xFFFFF; + + /* One block Chunked + +--------+ +--------+ +--------+ + 0 | YYYYYYY| |YYYYYYYY| |FFF.....| + 8 |YYYYYYYY| |YYYYYYYM| +--------+ + 12 | MMMM| |MMMDDDDD| + 16 | DDDDD| +--------+ + 24 |FFFHHHHH| FHmS and U seperate Semicombined FHmSU + 32 | mmmmmm| +--------+ +--------+ +--------+ +--------+ + 40 | SSSSSS| |FFFHHHHH| |UUUUUUUU| |FFFHHHHH| |UUUUUUUU| + 48 |uuuuuuuu| |mmmmmmSS| |UUUUUUUU| OR |mmmmmmSS| |UUUUUUUU| + 56 |uuuuuuuu| |SSSS....| |UUUU....| |SSSSUUUU| +--------+ + 64 |uuuuuuuu| +--------+ +--------+ +--------+ + +--------+ In both situations the upper byte of FHmS would always be present for the flags. + Meaning the thing would take up at least 4 bytes and at most 9 bytes with 1 unused bit. + Most situations I'd imagine you wouldn't really care about the milliseconds, + in which you end up with 7 bytes with 5 unused bits. + Is there a benefit to this over just using .NET-style ticks? + Yes, it can be entirely handled using just 32-bit integers. + */ + + private static function encodeDateTime(DateTimeInterface $dt, int $flags): string { + static $utc = null; + if($utc === null) + $utc = new DateTimeZone('utc'); + + if($dt->getTimezone()->getOffset($dt) !== 0) + $dt = DateTime::createFromInterface($dt)->setTimezone($utc); + + $dtf = 0; + + $year = (int)$dt->format('Y'); + $month = (int)$dt->format('n'); + $day = (int)$dt->format('j'); + + $subYear = $year < 0; + if($subYear) + $year = ~$year; + + $ymd = $subYear ? self::DATETIME_YEAR_SIGN : 0; + $ymd |= ( $year & self::DATETIME_YEAR_MASK) << self::DATETIME_YEAR_SHIFT; + $ymd |= (($month - 1) & self::DATETIME_MONTH_MASK) << self::DATETIME_MONTH_SHIFT; + $ymd |= (($day - 1) & self::DATETIME_DAY_MASK); + + var_dump(str_pad(decbin($ymd), 24, '0', STR_PAD_LEFT)); + + $hours = (int)$dt->format('G'); + $mins = (int)$dt->format('i'); + $secs = (int)$dt->format('s'); + $millis = ($flags & self::DISCARD_MILLISECONDS) ? 0 : (int)$dt->format('u'); + + // Add year, month, day + + if($hours > 0 || $mins > 0 || $secs > 0 || $millis > 0) { + $dtf |= self::DATETIME_FLAG_TIME; + $hms = 0; + $hms |= ($hours & self::DATETIME_HOUR_MASK) << self::DATETIME_HOUR_SHIFT; + $hms |= ($mins & self::DATETIME_MINS_MASK) << self::DATETIME_MINS_SHIFT; + $hms |= ($secs & self::DATETIME_SECS_MASK); + + var_dump(str_pad(decbin($hms), 17, '0', STR_PAD_LEFT)); + + if($millis > 0) { + $dtf |= self::DATETIME_FLAG_MILLI; + $millis &= self::DATETIME_MILLI_MASK; + + var_dump(str_pad(decbin($millis), 20, '0', STR_PAD_LEFT)); + } + } + + echo "\r\n"; + + return ''; + } + private static function decodeDateTime(FWIFDecodeStream $data, int $flags): DateTimeInterface { + return new \DateTime(); + } } diff --git a/lib/FWIF/FWIFDecodeStream.php b/lib/FWIF/FWIFDecodeStream.php index 3608976..d638571 100644 --- a/lib/FWIF/FWIFDecodeStream.php +++ b/lib/FWIF/FWIFDecodeStream.php @@ -25,7 +25,18 @@ class FWIFDecodeStream { public function readByte(): int { if($this->position + 1 >= $this->length) - return 0xFF; + return FWIF::TRAILER; return ord($this->body[$this->position++]); } + + public function readChar(): string { + return chr($this->readByte()); + } + + public function readString(int $length): string { + // Bounds checks? What are those? + $string = substr($this->body, $this->position, $length); + $this->position += $length; + return $string; + } } diff --git a/public/index.php b/public/index.php index 79ad9ef..04effa5 100644 --- a/public/index.php +++ b/public/index.php @@ -18,7 +18,7 @@ if($request->match('GET', '/packages')) { echo "\r\n\r\n--------------------\r\n\r\n"; - $jsonEncoded = json_encode($packages); + $jsonEncoded = json_encode($packages, JSON_INVALID_UTF8_SUBSTITUTE); echo strlen($jsonEncoded) . ' ' . $jsonEncoded; echo "\r\n\r\n--------------------\r\n\r\n"; diff --git a/src/Dummy/DummyPackage.php b/src/Dummy/DummyPackage.php index fcdbf17..03af523 100644 --- a/src/Dummy/DummyPackage.php +++ b/src/Dummy/DummyPackage.php @@ -36,6 +36,10 @@ class DummyPackage implements IPackage, \JsonSerializable { 'neg32' => -12345678, 'neg64' => -1234567890987654, 'float' => 12345.6789, + 'invalid' => "\xFF\x25\x25\x02\xFF御坂e美琴\xFF\xFF\xFF", + 'datetime' => new \DateTime('2013-01-27 23:14:44 CET'), + 'datetimeNegative' => new \DateTime('-2013-01-27 23:14:44 CET'), + 'datetimeNow' => new \DateTime(), 'array' => ['e', 'a', 0x55], 'object' => new \stdClass, 'misaka' => '御坂 美琴', diff --git a/startup.php b/startup.php index edd9e5c..b71a877 100644 --- a/startup.php +++ b/startup.php @@ -11,6 +11,8 @@ define('PAT_LIB', PAT_ROOT . '/lib'); // Other unresolved namespaces ini_set('display_errors', PAT_DEBUG ? 'on' : 'off'); error_reporting(PAT_DEBUG ? -1 : 0); +mb_internal_encoding('utf-8'); + set_include_path(PAT_SRC . PATH_SEPARATOR . PAT_LIB . PATH_SEPARATOR . get_include_path()); spl_autoload_register(function(string $className) { $parts = explode('\\', trim($className, '\\'), 2);