From 16967a0d27d389eab0c8c069deb968eefc082de9 Mon Sep 17 00:00:00 2001 From: flashwave Date: Sat, 26 Dec 2020 04:12:26 +0000 Subject: [PATCH] more fwiffing --- lib/FWIF/FWIF.php | 236 ++++++++++++------- lib/FWIF/FWIFDecodeStream.php | 42 ---- lib/FWIF/FWIFUnsupportedVersionException.php | 4 + public/index.php | 4 +- src/Dummy/DummyPackage.php | 8 +- 5 files changed, 158 insertions(+), 136 deletions(-) delete mode 100644 lib/FWIF/FWIFDecodeStream.php create mode 100644 lib/FWIF/FWIFUnsupportedVersionException.php diff --git a/lib/FWIF/FWIF.php b/lib/FWIF/FWIF.php index e040004..ae77074 100644 --- a/lib/FWIF/FWIF.php +++ b/lib/FWIF/FWIF.php @@ -1,15 +1,19 @@ 'Null', self::TYPE_INTEGER => 'Integer', @@ -52,6 +58,7 @@ class FWIF { // apparently this is faster than mb_check_encoding($string, 'utf-8'); // on PHP 7.1 on Windows at least, perhaps investigate this later + // UPDATE TODO: does this even make any sense with other internal encodings? private static function isUTF8String(string $string): bool { return preg_match(self::UTF8, $string) === 1; } @@ -76,27 +83,50 @@ class FWIF { } public static function encode($data, int $flags = self::DEFAULT): string { + $encoded = self::encodeInternal($data, $flags); + if(!($flags & self::EXCLUDE_VERSION)) + $encoded = chr(self::VERSION) . $encoded; + return $encoded; + } + private static function encodeInternal($data, int $flags): string { if($data instanceof FWIFSerializable) $data = $data->fwifSerialize(); $type = self::detectType($data, $flags); return chr($type) . self::{'encode' . self::CODECS[$type]}($data, $flags); } - public static function decode(string $data, int $flags = self::DEFAULT) { - return self::decodeInternal(new FWIFDecodeStream($data), $flags); + public static function decode($data, int $flags = self::DEFAULT) { + if(is_string($data)) { + $fd = fopen('php://memory', 'rb+'); + fwrite($fd, $data); + fseek($fd, 0); + $data = $fd; + } + if(!is_resource($data)) + throw new InvalidArgumentException('$data must be either a string or a file handle.'); + if(!($flags & self::EXCLUDE_VERSION)) { + $version = ord(fgetc($data)); + if($version < 1 || $version > 254) + throw new InvalidArgumentException('$data is not a valid FWIF serialized stream.'); + if($version > self::VERSION) + throw new FWIFUnsupportedVersionException; + } + $decoded = self::decodeInternal($data, $flags); + if(isset($fd)) + fclose($fd); + return $decoded; } - - private static function decodeInternal(FWIFDecodeStream $data, int $flags) { - $type = $data->readByte(); + private static function decodeInternal($data, int $flags) { + $type = ord(fgetc($data)); if(!array_key_exists($type, self::CODECS)) { - $hexType = dechex($type); $hexPos = dechex($data->getPosition()); - throw new FWIFUnsupportedTypeException("Unsupported type {$type} (0x{$hexType}) at position {$data->getPosition()} (0x{$hexPos})"); + $hexType = dechex($type); $pos = ftell($data); $hexPos = dechex($pos); + throw new FWIFUnsupportedTypeException("Unsupported type {$type} (0x{$hexType}) at position {$pos} (0x{$hexPos})"); } return self::{'decode' . self::CODECS[$type]}($data, $flags); } private static function encodeNull($data, int $flags): string { return ''; } - private static function decodeNull(FWIFDecodeStream $data, int $flags) { return null; } + private static function decodeNull($data, int $flags) { return null; } private static function encodeInteger(int $number, int $flags): string { $packed = ''; $more = 1; $negative = $number < 0; $size = PHP_INT_SIZE * 8; @@ -113,10 +143,10 @@ class FWIF { } return $packed; } - private static function decodeInteger(FWIFDecodeStream $data, int $flags): int { + private static function decodeInteger($data, int $flags): int { $number = 0; $shift = 0; $o = 0; $size = PHP_INT_SIZE * 8; do { - $byte = $data->readByte(); + $byte = ord(fgetc($data)); $number |= ($byte & 0x7F) << $shift; $shift += 7; } while($byte & 0x80); @@ -125,45 +155,50 @@ class FWIF { return $number; } + // I still don't like these private static function encodeFloat(float $number, int $flags): string { return pack('E', $number); } - private static function decodeFloat(FWIFDecodeStream $data, int $flags): float { - return unpack('E', $data->readString(8))[1]; + private static function decodeFloat($data, int $flags): float { + return unpack('E', fread($data, 8))[1]; } private static function encodeString(string $string, int $flags): string { - $packed = ''; $string = unpack('C*', mb_convert_encoding($string, 'utf-8')); + $packed = ''; + $string = unpack('C*', mb_convert_encoding($string, 'utf-8', mb_internal_encoding())); foreach($string as $char) $packed .= chr($char); return $packed . chr(self::TRAILER); } - private static function decodeAsciiString(FWIFDecodeStream $data, int $flags): string { - $string = ''; + private static function decodeString($data, int $flags): string { + $packed = ''; for(;;) { - $byte = $data->readByte(); - if($byte === self::TRAILER) + $char = fgetc($data); $byte = ord($char); + if($byte == self::TRAILER) break; - $string .= chr($byte); + $packed .= $char; + if(($byte & 0xF8) == 0xF0) + $packed .= fread($data, 3); + elseif(($byte & 0xF0) == 0xE0) + $packed .= fread($data, 2); + elseif(($byte & 0xE0) == 0xC0) + $packed .= fgetc($data); } - return $string; - } - private static function decodeString(FWIFDecodeStream $data, int $flags): string { // This should decode based on the utf-8 spec rather than just - return mb_convert_encoding(self::decodeAsciiString($data, $flags), 'utf-8'); // grabbing the FF terminated string representation. + return mb_convert_encoding($packed, mb_internal_encoding(), 'utf-8'); } private static function encodeArray(array $array, int $flags): string { $packed = ''; foreach($array as $value) - $packed .= self::encode($value, $flags); + $packed .= self::encodeInternal($value, $flags); return $packed . chr(self::TRAILER); } - private static function decodeArray(FWIFDecodeStream $data, int $flags): array { + private static function decodeArray($data, int $flags): array { $array = []; for(;;) { - if($data->readByte() === self::TRAILER) + if(ord(fgetc($data)) === self::TRAILER) break; - $data->stepBack(); + fseek($data, -1, SEEK_CUR); $array[] = self::decodeInternal($data, $flags); } return $array; @@ -172,17 +207,26 @@ class FWIF { private static function encodeObject($object, int $flags): string { $packed = ''; $array = (array)$object; foreach($array as $name => $value) - $packed .= $name . chr(self::TRAILER) . self::encode($value, $flags); + $packed .= mb_convert_encoding($name, 'us-ascii', mb_internal_encoding()) . chr(self::TRAILER) . self::encodeInternal($value, $flags); return $packed . chr(self::TRAILER); } - private static function decodeObject(FWIFDecodeStream $data, int $flags): object { + private static function decodeObjectKey($data, int $flags): string { + $packed = ''; + for(;;) { + $char = fgetc($data); + if(ord($char) === self::TRAILER) + break; + $packed .= $char; + } + return mb_convert_encoding($packed, mb_internal_encoding(), 'us-ascii'); + } + private static function decodeObject($data, int $flags): object { $array = []; for(;;) { - if($data->readByte() === self::TRAILER) + if(ord(fgetc($data)) === self::TRAILER) break; - $data->stepBack(); - $name = self::decodeAsciiString($data, $flags); - $array[$name] = self::decodeInternal($data, $flags); + fseek($data, -1, SEEK_CUR); + $array[self::decodeObjectKey($data, $flags)] = self::decodeInternal($data, $flags); } return (object)$array; } @@ -190,51 +234,41 @@ class FWIF { private static function encodeBuffer(string $buffer, int $flags): string { return self::encodeInteger(strlen($buffer), $flags) . $buffer; } - private static function decodeBuffer(FWIFDecodeStream $data, int $flags): string { - return $data->readString(self::decodeInteger($data, $flags)); + private static function decodeBuffer($data, int $flags): string { + return fread($data, self::decodeInteger($data, $flags)); } - private const DATETIME_FLAG_TIME = 0x01; - private const DATETIME_FLAG_MILLI = 0x02; - private const DATETIME_FLAG_MASK = 0x03; + private const DATETIME_FLAG_TIME = 0x40; + private const DATETIME_FLAG_MILLI = 0x20; - private const DATETIME_YEAR_SIGN = 0x800000; + private const DATETIME_YEAR_SIGN = 0x40000000; private const DATETIME_YEAR_MASK = 0x3FFF; - private const DATETIME_YEAR_SHIFT = 9; + private const DATETIME_YEAR_SHIFT = 16; // << private const DATETIME_MONTH_MASK = 0x0F; - private const DATETIME_MONTH_SHIFT = 5; + private const DATETIME_MONTH_SHIFT = 12; // << private const DATETIME_DAY_MASK = 0x1F; + private const DATETIME_DAY_SHIFT = 7; // << private const DATETIME_HOUR_MASK = 0x1F; - private const DATETIME_HOUR_SHIFT = 12; private const DATETIME_MINS_MASK = 0x3F; - private const DATETIME_MINS_SHIFT = 6; + private const DATETIME_MINS_SHIFT = 8; // << private const DATETIME_SECS_MASK = 0x3F; + private const DATETIME_SECS_SHIFT = 2; // << - private const DATETIME_MILLI_MASK = 0xFFFFF; + private const DATETIME_MILLI_HI_MASK = 0x300; + private const DATETIME_MILLI_HI_SHIFT = 8; // >> + private const DATETIME_MILLI_LO_MASK = 0x0FF; - /* One block Chunked - +--------+ +--------+ +--------+ - 0 | YYYYYYY| |YYYYYYYY| |FFF.....| - 8 |YYYYYYYY| |YYYYYYYM| +--------+ - 12 | MMMM| |MMMDDDDD| - 16 | DDDDD| +--------+ - 24 |FFFHHHHH| FHmS and U seperate Semicombined FHmSU - 32 | mmmmmm| +--------+ +--------+ +--------+ +--------+ - 40 | SSSSSS| |FFFHHHHH| |UUUUUUUU| |FFFHHHHH| |UUUUUUUU| - 48 |uuuuuuuu| |mmmmmmSS| |UUUUUUUU| OR |mmmmmmSS| |UUUUUUUU| - 56 |uuuuuuuu| |SSSS....| |UUUU....| |SSSSUUUU| +--------+ - 64 |uuuuuuuu| +--------+ +--------+ +--------+ - +--------+ In both situations the upper byte of FHmS would always be present for the flags. - Meaning the thing would take up at least 4 bytes and at most 9 bytes with 1 unused bit. - Most situations I'd imagine you wouldn't really care about the milliseconds, - in which you end up with 7 bytes with 5 unused bits. - Is there a benefit to this over just using .NET-style ticks? - Yes, it can be entirely handled using just 32-bit integers. + /* +--------+--------+ + * |.YYYYYYY|YYYYYYYY| + * |MMMMDDDD|DTVHHHHH| + * |..mmmmmm|SSSSSSvv| + * |vvvvvvvv| | + * +--------+--------+ */ private static function encodeDateTime(DateTimeInterface $dt, int $flags): string { @@ -245,52 +279,74 @@ class FWIF { if($dt->getTimezone()->getOffset($dt) !== 0) $dt = DateTime::createFromInterface($dt)->setTimezone($utc); - $dtf = 0; - $year = (int)$dt->format('Y'); $month = (int)$dt->format('n'); $day = (int)$dt->format('j'); + $hours = (int)$dt->format('G'); $subYear = $year < 0; if($subYear) $year = ~$year; - $ymd = $subYear ? self::DATETIME_YEAR_SIGN : 0; - $ymd |= ( $year & self::DATETIME_YEAR_MASK) << self::DATETIME_YEAR_SHIFT; - $ymd |= (($month - 1) & self::DATETIME_MONTH_MASK) << self::DATETIME_MONTH_SHIFT; - $ymd |= (($day - 1) & self::DATETIME_DAY_MASK); + $ymdfh = $subYear ? self::DATETIME_YEAR_SIGN : 0; + $ymdfh |= ($year & self::DATETIME_YEAR_MASK) << self::DATETIME_YEAR_SHIFT; + $ymdfh |= ($month & self::DATETIME_MONTH_MASK) << self::DATETIME_MONTH_SHIFT; + $ymdfh |= ($day & self::DATETIME_DAY_MASK) << self::DATETIME_DAY_SHIFT; + $ymdfh |= ($hours & self::DATETIME_HOUR_MASK); - var_dump(str_pad(decbin($ymd), 24, '0', STR_PAD_LEFT)); - - $hours = (int)$dt->format('G'); $mins = (int)$dt->format('i'); $secs = (int)$dt->format('s'); - $millis = ($flags & self::DISCARD_MILLISECONDS) ? 0 : (int)$dt->format('u'); + $millis = ($flags & self::DISCARD_MILLISECONDS) ? 0 : (int)$dt->format('v'); - // Add year, month, day - - if($hours > 0 || $mins > 0 || $secs > 0 || $millis > 0) { - $dtf |= self::DATETIME_FLAG_TIME; - $hms = 0; - $hms |= ($hours & self::DATETIME_HOUR_MASK) << self::DATETIME_HOUR_SHIFT; - $hms |= ($mins & self::DATETIME_MINS_MASK) << self::DATETIME_MINS_SHIFT; - $hms |= ($secs & self::DATETIME_SECS_MASK); - - var_dump(str_pad(decbin($hms), 17, '0', STR_PAD_LEFT)); + if($mins > 0 || $secs > 0 || $millis > 0) { + $ymdfh |= self::DATETIME_FLAG_TIME; + $msv = 0; + $msv |= ($mins & self::DATETIME_MINS_MASK) << self::DATETIME_MINS_SHIFT; + $msv |= ($secs & self::DATETIME_SECS_MASK) << self::DATETIME_SECS_SHIFT; if($millis > 0) { - $dtf |= self::DATETIME_FLAG_MILLI; - $millis &= self::DATETIME_MILLI_MASK; - - var_dump(str_pad(decbin($millis), 20, '0', STR_PAD_LEFT)); + $ymdfh |= self::DATETIME_FLAG_MILLI; + $msv |= ($millis & self::DATETIME_MILLI_HI_MASK) >> self::DATETIME_MILLI_HI_SHIFT; + $v = $millis & self::DATETIME_MILLI_LO_MASK; } } - echo "\r\n"; + $packed = pack('N', $ymdfh); + if($ymdfh & self::DATETIME_FLAG_TIME) { + $packed .= pack('n', $msv); + if($ymdfh & self::DATETIME_FLAG_MILLI) + $packed .= chr($v); + } - return ''; + return $packed; } - private static function decodeDateTime(FWIFDecodeStream $data, int $flags): DateTimeInterface { - return new \DateTime(); + private static function decodeDateTime($data, int $flags): DateTimeInterface { + $ymdfh = unpack('N', fread($data, 4))[1]; + $hasMsv = $ymdfh & self::DATETIME_FLAG_TIME; + $hasV = $hasMsv && ($ymdfh & self::DATETIME_FLAG_MILLI); + $msv = $hasMsv ? unpack('n', fread($data, 2))[1] : 0; + $v = $hasV ? ord(fgetc($data)) : 0; + + $year = ($ymdfh >> self::DATETIME_YEAR_SHIFT) & self::DATETIME_YEAR_MASK; + $month = ($ymdfh >> self::DATETIME_MONTH_SHIFT) & self::DATETIME_MONTH_MASK; + $day = ($ymdfh >> self::DATETIME_DAY_SHIFT) & self::DATETIME_DAY_MASK; + $hour = $ymdfh & self::DATETIME_HOUR_MASK; + + if($ymdfh & self::DATETIME_YEAR_SIGN) + $year = ~$year; + $dt = sprintf('%04d-%02d-%02dT%02d:', $year, $month, $day, $hour); + + if($hasMsv) { + $mins = ($msv >> self::DATETIME_MINS_SHIFT) & self::DATETIME_MINS_MASK; + $secs = ($msv >> self::DATETIME_SECS_SHIFT) & self::DATETIME_SECS_MASK; + $dt .= sprintf('%02d:%02d', $mins, $secs); + if($hasV) { + $millis = ($msv << self::DATETIME_MILLI_HI_SHIFT) & self::DATETIME_MILLI_HI_MASK; + $millis |= $v; + $dt .= sprintf('.%03d', $millis); + } + } else $dt .= '00:00'; + + return new DateTimeImmutable($dt); } } diff --git a/lib/FWIF/FWIFDecodeStream.php b/lib/FWIF/FWIFDecodeStream.php deleted file mode 100644 index d638571..0000000 --- a/lib/FWIF/FWIFDecodeStream.php +++ /dev/null @@ -1,42 +0,0 @@ -body = $body; - $this->length = strlen($body); - } - - public function getLength(): int { - return $this->length; - } - - public function getPosition(): int { - return $this->position; - } - - public function stepBack(): void { - $this->position = max(0, $this->position - 1); - } - - public function readByte(): int { - if($this->position + 1 >= $this->length) - return FWIF::TRAILER; - return ord($this->body[$this->position++]); - } - - public function readChar(): string { - return chr($this->readByte()); - } - - public function readString(int $length): string { - // Bounds checks? What are those? - $string = substr($this->body, $this->position, $length); - $this->position += $length; - return $string; - } -} diff --git a/lib/FWIF/FWIFUnsupportedVersionException.php b/lib/FWIF/FWIFUnsupportedVersionException.php new file mode 100644 index 0000000..89e1994 --- /dev/null +++ b/lib/FWIF/FWIFUnsupportedVersionException.php @@ -0,0 +1,4 @@ +match('GET', '/packages')) { $packages = empty($tags) ? Patchouli::getPackages() : Patchouli::getPackagesWithTags($tags); $encoded = FWIF::encode($packages); - echo strlen($encoded) . ' ' . $encoded; + echo 'FWIF ' . strlen($encoded) . ' bytes ' . $encoded; echo "\r\n\r\n--------------------\r\n\r\n"; $jsonEncoded = json_encode($packages, JSON_INVALID_UTF8_SUBSTITUTE); - echo strlen($jsonEncoded) . ' ' . $jsonEncoded; + echo 'JSON ' . strlen($jsonEncoded) . ' bytes ' . $jsonEncoded; echo "\r\n\r\n--------------------\r\n\r\n"; diff --git a/src/Dummy/DummyPackage.php b/src/Dummy/DummyPackage.php index 03af523..df8a647 100644 --- a/src/Dummy/DummyPackage.php +++ b/src/Dummy/DummyPackage.php @@ -36,7 +36,7 @@ class DummyPackage implements IPackage, \JsonSerializable { 'neg32' => -12345678, 'neg64' => -1234567890987654, 'float' => 12345.6789, - 'invalid' => "\xFF\x25\x25\x02\xFF御坂e美琴\xFF\xFF\xFF", + 'invalid' => "\xFF\x25\x25\x02\xFF蠕。蝮F鄒守清\xFF\xFF\xFF", 'datetime' => new \DateTime('2013-01-27 23:14:44 CET'), 'datetimeNegative' => new \DateTime('-2013-01-27 23:14:44 CET'), 'datetimeNow' => new \DateTime(), @@ -54,6 +54,10 @@ class DummyPackage implements IPackage, \JsonSerializable { } public function jsonSerialize() { - return $this->fwifSerialize(); + $serial = $this->fwifSerialize(); + $serial['datetime'] = $serial['datetime']->format(\DateTimeInterface::ATOM); + $serial['datetimeNegative'] = $serial['datetimeNegative']->format(\DateTimeInterface::ATOM); + $serial['datetimeNow'] = $serial['datetimeNow']->format(\DateTimeInterface::ATOM); + return $serial; } }