more fwiffing

This commit is contained in:
flash 2020-12-26 04:12:26 +00:00
parent 584b4c5946
commit 16967a0d27
5 changed files with 158 additions and 136 deletions

View file

@ -1,15 +1,19 @@
<?php <?php
namespace FWIF; namespace FWIF;
use DateInterval;
use DateTime; use DateTime;
use DateTimeInterface; use DateTimeInterface;
use DateTimeImmutable;
use DateTimeZone; use DateTimeZone;
use InvalidArgumentException;
class FWIF { class FWIF {
public const CONTENT_TYPE = 'text/plain; charset=us-ascii'; // TODO: come up with a mime type public const CONTENT_TYPE = 'text/plain; charset=us-ascii'; // TODO: come up with a mime type
public const DEFAULT = 0; public const DEFAULT = 0;
public const DISCARD_MILLISECONDS = 0x01; // Always exclude the millisecond component from DateTime public const DISCARD_MILLISECONDS = 0x01; // Always exclude the millisecond component from DateTime
public const EXCLUDE_VERSION = 0x02; // Exclude version byte at the start of the stream
public const TYPE_NULL = 0; // NULL, no data public const TYPE_NULL = 0; // NULL, no data
public const TYPE_INTEGER = 0x01; // LEB128, implicit length public const TYPE_INTEGER = 0x01; // LEB128, implicit length
@ -18,10 +22,12 @@ class FWIF {
public const TYPE_BUFFER = 0x04; // Buffer with binary data, prefixed with a LEB128 length public const TYPE_BUFFER = 0x04; // Buffer with binary data, prefixed with a LEB128 length
public const TYPE_ARRAY = 0x05; // List of values, terminated with TRAILER public const TYPE_ARRAY = 0x05; // List of values, terminated with TRAILER
public const TYPE_OBJECT = 0x06; // List of values with ASCII names, terminated with TRAILER public const TYPE_OBJECT = 0x06; // List of values with ASCII names, terminated with TRAILER
public const TYPE_DATETIME = 0x07; // A gregorian year, month and day as well as an hour, minute and seconds component, fixed length of * bytes public const TYPE_DATETIME = 0x07; // A gregorian year, month and day as well as an hour, minute, seconds and millisecond component, variable ranging from 4 to 7 bytes
public const TRAILER = 0xFF; // Termination byte public const TRAILER = 0xFF; // Termination byte
public const VERSION = 0x01; // min 1, max 254
private const CODECS = [ private const CODECS = [
self::TYPE_NULL => 'Null', self::TYPE_NULL => 'Null',
self::TYPE_INTEGER => 'Integer', self::TYPE_INTEGER => 'Integer',
@ -52,6 +58,7 @@ class FWIF {
// apparently this is faster than mb_check_encoding($string, 'utf-8'); // apparently this is faster than mb_check_encoding($string, 'utf-8');
// on PHP 7.1 on Windows at least, perhaps investigate this later // on PHP 7.1 on Windows at least, perhaps investigate this later
// UPDATE TODO: does this even make any sense with other internal encodings?
/**
 * Tells whether a byte string matches the class' UTF-8 validation pattern.
 *
 * @param string $string Bytes to test.
 * @return bool True only when preg_match() reports exactly one match.
 */
private static function isUTF8String(string $string): bool {
    $matched = preg_match(self::UTF8, $string);
    return $matched === 1;
}
@ -76,27 +83,50 @@ class FWIF {
} }
/**
 * Serialises a value into a FWIF byte string.
 *
 * Unless EXCLUDE_VERSION is set in $flags, the result is prefixed with a
 * single byte holding self::VERSION.
 *
 * @param mixed $data  Value to serialise.
 * @param int   $flags Bitmask of the public flag constants.
 * @return string The serialised bytes.
 */
public static function encode($data, int $flags = self::DEFAULT): string {
    $payload = self::encodeInternal($data, $flags);
    if($flags & self::EXCLUDE_VERSION)
        return $payload;
    return chr(self::VERSION) . $payload;
}
/**
 * Encodes a single value as a type byte followed by the codec's payload.
 *
 * Values implementing FWIFSerializable are first replaced by the result of
 * their fwifSerialize() method; no version byte is written here.
 *
 * @param mixed $data  Value to serialise.
 * @param int   $flags Bitmask forwarded to type detection and the codec.
 * @return string Type byte plus encoded payload.
 */
private static function encodeInternal($data, int $flags): string {
    $value = $data instanceof FWIFSerializable ? $data->fwifSerialize() : $data;
    $type = self::detectType($value, $flags);
    // Codec methods are named encode<Name>, looked up through the CODECS table.
    $encoder = 'encode' . self::CODECS[$type];
    return chr($type) . self::{$encoder}($value, $flags);
}
/**
 * Decodes a FWIF stream back into a PHP value.
 *
 * @param string|resource $data  Serialised bytes, or an open stream
 *                               positioned at the start of the FWIF data.
 * @param int             $flags Bitmask of the public flag constants; with
 *                               EXCLUDE_VERSION no version byte is expected.
 * @return mixed The decoded value.
 * @throws InvalidArgumentException When $data is neither a string nor a
 *         resource, or the version byte is missing or outside 1..254.
 * @throws FWIFUnsupportedVersionException When the stream's version is newer
 *         than self::VERSION.
 */
public static function decode($data, int $flags = self::DEFAULT) {
    $ownsHandle = false;

    if(is_string($data)) {
        // Wrap the string in a memory stream so the codecs can use the stream API.
        $fd = fopen('php://memory', 'rb+');
        fwrite($fd, $data);
        fseek($fd, 0);
        $data = $fd;
        $ownsHandle = true;
    }

    if(!is_resource($data))
        throw new InvalidArgumentException('$data must be either a string or a file handle.');

    try {
        if(!($flags & self::EXCLUDE_VERSION)) {
            // fgetc() yields false at end of stream; treat that as the invalid version 0.
            $char = fgetc($data);
            $version = $char === false ? 0 : ord($char);

            if($version < 1 || $version > 254)
                throw new InvalidArgumentException('$data is not a valid FWIF serialized stream.');
            if($version > self::VERSION)
                throw new FWIFUnsupportedVersionException;
        }

        return self::decodeInternal($data, $flags);
    } finally {
        // Only close the stream created above; caller-supplied handles stay open.
        // Running this in finally releases it even when decoding throws.
        if($ownsHandle)
            fclose($data);
    }
}
/**
 * Reads one type byte from the stream and dispatches to the matching codec.
 *
 * @param resource $data  Stream positioned at a type byte.
 * @param int      $flags Bitmask forwarded to the codec.
 * @return mixed The decoded value.
 * @throws FWIFUnsupportedTypeException When the type byte is not in CODECS.
 */
private static function decodeInternal($data, int $flags) {
    $type = ord(fgetc($data));

    if(array_key_exists($type, self::CODECS)) {
        $decoder = 'decode' . self::CODECS[$type];
        return self::{$decoder}($data, $flags);
    }

    // The reported position is taken after the type byte has been consumed.
    $hexType = dechex($type);
    $pos = ftell($data);
    $hexPos = dechex($pos);
    throw new FWIFUnsupportedTypeException("Unsupported type {$type} (0x{$hexType}) at position {$pos} (0x{$hexPos})");
}
/**
 * Encodes NULL, which carries no payload beyond its type byte.
 *
 * @param mixed $data  Ignored.
 * @param int   $flags Ignored.
 * @return string Always the empty string.
 */
private static function encodeNull($data, int $flags): string {
    return '';
}
/**
 * Decodes NULL; nothing is read from the stream.
 *
 * @param resource $data  Unused.
 * @param int      $flags Unused.
 * @return null
 */
private static function decodeNull($data, int $flags) {
    return null;
}
private static function encodeInteger(int $number, int $flags): string { private static function encodeInteger(int $number, int $flags): string {
$packed = ''; $more = 1; $negative = $number < 0; $size = PHP_INT_SIZE * 8; $packed = ''; $more = 1; $negative = $number < 0; $size = PHP_INT_SIZE * 8;
@ -113,10 +143,10 @@ class FWIF {
} }
return $packed; return $packed;
} }
private static function decodeInteger(FWIFDecodeStream $data, int $flags): int { private static function decodeInteger($data, int $flags): int {
$number = 0; $shift = 0; $o = 0; $size = PHP_INT_SIZE * 8; $number = 0; $shift = 0; $o = 0; $size = PHP_INT_SIZE * 8;
do { do {
$byte = $data->readByte(); $byte = ord(fgetc($data));
$number |= ($byte & 0x7F) << $shift; $number |= ($byte & 0x7F) << $shift;
$shift += 7; $shift += 7;
} while($byte & 0x80); } while($byte & 0x80);
@ -125,45 +155,50 @@ class FWIF {
return $number; return $number;
} }
// I still don't like these
/**
 * Encodes a float using pack() format 'E' (big-endian double).
 *
 * @param float $number Value to encode.
 * @param int   $flags  Unused.
 * @return string The packed bytes.
 */
private static function encodeFloat(float $number, int $flags): string {
    $bytes = pack('E', $number);
    return $bytes;
}
/**
 * Decodes a float written with pack() format 'E' (big-endian double).
 *
 * @param resource $data  Stream positioned at the 8 payload bytes.
 * @param int      $flags Unused.
 * @return float The decoded number.
 * @throws InvalidArgumentException When fewer than 8 bytes can be read.
 */
private static function decodeFloat($data, int $flags): float {
    $bytes = fread($data, 8);

    // A short read would make unpack() fail and surface as a confusing
    // TypeError; report the truncated stream explicitly instead.
    if($bytes === false || strlen($bytes) !== 8)
        throw new InvalidArgumentException('Unexpected end of stream while decoding a float.');

    return unpack('E', $bytes)[1];
}
/**
 * Encodes a text string as UTF-8 bytes followed by the TRAILER byte.
 *
 * The input is converted from the current mbstring internal encoding.
 *
 * @param string $string Text to encode.
 * @param int    $flags  Unused.
 * @return string UTF-8 bytes plus terminator.
 */
private static function encodeString(string $string, int $flags): string {
    $utf8 = mb_convert_encoding($string, 'utf-8', mb_internal_encoding());
    return $utf8 . chr(self::TRAILER);
}
/**
 * Decodes a TRAILER-terminated UTF-8 string from the stream.
 *
 * After each lead byte the continuation bytes announced by it are consumed
 * together with it, so the terminator is only looked for at character
 * boundaries. The result is converted to the mbstring internal encoding.
 *
 * @param resource $data  Stream positioned at the first string byte.
 * @param int      $flags Unused.
 * @return string The decoded text.
 * @throws InvalidArgumentException When the stream ends before the TRAILER.
 */
private static function decodeString($data, int $flags): string {
    $packed = '';

    for(;;) {
        $char = fgetc($data);

        // fgetc() returns false at end of stream; ord(false) is 0 and would
        // never match TRAILER, so without this check the loop never ends.
        if($char === false)
            throw new InvalidArgumentException('Unexpected end of stream while decoding a string.');

        $byte = ord($char);
        if($byte === self::TRAILER)
            break;

        $packed .= $char;

        // Pull in the continuation bytes of multi-byte sequences.
        if(($byte & 0xF8) === 0xF0)
            $packed .= fread($data, 3);
        elseif(($byte & 0xF0) === 0xE0)
            $packed .= fread($data, 2);
        elseif(($byte & 0xE0) === 0xC0)
            $packed .= fgetc($data);
    }

    return mb_convert_encoding($packed, mb_internal_encoding(), 'utf-8');
}
/**
 * Encodes a list as its encoded elements followed by the TRAILER byte.
 *
 * Keys are not written; only the values, in iteration order.
 *
 * @param array $array Values to encode.
 * @param int   $flags Bitmask forwarded to each element's encoder.
 * @return string Concatenated elements plus terminator.
 */
private static function encodeArray(array $array, int $flags): string {
    $items = array_map(
        static fn($item) => self::encodeInternal($item, $flags),
        $array
    );
    return implode('', $items) . chr(self::TRAILER);
}
private static function decodeArray(FWIFDecodeStream $data, int $flags): array { private static function decodeArray($data, int $flags): array {
$array = []; $array = [];
for(;;) { for(;;) {
if($data->readByte() === self::TRAILER) if(ord(fgetc($data)) === self::TRAILER)
break; break;
$data->stepBack(); fseek($data, -1, SEEK_CUR);
$array[] = self::decodeInternal($data, $flags); $array[] = self::decodeInternal($data, $flags);
} }
return $array; return $array;
@ -172,17 +207,26 @@ class FWIF {
/**
 * Encodes an object (or anything castable to array) as name/value pairs.
 *
 * Each pair is the member name converted to US-ASCII, a TRAILER byte and the
 * encoded value; the whole list is closed by one more TRAILER byte.
 *
 * @param mixed $object Value cast to array for iteration.
 * @param int   $flags  Bitmask forwarded to each value's encoder.
 * @return string The encoded member list.
 */
private static function encodeObject($object, int $flags): string {
    $terminator = chr(self::TRAILER);
    $fields = '';

    foreach((array)$object as $key => $member) {
        $asciiKey = mb_convert_encoding($key, 'us-ascii', mb_internal_encoding());
        $fields .= $asciiKey . $terminator . self::encodeInternal($member, $flags);
    }

    return $fields . $terminator;
}
/**
 * Reads a TRAILER-terminated US-ASCII member name from the stream.
 *
 * @param resource $data  Stream positioned at the first byte of the name.
 * @param int      $flags Unused.
 * @return string The name, converted to the mbstring internal encoding.
 * @throws InvalidArgumentException When the stream ends before the TRAILER.
 */
private static function decodeObjectKey($data, int $flags): string {
    $packed = '';

    for(;;) {
        $char = fgetc($data);

        // fgetc() returns false at end of stream; without this check a
        // truncated stream would keep the loop spinning forever.
        if($char === false)
            throw new InvalidArgumentException('Unexpected end of stream while decoding an object key.');

        if(ord($char) === self::TRAILER)
            break;

        $packed .= $char;
    }

    return mb_convert_encoding($packed, mb_internal_encoding(), 'us-ascii');
}
/**
 * Decodes a TRAILER-terminated list of name/value pairs into an object.
 *
 * @param resource $data  Seekable stream positioned after the object's type byte.
 * @param int      $flags Bitmask forwarded to the value decoders.
 * @return object The members cast to an object.
 * @throws InvalidArgumentException When the stream ends before the closing
 *         TRAILER, or the stream cannot be rewound by one byte.
 */
private static function decodeObject($data, int $flags): object {
    $array = [];

    for(;;) {
        $char = fgetc($data);

        // At end of stream fgetc() returns false; previously that was read as
        // byte 0, the cursor was moved back and the last byte decoded again,
        // which could repeat indefinitely.
        if($char === false)
            throw new InvalidArgumentException('Unexpected end of stream while decoding an object.');

        if(ord($char) === self::TRAILER)
            break;

        // The byte just read is the first character of the next name; hand it back.
        if(fseek($data, -1, SEEK_CUR) !== 0)
            throw new InvalidArgumentException('Unable to rewind the stream while decoding an object.');

        $name = self::decodeObjectKey($data, $flags);
        $array[$name] = self::decodeInternal($data, $flags);
    }

    return (object)$array;
}
@ -190,51 +234,41 @@ class FWIF {
/**
 * Encodes binary data as its byte length (integer codec) followed by the raw bytes.
 *
 * @param string $buffer Raw bytes.
 * @param int    $flags  Bitmask forwarded to the integer encoder.
 * @return string Length prefix plus payload.
 */
private static function encodeBuffer(string $buffer, int $flags): string {
    $lengthPrefix = self::encodeInteger(strlen($buffer), $flags);
    return $lengthPrefix . $buffer;
}
/**
 * Decodes a length-prefixed binary buffer.
 *
 * @param resource $data  Stream positioned at the length prefix.
 * @param int      $flags Bitmask forwarded to the integer decoder.
 * @return string The raw bytes; empty when the encoded length is 0.
 * @throws InvalidArgumentException When the length is negative or the stream
 *         ends before the announced number of bytes was read.
 */
private static function decodeBuffer($data, int $flags): string {
    $length = self::decodeInteger($data, $flags);

    if($length < 0)
        throw new InvalidArgumentException('Negative buffer length in FWIF stream.');

    // fread() rejects a length of 0, so an empty buffer must not reach it.
    if($length === 0)
        return '';

    // fread() may hand back fewer bytes than requested, keep reading until
    // the buffer is complete or the stream runs dry.
    $buffer = '';
    while(($missing = $length - strlen($buffer)) > 0) {
        $chunk = fread($data, $missing);
        if($chunk === false || $chunk === '')
            throw new InvalidArgumentException('Unexpected end of stream while decoding a buffer.');
        $buffer .= $chunk;
    }

    return $buffer;
}
private const DATETIME_FLAG_TIME = 0x01; private const DATETIME_FLAG_TIME = 0x40;
private const DATETIME_FLAG_MILLI = 0x02; private const DATETIME_FLAG_MILLI = 0x20;
private const DATETIME_FLAG_MASK = 0x03;
private const DATETIME_YEAR_SIGN = 0x800000; private const DATETIME_YEAR_SIGN = 0x40000000;
private const DATETIME_YEAR_MASK = 0x3FFF; private const DATETIME_YEAR_MASK = 0x3FFF;
private const DATETIME_YEAR_SHIFT = 9; private const DATETIME_YEAR_SHIFT = 16; // <<
private const DATETIME_MONTH_MASK = 0x0F; private const DATETIME_MONTH_MASK = 0x0F;
private const DATETIME_MONTH_SHIFT = 5; private const DATETIME_MONTH_SHIFT = 12; // <<
private const DATETIME_DAY_MASK = 0x1F; private const DATETIME_DAY_MASK = 0x1F;
private const DATETIME_DAY_SHIFT = 7; // <<
private const DATETIME_HOUR_MASK = 0x1F; private const DATETIME_HOUR_MASK = 0x1F;
private const DATETIME_HOUR_SHIFT = 12;
private const DATETIME_MINS_MASK = 0x3F; private const DATETIME_MINS_MASK = 0x3F;
private const DATETIME_MINS_SHIFT = 6; private const DATETIME_MINS_SHIFT = 8; // <<
private const DATETIME_SECS_MASK = 0x3F; private const DATETIME_SECS_MASK = 0x3F;
private const DATETIME_SECS_SHIFT = 2; // <<
private const DATETIME_MILLI_MASK = 0xFFFFF; private const DATETIME_MILLI_HI_MASK = 0x300;
private const DATETIME_MILLI_HI_SHIFT = 8; // >>
private const DATETIME_MILLI_LO_MASK = 0x0FF;
/* One block Chunked /* +--------+--------+
+--------+ +--------+ +--------+ * |.YYYYYYY|YYYYYYYY|
0 | YYYYYYY| |YYYYYYYY| |FFF.....| * |MMMMDDDD|DTVHHHHH|
8 |YYYYYYYY| |YYYYYYYM| +--------+ * |..mmmmmm|SSSSSSvv|
12 | MMMM| |MMMDDDDD| * |vvvvvvvv| |
16 | DDDDD| +--------+ * +--------+--------+
24 |FFFHHHHH| FHmS and U seperate Semicombined FHmSU
32 | mmmmmm| +--------+ +--------+ +--------+ +--------+
40 | SSSSSS| |FFFHHHHH| |UUUUUUUU| |FFFHHHHH| |UUUUUUUU|
48 |uuuuuuuu| |mmmmmmSS| |UUUUUUUU| OR |mmmmmmSS| |UUUUUUUU|
56 |uuuuuuuu| |SSSS....| |UUUU....| |SSSSUUUU| +--------+
64 |uuuuuuuu| +--------+ +--------+ +--------+
+--------+ In both situations the upper byte of FHmS would always be present for the flags.
Meaning the thing would take up at least 4 bytes and at most 9 bytes with 1 unused bit.
Most situations I'd imagine you wouldn't really care about the milliseconds,
in which you end up with 7 bytes with 5 unused bits.
Is there a benefit to this over just using .NET-style ticks?
Yes, it can be entirely handled using just 32-bit integers.
*/ */
private static function encodeDateTime(DateTimeInterface $dt, int $flags): string { private static function encodeDateTime(DateTimeInterface $dt, int $flags): string {
@ -245,52 +279,74 @@ class FWIF {
if($dt->getTimezone()->getOffset($dt) !== 0) if($dt->getTimezone()->getOffset($dt) !== 0)
$dt = DateTime::createFromInterface($dt)->setTimezone($utc); $dt = DateTime::createFromInterface($dt)->setTimezone($utc);
$dtf = 0;
$year = (int)$dt->format('Y'); $year = (int)$dt->format('Y');
$month = (int)$dt->format('n'); $month = (int)$dt->format('n');
$day = (int)$dt->format('j'); $day = (int)$dt->format('j');
$hours = (int)$dt->format('G');
$subYear = $year < 0; $subYear = $year < 0;
if($subYear) if($subYear)
$year = ~$year; $year = ~$year;
$ymd = $subYear ? self::DATETIME_YEAR_SIGN : 0; $ymdfh = $subYear ? self::DATETIME_YEAR_SIGN : 0;
$ymd |= ( $year & self::DATETIME_YEAR_MASK) << self::DATETIME_YEAR_SHIFT; $ymdfh |= ($year & self::DATETIME_YEAR_MASK) << self::DATETIME_YEAR_SHIFT;
$ymd |= (($month - 1) & self::DATETIME_MONTH_MASK) << self::DATETIME_MONTH_SHIFT; $ymdfh |= ($month & self::DATETIME_MONTH_MASK) << self::DATETIME_MONTH_SHIFT;
$ymd |= (($day - 1) & self::DATETIME_DAY_MASK); $ymdfh |= ($day & self::DATETIME_DAY_MASK) << self::DATETIME_DAY_SHIFT;
$ymdfh |= ($hours & self::DATETIME_HOUR_MASK);
var_dump(str_pad(decbin($ymd), 24, '0', STR_PAD_LEFT));
$hours = (int)$dt->format('G');
$mins = (int)$dt->format('i'); $mins = (int)$dt->format('i');
$secs = (int)$dt->format('s'); $secs = (int)$dt->format('s');
$millis = ($flags & self::DISCARD_MILLISECONDS) ? 0 : (int)$dt->format('u'); $millis = ($flags & self::DISCARD_MILLISECONDS) ? 0 : (int)$dt->format('v');
// Add year, month, day if($mins > 0 || $secs > 0 || $millis > 0) {
$ymdfh |= self::DATETIME_FLAG_TIME;
if($hours > 0 || $mins > 0 || $secs > 0 || $millis > 0) { $msv = 0;
$dtf |= self::DATETIME_FLAG_TIME; $msv |= ($mins & self::DATETIME_MINS_MASK) << self::DATETIME_MINS_SHIFT;
$hms = 0; $msv |= ($secs & self::DATETIME_SECS_MASK) << self::DATETIME_SECS_SHIFT;
$hms |= ($hours & self::DATETIME_HOUR_MASK) << self::DATETIME_HOUR_SHIFT;
$hms |= ($mins & self::DATETIME_MINS_MASK) << self::DATETIME_MINS_SHIFT;
$hms |= ($secs & self::DATETIME_SECS_MASK);
var_dump(str_pad(decbin($hms), 17, '0', STR_PAD_LEFT));
if($millis > 0) { if($millis > 0) {
$dtf |= self::DATETIME_FLAG_MILLI; $ymdfh |= self::DATETIME_FLAG_MILLI;
$millis &= self::DATETIME_MILLI_MASK; $msv |= ($millis & self::DATETIME_MILLI_HI_MASK) >> self::DATETIME_MILLI_HI_SHIFT;
$v = $millis & self::DATETIME_MILLI_LO_MASK;
var_dump(str_pad(decbin($millis), 20, '0', STR_PAD_LEFT));
} }
} }
echo "\r\n"; $packed = pack('N', $ymdfh);
if($ymdfh & self::DATETIME_FLAG_TIME) {
$packed .= pack('n', $msv);
if($ymdfh & self::DATETIME_FLAG_MILLI)
$packed .= chr($v);
}
return ''; return $packed;
} }
/**
 * Decodes a packed date/time value.
 *
 * Layout: a 32-bit big-endian word holding year sign, year, month, day, the
 * TIME and MILLI flags and the hour; when TIME is set a 16-bit word with
 * minutes, seconds and the two high millisecond bits follows; when MILLI is
 * set as well, one more byte carries the low eight millisecond bits.
 *
 * @param resource $data  Stream positioned at the first payload byte.
 * @param int      $flags Unused.
 * @return DateTimeInterface An immutable date/time in UTC.
 * @throws InvalidArgumentException When the stream ends inside the value.
 */
private static function decodeDateTime($data, int $flags): DateTimeInterface {
    $head = fread($data, 4);
    if($head === false || strlen($head) !== 4)
        throw new InvalidArgumentException('Unexpected end of stream while decoding a date.');
    $ymdfh = unpack('N', $head)[1];

    $hasMsv = ($ymdfh & self::DATETIME_FLAG_TIME) !== 0;
    $hasV = $hasMsv && ($ymdfh & self::DATETIME_FLAG_MILLI) !== 0;

    $msv = 0;
    if($hasMsv) {
        $tail = fread($data, 2);
        if($tail === false || strlen($tail) !== 2)
            throw new InvalidArgumentException('Unexpected end of stream while decoding a time.');
        $msv = unpack('n', $tail)[1];
    }

    $v = 0;
    if($hasV) {
        $char = fgetc($data);
        if($char === false)
            throw new InvalidArgumentException('Unexpected end of stream while decoding milliseconds.');
        $v = ord($char);
    }

    $year = ($ymdfh >> self::DATETIME_YEAR_SHIFT) & self::DATETIME_YEAR_MASK;
    $month = ($ymdfh >> self::DATETIME_MONTH_SHIFT) & self::DATETIME_MONTH_MASK;
    $day = ($ymdfh >> self::DATETIME_DAY_SHIFT) & self::DATETIME_DAY_MASK;
    $hour = $ymdfh & self::DATETIME_HOUR_MASK;

    // Negative years are stored bit-inverted with the sign flag set.
    if($ymdfh & self::DATETIME_YEAR_SIGN)
        $year = ~$year;

    // Pad the absolute year and prepend the sign separately: '%04d' counts the
    // minus sign towards the width and would turn year -5 into "-005".
    $dt = sprintf('%s%04d-%02d-%02dT%02d:', $year < 0 ? '-' : '', abs($year), $month, $day, $hour);

    if($hasMsv) {
        $mins = ($msv >> self::DATETIME_MINS_SHIFT) & self::DATETIME_MINS_MASK;
        $secs = ($msv >> self::DATETIME_SECS_SHIFT) & self::DATETIME_SECS_MASK;
        $dt .= sprintf('%02d:%02d', $mins, $secs);

        if($hasV) {
            $millis = ($msv << self::DATETIME_MILLI_HI_SHIFT) & self::DATETIME_MILLI_HI_MASK;
            $millis |= $v;
            $dt .= sprintf('.%03d', $millis);
        }
    } else $dt .= '00:00';

    // The encoder converts to UTC before packing, so the fields must be read
    // back as UTC rather than in the process' default timezone.
    return new DateTimeImmutable($dt, new DateTimeZone('UTC'));
}
} }

View file

@ -1,42 +0,0 @@
<?php
namespace FWIF;
/**
 * Forward-reading cursor over an in-memory byte string.
 */
class FWIFDecodeStream {
    private string $body;
    private int $length;
    private int $position = 0;

    /**
     * @param string $body Bytes to read from.
     */
    public function __construct(string $body) {
        $this->body = $body;
        $this->length = strlen($body);
    }

    /** @return int Total number of bytes in the stream. */
    public function getLength(): int {
        return $this->length;
    }

    /** @return int Offset of the next byte to be read. */
    public function getPosition(): int {
        return $this->position;
    }

    /** Moves the cursor one byte back, never before the start. */
    public function stepBack(): void {
        $this->position = max(0, $this->position - 1);
    }

    /**
     * Reads the next byte and advances the cursor.
     *
     * @return int The byte value, or FWIF::TRAILER once the end is reached
     *             (the cursor is not advanced in that case).
     */
    public function readByte(): int {
        // Compare the cursor itself against the length; testing position + 1
        // made the final byte of every stream unreadable.
        if($this->position >= $this->length)
            return FWIF::TRAILER;
        return ord($this->body[$this->position++]);
    }

    /** @return string The next byte as a one-character string. */
    public function readChar(): string {
        return chr($this->readByte());
    }

    /**
     * Reads up to $length bytes and advances the cursor by the number read.
     *
     * @param int $length Number of bytes wanted.
     * @return string The bytes read; shorter than requested near the end.
     */
    public function readString(int $length): string {
        // Clamp to what is left so the cursor can never run past the end.
        $length = max(0, min($length, $this->length - $this->position));
        $string = substr($this->body, $this->position, $length);
        $this->position += $length;
        return $string;
    }
}

View file

@ -0,0 +1,4 @@
<?php
namespace FWIF;
/**
 * Thrown by FWIF::decode() when the version byte at the start of a stream is
 * greater than FWIF::VERSION.
 */
class FWIFUnsupportedVersionException extends FWIFException {}

View file

@ -14,12 +14,12 @@ if($request->match('GET', '/packages')) {
$packages = empty($tags) ? Patchouli::getPackages() : Patchouli::getPackagesWithTags($tags); $packages = empty($tags) ? Patchouli::getPackages() : Patchouli::getPackagesWithTags($tags);
$encoded = FWIF::encode($packages); $encoded = FWIF::encode($packages);
echo strlen($encoded) . ' ' . $encoded; echo 'FWIF ' . strlen($encoded) . ' bytes ' . $encoded;
echo "\r\n\r\n--------------------\r\n\r\n"; echo "\r\n\r\n--------------------\r\n\r\n";
$jsonEncoded = json_encode($packages, JSON_INVALID_UTF8_SUBSTITUTE); $jsonEncoded = json_encode($packages, JSON_INVALID_UTF8_SUBSTITUTE);
echo strlen($jsonEncoded) . ' ' . $jsonEncoded; echo 'JSON ' . strlen($jsonEncoded) . ' bytes ' . $jsonEncoded;
echo "\r\n\r\n--------------------\r\n\r\n"; echo "\r\n\r\n--------------------\r\n\r\n";

View file

@ -36,7 +36,7 @@ class DummyPackage implements IPackage, \JsonSerializable {
'neg32' => -12345678, 'neg32' => -12345678,
'neg64' => -1234567890987654, 'neg64' => -1234567890987654,
'float' => 12345.6789, 'float' => 12345.6789,
'invalid' => "\xFF\x25\x25\x02\xFF御坂e美琴\xFF\xFF\xFF", 'invalid' => "\xFF\x25\x25\x02\xFF蠕。蝮F鄒守清\xFF\xFF\xFF",
'datetime' => new \DateTime('2013-01-27 23:14:44 CET'), 'datetime' => new \DateTime('2013-01-27 23:14:44 CET'),
'datetimeNegative' => new \DateTime('-2013-01-27 23:14:44 CET'), 'datetimeNegative' => new \DateTime('-2013-01-27 23:14:44 CET'),
'datetimeNow' => new \DateTime(), 'datetimeNow' => new \DateTime(),
@ -54,6 +54,10 @@ class DummyPackage implements IPackage, \JsonSerializable {
} }
/**
 * Produces the JSON representation of the package.
 *
 * Starts from the fwifSerialize() output and replaces the three date/time
 * members with their ATOM-formatted strings.
 *
 * @return mixed The serialisable data.
 */
public function jsonSerialize() {
    $serial = $this->fwifSerialize();

    foreach(['datetime', 'datetimeNegative', 'datetimeNow'] as $key)
        $serial[$key] = $serial[$key]->format(\DateTimeInterface::ATOM);

    return $serial;
}
} }