wip datetime
This commit is contained in:
parent
e92bfcf7f7
commit
584b4c5946
5 changed files with 199 additions and 48 deletions
|
@ -1,20 +1,26 @@
|
|||
<?php
|
||||
namespace FWIF;
|
||||
|
||||
use DateTime;
|
||||
use DateTimeInterface;
|
||||
use DateTimeZone;
|
||||
|
||||
class FWIF {
|
||||
public const CONTENT_TYPE = 'text/plain; charset=us-ascii'; // TODO: come up with a mime type
|
||||
|
||||
public const DEFAULT = 0;
|
||||
public const DISCARD_MILLISECONDS = 0x01; // Always exclude the millisecond component from DateTime
|
||||
|
||||
public const TYPE_NULL = 0; // NULL, no data
|
||||
public const TYPE_INTEGER = 0x01; // LEB128, implicit length
|
||||
public const TYPE_FLOAT = 0x02; // double precision IEEE 754, fixed length of 8 bytes
|
||||
public const TYPE_STRING = 0x03; // UTF-8 string, terminated with TYPE_TRAILER
|
||||
public const TYPE_ARRAY = 0x04; // List of values, terminated with TYPE_TRAILER
|
||||
public const TYPE_OBJECT = 0x05; // List of values with ASCII names, terminated with TYPE_TRAILER
|
||||
public const TYPE_BUFFER = 0x06; // Buffer with binary data, prefixed with a LEB128 length
|
||||
public const TYPE_DATE = 0x07; // A gregorian year, month and day, fixed length of * bytes
|
||||
public const TYPE_DATETIME = 0x08; // A gregorian year, month and day as well as an hour, minute and seconds component, fixed length of * bytes
|
||||
public const TYPE_PERIOD = 0x09; // A time period, fixed length of * bytes
|
||||
public const TYPE_TRAILER = 0xFF; // Termination byte
|
||||
public const TYPE_STRING = 0x03; // UTF-8 string, terminated with TRAILER
|
||||
public const TYPE_BUFFER = 0x04; // Buffer with binary data, prefixed with a LEB128 length
|
||||
public const TYPE_ARRAY = 0x05; // List of values, terminated with TRAILER
|
||||
public const TYPE_OBJECT = 0x06; // List of values with ASCII names, terminated with TRAILER
|
||||
public const TYPE_DATETIME = 0x07; // A gregorian year, month and day as well as an hour, minute and seconds component, fixed length of * bytes
|
||||
|
||||
public const TRAILER = 0xFF; // Termination byte
|
||||
|
||||
private const CODECS = [
|
||||
self::TYPE_NULL => 'Null',
|
||||
|
@ -23,54 +29,76 @@ class FWIF {
|
|||
self::TYPE_STRING => 'String',
|
||||
self::TYPE_ARRAY => 'Array',
|
||||
self::TYPE_OBJECT => 'Object',
|
||||
self::TYPE_BUFFER => 'Buffer',
|
||||
self::TYPE_DATETIME => 'DateTime',
|
||||
];
|
||||
|
||||
private const UTF8 = '%^(?:' // https://www.w3.org/International/questions/qa-forms-utf-8.en
|
||||
. '[\x09\x0A\x0D\x20-\x7E]' // ASCII
|
||||
. '|[\xC2-\xDF][\x80-\xBF]' // non-overlong 2-byte
|
||||
. '|\xE0[\xA0-\xBF][\x80-\xBF]' // excluding overlongs
|
||||
. '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' // straight 3-byte
|
||||
. '|\xED[\x80-\x9F][\x80-\xBF]' // excluding surrogates
|
||||
. '|\xF0[\x90-\xBF][\x80-\xBF]{2}' // planes 1-3
|
||||
. '|[\xF1-\xF3][\x80-\xBF]{3}' // planes 4-15
|
||||
. '|\xF4[\x80-\x8F][\x80-\xBF]{2}' // plane 16
|
||||
. ')*$%xs';
|
||||
|
||||
// Tells whether a value is an array that has to be treated as a map instead of a list.
// An array is associative when its keys are anything other than the integers
// 0..n-1 in that exact order. Non-arrays and the empty array are never associative.
private static function isAssocArray($array): bool {
    if(is_array($array) && $array !== []) {
        $expected = 0;
        // The first key that is not the next sequential integer makes the array a map.
        foreach($array as $key => $unused) {
            if($key !== $expected++)
                return true;
        }
    }
    return false;
}
|
||||
|
||||
private static function detectType($data): int {
|
||||
// apparently this is faster than mb_check_encoding($string, 'utf-8');
|
||||
// on PHP 7.1 on Windows at least, perhaps investigate this later
|
||||
// Reports whether the whole of $string is accepted by the self::UTF8 pattern.
// Only a positive match counts: both "no match" (0) and a preg_match() failure
// (false) are reported as not being UTF-8.
private static function isUTF8String(string $string): bool {
    $matched = preg_match(self::UTF8, $string);
    return $matched === 1;
}
|
||||
|
||||
private static function detectType($data, int $flags): int {
|
||||
if(is_null($data))
|
||||
return self::TYPE_NULL;
|
||||
if(is_int($data))
|
||||
return self::TYPE_INTEGER;
|
||||
if(is_float($data))
|
||||
return self::TYPE_FLOAT;
|
||||
if(is_string($data)) // Should this check if a string is valid UTF-8 and swap over to TYPE_BUFFER?
|
||||
return self::TYPE_STRING;
|
||||
if(is_object($data) || self::isAssocArray($data))
|
||||
if(is_string($data))
|
||||
return self::isUTF8String($data) ? self::TYPE_STRING : self::TYPE_BUFFER;
|
||||
if(is_object($data) || self::isAssocArray($data)) {
|
||||
if($data instanceof DateTimeInterface)
|
||||
return self::TYPE_DATETIME;
|
||||
return self::TYPE_OBJECT;
|
||||
}
|
||||
if(is_array($data))
|
||||
return self::TYPE_ARRAY;
|
||||
throw new FWIFUnsupportedTypeException(gettype($data));
|
||||
}
|
||||
|
||||
public static function encode($data): string {
|
||||
public static function encode($data, int $flags = self::DEFAULT): string {
|
||||
if($data instanceof FWIFSerializable)
|
||||
$data = $data->fwifSerialize();
|
||||
$type = self::detectType($data);
|
||||
return chr($type) . self::{'encode' . self::CODECS[$type]}($data);
|
||||
$type = self::detectType($data, $flags);
|
||||
return chr($type) . self::{'encode' . self::CODECS[$type]}($data, $flags);
|
||||
}
|
||||
|
||||
public static function decode(string $data) {
|
||||
return self::decodeInternal(new FWIFDecodeStream($data));
|
||||
public static function decode(string $data, int $flags = self::DEFAULT) {
|
||||
return self::decodeInternal(new FWIFDecodeStream($data), $flags);
|
||||
}
|
||||
|
||||
private static function decodeInternal(FWIFDecodeStream $data) {
|
||||
private static function decodeInternal(FWIFDecodeStream $data, int $flags) {
|
||||
$type = $data->readByte();
|
||||
if(!array_key_exists($type, self::CODECS)) {
|
||||
$hexType = dechex($type); $hexPos = dechex($data->getPosition());
|
||||
throw new FWIFUnsupportedTypeException("Unsupported type {$type} (0x{$hexType}) at position {$data->getPosition()} (0x{$hexPos})");
|
||||
}
|
||||
return self::{'decode' . self::CODECS[$type]}($data);
|
||||
return self::{'decode' . self::CODECS[$type]}($data, $flags);
|
||||
}
|
||||
|
||||
private static function encodeNull($data): string { return ''; }
|
||||
private static function decodeNull(FWIFDecodeStream $data) { return null; }
|
||||
private static function encodeNull($data, int $flags): string { return ''; }
|
||||
private static function decodeNull(FWIFDecodeStream $data, int $flags) { return null; }
|
||||
|
||||
private static function encodeInteger(int $number): string {
|
||||
private static function encodeInteger(int $number, int $flags): string {
|
||||
$packed = ''; $more = 1; $negative = $number < 0; $size = PHP_INT_SIZE * 8;
|
||||
while($more) {
|
||||
$byte = $number & 0x7F;
|
||||
|
@ -85,7 +113,7 @@ class FWIF {
|
|||
}
|
||||
return $packed;
|
||||
}
|
||||
private static function decodeInteger(FWIFDecodeStream $data): int {
|
||||
private static function decodeInteger(FWIFDecodeStream $data, int $flags): int {
|
||||
$number = 0; $shift = 0; $o = 0; $size = PHP_INT_SIZE * 8;
|
||||
do {
|
||||
$byte = $data->readByte();
|
||||
|
@ -97,66 +125,172 @@ class FWIF {
|
|||
return $number;
|
||||
}
|
||||
|
||||
private static function encodeFloat(float $number): string {
|
||||
private static function encodeFloat(float $number, int $flags): string {
|
||||
return pack('E', $number);
|
||||
}
|
||||
private static function decodeFloat(FWIFDecodeStream $data): float {
|
||||
$packed = ''; for($i = 0; $i < 8; ++$i) $packed .= chr($data->readByte());
|
||||
return unpack('E', $packed)[1];
|
||||
private static function decodeFloat(FWIFDecodeStream $data, int $flags): float {
|
||||
return unpack('E', $data->readString(8))[1];
|
||||
}
|
||||
|
||||
private static function encodeString(string $string): string {
|
||||
private static function encodeString(string $string, int $flags): string {
|
||||
$packed = ''; $string = unpack('C*', mb_convert_encoding($string, 'utf-8'));
|
||||
foreach($string as $char)
|
||||
$packed .= chr($char);
|
||||
return $packed . chr(self::TYPE_TRAILER);
|
||||
return $packed . chr(self::TRAILER);
|
||||
}
|
||||
private static function decodeAsciiString(FWIFDecodeStream $data): string {
|
||||
private static function decodeAsciiString(FWIFDecodeStream $data, int $flags): string {
|
||||
$string = '';
|
||||
for(;;) {
|
||||
$byte = $data->readByte();
|
||||
if($byte === self::TYPE_TRAILER)
|
||||
if($byte === self::TRAILER)
|
||||
break;
|
||||
$string .= chr($byte);
|
||||
}
|
||||
return $string;
|
||||
}
|
||||
private static function decodeString(FWIFDecodeStream $data): string { // This should decode based on the utf-8 spec rather than just
|
||||
return mb_convert_encoding(self::decodeAsciiString($data), 'utf-8'); // grabbing the FF terminated string representation.
|
||||
private static function decodeString(FWIFDecodeStream $data, int $flags): string { // This should decode based on the utf-8 spec rather than just
|
||||
return mb_convert_encoding(self::decodeAsciiString($data, $flags), 'utf-8'); // grabbing the FF terminated string representation.
|
||||
}
|
||||
|
||||
private static function encodeArray(array $array): string {
|
||||
private static function encodeArray(array $array, int $flags): string {
|
||||
$packed = '';
|
||||
foreach($array as $value)
|
||||
$packed .= self::encode($value);
|
||||
return $packed . chr(self::TYPE_TRAILER);
|
||||
$packed .= self::encode($value, $flags);
|
||||
return $packed . chr(self::TRAILER);
|
||||
}
|
||||
private static function decodeArray(FWIFDecodeStream $data): array {
|
||||
private static function decodeArray(FWIFDecodeStream $data, int $flags): array {
|
||||
$array = [];
|
||||
for(;;) {
|
||||
if($data->readByte() === self::TYPE_TRAILER)
|
||||
if($data->readByte() === self::TRAILER)
|
||||
break;
|
||||
$data->stepBack();
|
||||
$array[] = self::decodeInternal($data);
|
||||
$array[] = self::decodeInternal($data, $flags);
|
||||
}
|
||||
return $array;
|
||||
}
|
||||
|
||||
private static function encodeObject($object): string {
|
||||
private static function encodeObject($object, int $flags): string {
|
||||
$packed = ''; $array = (array)$object;
|
||||
foreach($array as $name => $value)
|
||||
$packed .= $name . chr(self::TYPE_TRAILER) . self::encode($value);
|
||||
return $packed . chr(self::TYPE_TRAILER);
|
||||
$packed .= $name . chr(self::TRAILER) . self::encode($value, $flags);
|
||||
return $packed . chr(self::TRAILER);
|
||||
}
|
||||
private static function decodeObject(FWIFDecodeStream $data): object {
|
||||
private static function decodeObject(FWIFDecodeStream $data, int $flags): object {
|
||||
$array = [];
|
||||
for(;;) {
|
||||
if($data->readByte() === self::TYPE_TRAILER)
|
||||
if($data->readByte() === self::TRAILER)
|
||||
break;
|
||||
$data->stepBack();
|
||||
$name = self::decodeAsciiString($data);
|
||||
$array[$name] = self::decodeInternal($data);
|
||||
$name = self::decodeAsciiString($data, $flags);
|
||||
$array[$name] = self::decodeInternal($data, $flags);
|
||||
}
|
||||
return (object)$array;
|
||||
}
|
||||
|
||||
// Serialises a binary buffer as its byte length (encoded through encodeInteger)
// immediately followed by the untouched bytes of the buffer.
private static function encodeBuffer(string $buffer, int $flags): string {
    $length = strlen($buffer);
    $prefix = self::encodeInteger($length, $flags);
    return $prefix . $buffer;
}
|
||||
// Reads a length-prefixed binary buffer: first the length (through decodeInteger),
// then that many bytes from the stream.
// Throws \UnexpectedValueException when the decoded length is negative or when the
// stream hands back fewer bytes than the prefix announced, instead of silently
// returning a wrong or truncated buffer.
private static function decodeBuffer(FWIFDecodeStream $data, int $flags): string {
    $length = self::decodeInteger($data, $flags);

    // The length comes straight from the input; a negative value would make the
    // stream's substr() count from the end of the body and move the cursor backwards.
    if($length < 0)
        throw new \UnexpectedValueException("Negative buffer length {$length} at position {$data->getPosition()}");

    $buffer = $data->readString($length);

    // A short read means the input ended before the announced number of bytes.
    $received = strlen($buffer);
    if($received !== $length)
        throw new \UnexpectedValueException("Truncated buffer: expected {$length} bytes, got {$received}");

    return $buffer;
}
|
||||
|
||||
private const DATETIME_FLAG_TIME = 0x01;
|
||||
private const DATETIME_FLAG_MILLI = 0x02;
|
||||
private const DATETIME_FLAG_MASK = 0x03;
|
||||
|
||||
private const DATETIME_YEAR_SIGN = 0x800000;
|
||||
private const DATETIME_YEAR_MASK = 0x3FFF;
|
||||
private const DATETIME_YEAR_SHIFT = 9;
|
||||
|
||||
private const DATETIME_MONTH_MASK = 0x0F;
|
||||
private const DATETIME_MONTH_SHIFT = 5;
|
||||
|
||||
private const DATETIME_DAY_MASK = 0x1F;
|
||||
|
||||
private const DATETIME_HOUR_MASK = 0x1F;
|
||||
private const DATETIME_HOUR_SHIFT = 12;
|
||||
|
||||
private const DATETIME_MINS_MASK = 0x3F;
|
||||
private const DATETIME_MINS_SHIFT = 6;
|
||||
|
||||
private const DATETIME_SECS_MASK = 0x3F;
|
||||
|
||||
private const DATETIME_MILLI_MASK = 0xFFFFF;
|
||||
|
||||
/* One block Chunked
|
||||
+--------+ +--------+ +--------+
|
||||
0 | YYYYYYY| |YYYYYYYY| |FFF.....|
|
||||
8 |YYYYYYYY| |YYYYYYYM| +--------+
|
||||
12 | MMMM| |MMMDDDDD|
|
||||
16 | DDDDD| +--------+
|
||||
24 |FFFHHHHH| FHmS and U separate Semicombined FHmSU
|
||||
32 | mmmmmm| +--------+ +--------+ +--------+ +--------+
|
||||
40 | SSSSSS| |FFFHHHHH| |UUUUUUUU| |FFFHHHHH| |UUUUUUUU|
|
||||
48 |uuuuuuuu| |mmmmmmSS| |UUUUUUUU| OR |mmmmmmSS| |UUUUUUUU|
|
||||
56 |uuuuuuuu| |SSSS....| |UUUU....| |SSSSUUUU| +--------+
|
||||
64 |uuuuuuuu| +--------+ +--------+ +--------+
|
||||
+--------+ In both situations the upper byte of FHmS would always be present for the flags.
|
||||
Meaning the thing would take up at least 4 bytes and at most 9 bytes with 1 unused bit.
|
||||
In most situations I'd imagine you wouldn't really care about the milliseconds,
|
||||
in which case you end up with 7 bytes with 5 unused bits.
|
||||
Is there a benefit to this over just using .NET-style ticks?
|
||||
Yes, it can be entirely handled using just 32-bit integers.
|
||||
*/
|
||||
|
||||
// Work-in-progress encoder for DateTimeInterface values.
//
// $dt is converted to UTC when its offset is non-zero, after which its components
// are packed into bit fields:
//   $ymd  year sign (DATETIME_YEAR_SIGN) | masked year << DATETIME_YEAR_SHIFT
//         | (month - 1) << DATETIME_MONTH_SHIFT | (day - 1)
//   $hms  hours << DATETIME_HOUR_SHIFT | minutes << DATETIME_MINS_SHIFT | seconds
//   $dtf  DATETIME_FLAG_TIME when any time component is non-zero, plus
//         DATETIME_FLAG_MILLI when a sub-second component is present
// Negative years are stored as their bitwise complement with the sign bit set.
// Passing DISCARD_MILLISECONDS in $flags forces the sub-second component to zero.
//
// The packed fields are not serialised yet, so an empty payload is returned.
// This method is reached from encode() for every date value and must not write
// to the output stream; the debugging dumps that used to live here were removed
// for that reason.
private static function encodeDateTime(DateTimeInterface $dt, int $flags): string {
    static $utc = null;
    if($utc === null)
        $utc = new DateTimeZone('utc');

    // Only copy and convert when the value is not already at a zero offset.
    if($dt->getTimezone()->getOffset($dt) !== 0)
        $dt = DateTime::createFromInterface($dt)->setTimezone($utc);

    $dtf = 0;

    $year  = (int)$dt->format('Y');
    $month = (int)$dt->format('n');
    $day   = (int)$dt->format('j');

    // Years below zero are stored as their complement, flagged by the sign bit.
    $subYear = $year < 0;
    if($subYear)
        $year = ~$year;

    $ymd  = $subYear ? self::DATETIME_YEAR_SIGN : 0;
    $ymd |= ($year & self::DATETIME_YEAR_MASK) << self::DATETIME_YEAR_SHIFT;
    $ymd |= (($month - 1) & self::DATETIME_MONTH_MASK) << self::DATETIME_MONTH_SHIFT;
    $ymd |= (($day - 1) & self::DATETIME_DAY_MASK);

    $hours = (int)$dt->format('G');
    $mins  = (int)$dt->format('i');
    $secs  = (int)$dt->format('s');
    // The 'u' format character yields microseconds (0-999999), despite the flag's name.
    $micros = ($flags & self::DISCARD_MILLISECONDS) ? 0 : (int)$dt->format('u');

    // The time block is only flagged when at least one time component is set.
    if($hours > 0 || $mins > 0 || $secs > 0 || $micros > 0) {
        $dtf |= self::DATETIME_FLAG_TIME;

        $hms  = ($hours & self::DATETIME_HOUR_MASK) << self::DATETIME_HOUR_SHIFT;
        $hms |= ($mins & self::DATETIME_MINS_MASK) << self::DATETIME_MINS_SHIFT;
        $hms |= ($secs & self::DATETIME_SECS_MASK);

        if($micros > 0) {
            $dtf |= self::DATETIME_FLAG_MILLI;
            $micros &= self::DATETIME_MILLI_MASK;
        }
    }

    // TODO: serialise $ymd, $dtf, $hms and $micros once the byte layout is settled.
    return '';
}
|
||||
// Placeholder decoder: reads nothing from $data and hands back a DateTime
// constructed without arguments.
private static function decodeDateTime(FWIFDecodeStream $data, int $flags): DateTimeInterface {
    $placeholder = new \DateTime();
    return $placeholder;
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,18 @@ class FWIFDecodeStream {
|
|||
|
||||
// Returns the byte at the cursor as an integer and advances the cursor by one.
// When the cursor is at or beyond the end of the input, FWIF::TRAILER is
// returned instead and the cursor is left where it is.
public function readByte(): int {
    // NOTE(review): assumes $this->length is the byte length of $this->body - confirm against the constructor.
    // The previous test (position + 1 >= length) already reported end-of-input
    // while one unread byte was left, so the final byte could never be read.
    if($this->position >= $this->length)
        return FWIF::TRAILER;

    return ord($this->body[$this->position++]);
}
|
||||
|
||||
// Reads one byte through readByte() and returns it as a single-character string.
public function readChar(): string {
    $byte = $this->readByte();
    return chr($byte);
}
|
||||
|
||||
// Reads up to $length bytes starting at the cursor and advances the cursor by
// the number of bytes actually returned.
// A zero or negative $length yields an empty string without moving the cursor.
// When fewer than $length bytes remain, the shorter remainder is returned, so
// callers that need an exact size should compare strlen() of the result.
public function readString(int $length): string {
    // A negative length would make substr() trim from the end of the body and
    // would move the cursor backwards.
    if($length <= 0)
        return '';

    $string = substr($this->body, $this->position, $length);

    // Advance by what was really consumed so the cursor never overshoots the input.
    $this->position += strlen($string);

    return $string;
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ if($request->match('GET', '/packages')) {
|
|||
|
||||
echo "\r\n\r\n--------------------\r\n\r\n";
|
||||
|
||||
$jsonEncoded = json_encode($packages);
|
||||
$jsonEncoded = json_encode($packages, JSON_INVALID_UTF8_SUBSTITUTE);
|
||||
echo strlen($jsonEncoded) . ' ' . $jsonEncoded;
|
||||
|
||||
echo "\r\n\r\n--------------------\r\n\r\n";
|
||||
|
|
|
@ -36,6 +36,10 @@ class DummyPackage implements IPackage, \JsonSerializable {
|
|||
'neg32' => -12345678,
|
||||
'neg64' => -1234567890987654,
|
||||
'float' => 12345.6789,
|
||||
'invalid' => "\xFF\x25\x25\x02\xFF御坂e美琴\xFF\xFF\xFF",
|
||||
'datetime' => new \DateTime('2013-01-27 23:14:44 CET'),
|
||||
'datetimeNegative' => new \DateTime('-2013-01-27 23:14:44 CET'),
|
||||
'datetimeNow' => new \DateTime(),
|
||||
'array' => ['e', 'a', 0x55],
|
||||
'object' => new \stdClass,
|
||||
'misaka' => '御坂 美琴',
|
||||
|
|
|
@ -11,6 +11,8 @@ define('PAT_LIB', PAT_ROOT . '/lib'); // Other unresolved namespaces
|
|||
ini_set('display_errors', PAT_DEBUG ? 'on' : 'off');
|
||||
error_reporting(PAT_DEBUG ? -1 : 0);
|
||||
|
||||
mb_internal_encoding('utf-8');
|
||||
|
||||
set_include_path(PAT_SRC . PATH_SEPARATOR . PAT_LIB . PATH_SEPARATOR . get_include_path());
|
||||
spl_autoload_register(function(string $className) {
|
||||
$parts = explode('\\', trim($className, '\\'), 2);
|
||||
|
|
Reference in a new issue