352 lines
14 KiB
PHP
352 lines
14 KiB
PHP
<?php
|
|
namespace FWIF;
|
|
|
|
use DateInterval;
|
|
use DateTime;
|
|
use DateTimeInterface;
|
|
use DateTimeImmutable;
|
|
use DateTimeZone;
|
|
use InvalidArgumentException;
|
|
|
|
/**
 * Encoder/decoder for the FWIF binary serialisation format.
 *
 * A stream is an optional version byte followed by one value. Every value is
 * a one-byte type tag (one of the TYPE_* constants) followed by a
 * type-specific payload. Strings, arrays and objects are terminated by the
 * TRAILER byte (0xFF); buffers carry a LEB128 length prefix instead.
 *
 * Malformed or truncated input makes decode() throw InvalidArgumentException;
 * an unknown type tag throws FWIFUnsupportedTypeException.
 */
class FWIF {
    public const CONTENT_TYPE = 'text/plain; charset=us-ascii'; // TODO: come up with a mime type

    public const DEFAULT = 0;
    public const DISCARD_MILLISECONDS = 0x01; // Always exclude the millisecond component from DateTime
    public const EXCLUDE_VERSION = 0x02; // Exclude version byte at the start of the stream

    public const TYPE_NULL = 0; // NULL, no data
    public const TYPE_INTEGER = 0x01; // LEB128, implicit length
    public const TYPE_FLOAT = 0x02; // double precision IEEE 754, fixed length of 8 bytes
    public const TYPE_STRING = 0x03; // UTF-8 string, terminated with TRAILER
    public const TYPE_BUFFER = 0x04; // Buffer with binary data, prefixed with a LEB128 length
    public const TYPE_ARRAY = 0x05; // List of values, terminated with TRAILER
    public const TYPE_OBJECT = 0x06; // List of values with ASCII names, terminated with TRAILER
    public const TYPE_DATETIME = 0x07; // A gregorian year, month and day as well as an hour, minute, seconds and millisecond component, variable ranging from 4 to 7 bytes

    public const TRAILER = 0xFF; // Termination byte

    public const VERSION = 0x01; // min 1, max 254

    // Maps a type tag to the suffix of its encodeXxx()/decodeXxx() method pair.
    private const CODECS = [
        self::TYPE_NULL => 'Null',
        self::TYPE_INTEGER => 'Integer',
        self::TYPE_FLOAT => 'Float',
        self::TYPE_STRING => 'String',
        self::TYPE_ARRAY => 'Array',
        self::TYPE_OBJECT => 'Object',
        self::TYPE_BUFFER => 'Buffer',
        self::TYPE_DATETIME => 'DateTime',
    ];

    // Largest chunk requested from fread() at once, so a bogus length prefix
    // cannot trigger one enormous allocation before the stream runs dry.
    private const READ_CHUNK = 8192;

    private const UTF8 = '%^(?:' // https://www.w3.org/International/questions/qa-forms-utf-8.en
        . '[\x09\x0A\x0D\x20-\x7E]' // ASCII
        . '|[\xC2-\xDF][\x80-\xBF]' // non-overlong 2-byte
        . '|\xE0[\xA0-\xBF][\x80-\xBF]' // excluding overlongs
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' // straight 3-byte
        . '|\xED[\x80-\x9F][\x80-\xBF]' // excluding surrogates
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}' // planes 1-3
        . '|[\xF1-\xF3][\x80-\xBF]{3}' // planes 4-15
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}' // plane 16
        . ')*$%xs';

    /**
     * Tells whether $array is a non-empty array whose keys are not exactly 0..n-1.
     * Anything that is not an array, and the empty array, yield false.
     */
    private static function isAssocArray($array): bool {
        if(!is_array($array) || $array === [])
            return false;

        return array_keys($array) !== range(0, count($array) - 1);
    }

    // apparently this is faster than mb_check_encoding($string, 'utf-8');
    // on PHP 7.1 on Windows at least, perhaps investigate this later
    // UPDATE TODO: does this even make any sense with other internal encodings?
    /**
     * Tells whether $string matches the UTF8 pattern above (well-formed UTF-8
     * restricted to tab, LF, CR and printable characters). A preg_match()
     * failure counts as "no", which makes the caller fall back to a buffer.
     */
    private static function isUTF8String(string $string): bool {
        return preg_match(self::UTF8, $string) === 1;
    }

    /**
     * Picks the TYPE_* tag used to encode $data.
     *
     * @throws FWIFUnsupportedTypeException for values with no matching type (e.g. booleans, resources)
     */
    private static function detectType($data, int $flags): int {
        if($data === null)
            return self::TYPE_NULL;
        if(is_int($data))
            return self::TYPE_INTEGER;
        if(is_float($data))
            return self::TYPE_FLOAT;
        if(is_string($data))
            return self::isUTF8String($data) ? self::TYPE_STRING : self::TYPE_BUFFER;
        if($data instanceof DateTimeInterface)
            return self::TYPE_DATETIME;
        if(is_object($data) || self::isAssocArray($data))
            return self::TYPE_OBJECT;
        if(is_array($data))
            return self::TYPE_ARRAY;

        throw new FWIFUnsupportedTypeException(gettype($data));
    }

    /**
     * Serialises $data into an FWIF byte string.
     *
     * @param mixed $data  value to encode
     * @param int   $flags bitmask of DISCARD_MILLISECONDS / EXCLUDE_VERSION
     * @return string encoded bytes, prefixed with VERSION unless EXCLUDE_VERSION is set
     * @throws FWIFUnsupportedTypeException when a value cannot be represented
     * @throws InvalidArgumentException when a date's year does not fit the format
     */
    public static function encode($data, int $flags = self::DEFAULT): string {
        $encoded = self::encodeInternal($data, $flags);

        if(!($flags & self::EXCLUDE_VERSION))
            $encoded = chr(self::VERSION) . $encoded;

        return $encoded;
    }

    /**
     * Encodes one value as its type tag followed by its payload.
     * FWIFSerializable instances are replaced by their fwifSerialize() result first.
     */
    private static function encodeInternal($data, int $flags): string {
        if($data instanceof FWIFSerializable)
            $data = $data->fwifSerialize();

        $type = self::detectType($data, $flags);
        $method = 'encode' . self::CODECS[$type];

        return chr($type) . self::$method($data, $flags);
    }

    /**
     * Deserialises one FWIF value.
     *
     * @param string|resource $data  encoded bytes, or a readable stream positioned at the start of the value
     * @param int             $flags pass EXCLUDE_VERSION when the stream has no leading version byte
     * @return mixed the decoded value; objects come back as stdClass, dates as DateTimeImmutable in UTC
     * @throws InvalidArgumentException when $data is neither a string nor a stream, or the stream is malformed or truncated
     * @throws FWIFUnsupportedVersionException when the stream version is newer than VERSION
     * @throws FWIFUnsupportedTypeException when an unknown type tag is encountered
     */
    public static function decode($data, int $flags = self::DEFAULT) {
        $owned = null;

        if(is_string($data)) {
            $owned = fopen('php://memory', 'rb+');
            fwrite($owned, $data);
            fseek($owned, 0);
            $data = $owned;
        }

        if(!is_resource($data))
            throw new InvalidArgumentException('$data must be either a string or a file handle.');

        try {
            if(!($flags & self::EXCLUDE_VERSION)) {
                // An empty stream is reported as version 0, i.e. "not a valid stream".
                $char = fgetc($data);
                $version = $char === false ? 0 : ord($char);

                if($version < 1 || $version > 254)
                    throw new InvalidArgumentException('$data is not a valid FWIF serialized stream.');
                if($version > self::VERSION)
                    throw new FWIFUnsupportedVersionException;
            }

            return self::decodeInternal($data, $flags);
        } finally {
            // Only close the temporary stream we created; caller-supplied handles stay open.
            if($owned !== null)
                fclose($owned);
        }
    }

    /**
     * Reads a single byte.
     *
     * @throws InvalidArgumentException at end of stream
     */
    private static function readByte($data): int {
        $char = fgetc($data);
        if($char === false)
            throw new InvalidArgumentException('Unexpected end of FWIF stream.');

        return ord($char);
    }

    /**
     * Reads exactly $length bytes, looping because fread() may return short reads.
     *
     * @throws InvalidArgumentException when $length is negative or the stream ends early
     */
    private static function readBytes($data, int $length): string {
        if($length < 0)
            throw new InvalidArgumentException('Invalid negative length in FWIF stream.');

        $buffer = '';
        // fread() rejects a length of 0 on PHP 8, so zero-length reads never reach it.
        while(($remaining = $length - strlen($buffer)) > 0) {
            $chunk = fread($data, min($remaining, self::READ_CHUNK));
            if($chunk === false || $chunk === '')
                throw new InvalidArgumentException('Unexpected end of FWIF stream.');
            $buffer .= $chunk;
        }

        return $buffer;
    }

    /** Reads a type tag and decodes the value that follows it. */
    private static function decodeInternal($data, int $flags) {
        return self::decodeType($data, self::readByte($data), $flags);
    }

    /**
     * Decodes the payload of a value whose type tag has already been consumed.
     * Splitting this from decodeInternal() lets containers peek at the next
     * byte without seeking backwards, so non-seekable streams work too.
     *
     * @throws FWIFUnsupportedTypeException when $type has no codec
     */
    private static function decodeType($data, int $type, int $flags) {
        if(!array_key_exists($type, self::CODECS)) {
            $hexType = dechex($type); $pos = (int)ftell($data); $hexPos = dechex($pos);
            throw new FWIFUnsupportedTypeException("Unsupported type {$type} (0x{$hexType}) at position {$pos} (0x{$hexPos})");
        }

        $method = 'decode' . self::CODECS[$type];

        return self::$method($data, $flags);
    }

    private static function encodeNull($data, int $flags): string { return ''; }
    private static function decodeNull($data, int $flags) { return null; }

    /**
     * Encodes an integer as signed LEB128: 7 payload bits per byte, least
     * significant group first, high bit set on every byte except the last.
     */
    private static function encodeInteger(int $number, int $flags): string {
        $packed = '';

        for(;;) {
            $byte = $number & 0x7F;
            // PHP's >> is arithmetic, so negative numbers keep their sign bits.
            $number >>= 7;

            // Stop once the rest is pure sign extension and bit 6 of this
            // byte already carries the right sign.
            $signBit = ($byte & 0x40) !== 0;
            if(($number === 0 && !$signBit) || ($number === -1 && $signBit))
                return $packed . chr($byte);

            $packed .= chr($byte | 0x80);
        }
    }

    /**
     * Decodes a signed LEB128 integer.
     *
     * @throws InvalidArgumentException when the stream ends mid-number
     */
    private static function decodeInteger($data, int $flags): int {
        $number = 0; $shift = 0; $size = PHP_INT_SIZE * 8;

        do {
            $byte = self::readByte($data);
            $number |= ($byte & 0x7F) << $shift;
            $shift += 7;
        } while($byte & 0x80);

        // Sign-extend when the last group has its sign bit set and the value
        // did not already fill the native integer width.
        if(($shift < $size) && ($byte & 0x40))
            $number |= (~0 << $shift);

        return $number;
    }

    // I still don't like these
    /** Encodes a float as an 8-byte big-endian IEEE 754 double. */
    private static function encodeFloat(float $number, int $flags): string {
        return pack('E', $number);
    }

    /**
     * Decodes an 8-byte big-endian IEEE 754 double.
     *
     * @throws InvalidArgumentException when fewer than 8 bytes are available
     */
    private static function decodeFloat($data, int $flags): float {
        return unpack('E', self::readBytes($data, 8))[1];
    }

    /**
     * Encodes a string as UTF-8 bytes followed by TRAILER. The input is
     * converted from the mbstring internal encoding, mirroring decodeString().
     */
    private static function encodeString(string $string, int $flags): string {
        return mb_convert_encoding($string, 'utf-8', mb_internal_encoding()) . chr(self::TRAILER);
    }

    /**
     * Decodes a TRAILER-terminated UTF-8 string and converts it to the
     * mbstring internal encoding.
     *
     * @throws InvalidArgumentException when the stream ends before TRAILER
     */
    private static function decodeString($data, int $flags): string {
        $packed = '';

        for(;;) {
            $byte = self::readByte($data);
            if($byte === self::TRAILER)
                break;

            $packed .= chr($byte);

            // Consume the continuation bytes announced by a multi-byte lead
            // byte in one go, without testing them against TRAILER.
            if(($byte & 0xF8) === 0xF0)
                $packed .= self::readBytes($data, 3);
            elseif(($byte & 0xF0) === 0xE0)
                $packed .= self::readBytes($data, 2);
            elseif(($byte & 0xE0) === 0xC0)
                $packed .= self::readBytes($data, 1);
        }

        return mb_convert_encoding($packed, mb_internal_encoding(), 'utf-8');
    }

    /** Encodes a list as its encoded elements followed by TRAILER; keys are dropped. */
    private static function encodeArray(array $array, int $flags): string {
        $packed = '';
        foreach($array as $value)
            $packed .= self::encodeInternal($value, $flags);

        return $packed . chr(self::TRAILER);
    }

    /**
     * Decodes a TRAILER-terminated list of values.
     *
     * @throws InvalidArgumentException when the stream ends before TRAILER
     */
    private static function decodeArray($data, int $flags): array {
        $array = [];

        // The byte after each element is either TRAILER or the next element's type tag.
        while(($type = self::readByte($data)) !== self::TRAILER)
            $array[] = self::decodeType($data, $type, $flags);

        return $array;
    }

    /**
     * Encodes an object or associative array as repeated
     * "ASCII name, TRAILER, encoded value" entries followed by a final TRAILER.
     */
    private static function encodeObject($object, int $flags): string {
        $packed = '';

        foreach((array)$object as $name => $value)
            $packed .= mb_convert_encoding((string)$name, 'us-ascii', mb_internal_encoding())
                . chr(self::TRAILER)
                . self::encodeInternal($value, $flags);

        return $packed . chr(self::TRAILER);
    }

    /**
     * Reads the rest of a TRAILER-terminated property name.
     *
     * @param string $prefix bytes of the name the caller has already consumed
     * @throws InvalidArgumentException when the stream ends before TRAILER
     */
    private static function decodeObjectKey($data, int $flags, string $prefix = ''): string {
        $packed = $prefix;

        while(($byte = self::readByte($data)) !== self::TRAILER)
            $packed .= chr($byte);

        return mb_convert_encoding($packed, mb_internal_encoding(), 'us-ascii');
    }

    /**
     * Decodes a TRAILER-terminated list of named values into a stdClass.
     *
     * @throws InvalidArgumentException when the stream ends before the final TRAILER
     */
    private static function decodeObject($data, int $flags): object {
        $array = [];

        // The byte after each entry is either the final TRAILER or the first
        // byte of the next property name.
        while(($first = self::readByte($data)) !== self::TRAILER) {
            $key = self::decodeObjectKey($data, $flags, chr($first));
            $array[$key] = self::decodeInternal($data, $flags);
        }

        return (object)$array;
    }

    /** Encodes binary data as a LEB128 byte count followed by the raw bytes. */
    private static function encodeBuffer(string $buffer, int $flags): string {
        return self::encodeInteger(strlen($buffer), $flags) . $buffer;
    }

    /**
     * Decodes a length-prefixed binary buffer (which may be empty).
     *
     * @throws InvalidArgumentException when the length is negative or the stream ends early
     */
    private static function decodeBuffer($data, int $flags): string {
        return self::readBytes($data, self::decodeInteger($data, $flags));
    }

    private const DATETIME_FLAG_TIME = 0x40;
    private const DATETIME_FLAG_MILLI = 0x4000;

    private const DATETIME_YEAR_SIGN = 0x40000000;
    private const DATETIME_YEAR_MASK = 0x3FFF;
    private const DATETIME_YEAR_SHIFT = 16; // <<

    private const DATETIME_MONTH_MASK = 0x0F;
    private const DATETIME_MONTH_SHIFT = 12; // <<

    private const DATETIME_DAY_MASK = 0x1F;
    private const DATETIME_DAY_SHIFT = 7; // <<

    private const DATETIME_HOUR_MASK = 0x1F;

    private const DATETIME_MINS_MASK = 0x3F;
    private const DATETIME_MINS_SHIFT = 8; // <<

    private const DATETIME_SECS_MASK = 0x3F;
    private const DATETIME_SECS_SHIFT = 2; // <<

    private const DATETIME_MILLI_HI_MASK = 0x300;
    private const DATETIME_MILLI_HI_SHIFT = 8; // >>
    private const DATETIME_MILLI_LO_MASK = 0x0FF;

    /* +--------+--------+
     * |.YYYYYYY|YYYYYYYY|
     * |MMMMDDDD|DT.HHHHH|
     * |.Wmmmmmm|SSSSSSww|
     * |wwwwwwww|        |
     * +--------+--------+
     *
     * T = minutes/seconds word follows, W = millisecond byte follows.
     * The top Y bit is the year sign; negative years are stored as ~year.
     */

    /**
     * Encodes a point in time, normalised to UTC, using the layout above.
     * The minutes/seconds word is only written when any of minutes, seconds
     * or milliseconds is non-zero, and the millisecond byte only when
     * milliseconds are non-zero (and not discarded through $flags).
     *
     * @throws InvalidArgumentException when the UTC year does not fit into 14 bits plus sign
     */
    private static function encodeDateTime(DateTimeInterface $dt, int $flags): string {
        $dt = DateTimeImmutable::createFromInterface($dt)->setTimezone(new DateTimeZone('UTC'));

        $year = (int)$dt->format('Y');
        $month = (int)$dt->format('n');
        $day = (int)$dt->format('j');
        $hours = (int)$dt->format('G');
        $mins = (int)$dt->format('i');
        $secs = (int)$dt->format('s');
        $millis = ($flags & self::DISCARD_MILLISECONDS) ? 0 : (int)$dt->format('v');

        $subYear = $year < 0;
        if($subYear)
            $year = ~$year;
        // Masking an oversized year would silently store a different date.
        if($year > self::DATETIME_YEAR_MASK)
            throw new InvalidArgumentException('Year is out of range for FWIF serialization.');

        $hasTime = $mins > 0 || $secs > 0 || $millis > 0;
        $hasMillis = $millis > 0;

        $ymdfh = $subYear ? self::DATETIME_YEAR_SIGN : 0;
        $ymdfh |= ($year & self::DATETIME_YEAR_MASK) << self::DATETIME_YEAR_SHIFT;
        $ymdfh |= ($month & self::DATETIME_MONTH_MASK) << self::DATETIME_MONTH_SHIFT;
        $ymdfh |= ($day & self::DATETIME_DAY_MASK) << self::DATETIME_DAY_SHIFT;
        $ymdfh |= ($hours & self::DATETIME_HOUR_MASK);
        if($hasTime)
            $ymdfh |= self::DATETIME_FLAG_TIME;

        $packed = pack('N', $ymdfh);
        if(!$hasTime)
            return $packed;

        $msw = ($mins & self::DATETIME_MINS_MASK) << self::DATETIME_MINS_SHIFT;
        $msw |= ($secs & self::DATETIME_SECS_MASK) << self::DATETIME_SECS_SHIFT;
        if($hasMillis) {
            // The two high millisecond bits share the word; the low eight get their own byte.
            $msw |= self::DATETIME_FLAG_MILLI;
            $msw |= ($millis & self::DATETIME_MILLI_HI_MASK) >> self::DATETIME_MILLI_HI_SHIFT;
        }

        $packed .= pack('n', $msw);
        if($hasMillis)
            $packed .= chr($millis & self::DATETIME_MILLI_LO_MASK);

        return $packed;
    }

    /**
     * Decodes a point in time written by encodeDateTime().
     *
     * @return DateTimeInterface a DateTimeImmutable in UTC
     * @throws InvalidArgumentException when the stream ends early or a field is out of range
     */
    private static function decodeDateTime($data, int $flags): DateTimeInterface {
        $ymdfh = unpack('N', self::readBytes($data, 4))[1];
        $hasMsw = ($ymdfh & self::DATETIME_FLAG_TIME) !== 0;
        $msw = $hasMsw ? unpack('n', self::readBytes($data, 2))[1] : 0;
        $hasW = $hasMsw && ($msw & self::DATETIME_FLAG_MILLI) !== 0;
        $w = $hasW ? self::readByte($data) : 0;

        $year = ($ymdfh >> self::DATETIME_YEAR_SHIFT) & self::DATETIME_YEAR_MASK;
        $month = ($ymdfh >> self::DATETIME_MONTH_SHIFT) & self::DATETIME_MONTH_MASK;
        $day = ($ymdfh >> self::DATETIME_DAY_SHIFT) & self::DATETIME_DAY_MASK;
        $hour = $ymdfh & self::DATETIME_HOUR_MASK;

        if($ymdfh & self::DATETIME_YEAR_SIGN)
            $year = ~$year;

        $mins = 0; $secs = 0; $millis = 0;
        if($hasMsw) {
            $mins = ($msw >> self::DATETIME_MINS_SHIFT) & self::DATETIME_MINS_MASK;
            $secs = ($msw >> self::DATETIME_SECS_SHIFT) & self::DATETIME_SECS_MASK;
            if($hasW)
                $millis = (($msw << self::DATETIME_MILLI_HI_SHIFT) & self::DATETIME_MILLI_HI_MASK) | $w;
        }

        // setDate()/setTime() roll out-of-range fields over instead of
        // failing, so reject fields the bit widths allow but the calendar does not.
        if($month < 1 || $month > 12 || $day < 1 || $hour > 23 || $mins > 59 || $secs > 59 || $millis > 999)
            throw new InvalidArgumentException('$data contains an invalid date/time value.');

        // Build from numeric fields rather than a formatted string so that
        // negative and 5-digit years survive, and pin the zone to UTC because
        // that is what the encoder wrote.
        return (new DateTimeImmutable('1970-01-01T00:00:00', new DateTimeZone('UTC')))
            ->setDate($year, $month, $day)
            ->setTime($hour, $mins, $secs, $millis * 1000);
    }
}
|