2020-12-23 01:44:45 +00:00
< ? php
namespace FWIF ;
2020-12-26 04:12:26 +00:00
use DateInterval ;
2020-12-24 03:25:10 +00:00
use DateTime ;
use DateTimeInterface ;
2020-12-26 04:12:26 +00:00
use DateTimeImmutable ;
2020-12-24 03:25:10 +00:00
use DateTimeZone ;
2020-12-26 04:12:26 +00:00
use InvalidArgumentException ;
2020-12-24 03:25:10 +00:00
2020-12-23 01:44:45 +00:00
/**
 * Encoder/decoder for the FWIF binary serialization format.
 *
 * A stream consists of an optional version byte followed by one value. Every
 * value starts with a type byte (one of the TYPE_* constants) followed by a
 * type specific payload. Strings, arrays and objects are terminated with
 * TRAILER; buffers are length prefixed; integers are signed LEB128.
 */
class FWIF {
    public const CONTENT_TYPE = 'text/plain; charset=us-ascii'; // TODO: come up with a mime type

    public const DEFAULT = 0;
    public const DISCARD_MILLISECONDS = 0x01; // Always exclude the millisecond component from DateTime
    public const EXCLUDE_VERSION = 0x02; // Exclude version byte at the start of the stream

    public const TYPE_NULL = 0; // NULL, no data
    public const TYPE_INTEGER = 0x01; // LEB128, implicit length
    public const TYPE_FLOAT = 0x02; // double precision IEEE 754, fixed length of 8 bytes
    public const TYPE_STRING = 0x03; // UTF-8 string, terminated with TRAILER
    public const TYPE_BUFFER = 0x04; // Buffer with binary data, prefixed with a LEB128 length
    public const TYPE_ARRAY = 0x05; // List of values, terminated with TRAILER
    public const TYPE_OBJECT = 0x06; // List of values with ASCII names, terminated with TRAILER
    public const TYPE_DATETIME = 0x07; // A gregorian year, month and day as well as an hour, minute, seconds and millisecond component, variable ranging from 4 to 7 bytes
    public const TRAILER = 0xFF; // Termination byte

    public const VERSION = 0x01; // min 1, max 254

    // Maps a type byte to the suffix of the encodeX/decodeX method handling it.
    private const CODECS = [
        self::TYPE_NULL => 'Null',
        self::TYPE_INTEGER => 'Integer',
        self::TYPE_FLOAT => 'Float',
        self::TYPE_STRING => 'String',
        self::TYPE_ARRAY => 'Array',
        self::TYPE_OBJECT => 'Object',
        self::TYPE_BUFFER => 'Buffer',
        self::TYPE_DATETIME => 'DateTime',
    ];

    private const UTF8 = '%^(?:' // https://www.w3.org/International/questions/qa-forms-utf-8.en
        . '[\x09\x0A\x0D\x20-\x7E]' // ASCII
        . '|[\xC2-\xDF][\x80-\xBF]' // non-overlong 2-byte
        . '|\xE0[\xA0-\xBF][\x80-\xBF]' // excluding overlongs
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' // straight 3-byte
        . '|\xED[\x80-\x9F][\x80-\xBF]' // excluding surrogates
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}' // planes 1-3
        . '|[\xF1-\xF3][\x80-\xBF]{3}' // planes 4-15
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}' // plane 16
        . ')*$%xs';

    // Number of bytes requested from the stream per fread() call in readBytes().
    private const READ_CHUNK = 8192;

    /**
     * Tells whether $array is a non-empty array whose keys are not exactly 0..n-1 in order.
     *
     * @param mixed $array Any value; non-arrays and the empty array yield false.
     */
    private static function isAssocArray($array): bool {
        if (!is_array($array) || $array === []) {
            return false;
        }
        return array_keys($array) !== range(0, count($array) - 1);
    }

    // apparently this is faster than mb_check_encoding($string, 'utf-8');
    // on PHP 7.1 on Windows at least, perhaps investigate this later
    // UPDATE TODO: does this even make any sense with other internal encodings?
    /**
     * Tells whether $string matches the UTF8 pattern (printable ASCII, tab, CR, LF
     * and well formed multi-byte sequences).
     */
    private static function isUTF8String(string $string): bool {
        return preg_match(self::UTF8, $string) === 1;
    }

    /**
     * Picks the TYPE_* constant used to encode $data.
     *
     * Strings that do not match the UTF8 pattern are treated as binary buffers.
     * Objects and associative arrays become TYPE_OBJECT, except for
     * DateTimeInterface instances which become TYPE_DATETIME.
     *
     * @param mixed $data  Value to inspect.
     * @param int   $flags Encoding flags (currently not consulted here).
     * @throws FWIFUnsupportedTypeException When no type fits, e.g. booleans or resources.
     */
    private static function detectType($data, int $flags): int {
        if (is_null($data)) {
            return self::TYPE_NULL;
        }
        if (is_int($data)) {
            return self::TYPE_INTEGER;
        }
        if (is_float($data)) {
            return self::TYPE_FLOAT;
        }
        if (is_string($data)) {
            return self::isUTF8String($data) ? self::TYPE_STRING : self::TYPE_BUFFER;
        }
        if (is_object($data) || self::isAssocArray($data)) {
            return $data instanceof DateTimeInterface ? self::TYPE_DATETIME : self::TYPE_OBJECT;
        }
        if (is_array($data)) {
            return self::TYPE_ARRAY;
        }
        throw new FWIFUnsupportedTypeException(gettype($data));
    }

    /**
     * Returns the shared UTC timezone instance used for date handling.
     */
    private static function utc(): DateTimeZone {
        static $utc = null;
        return $utc ??= new DateTimeZone('UTC');
    }

    /**
     * Reads exactly one byte from the stream.
     *
     * @param resource $data Readable stream.
     * @return int Byte value, 0-255.
     * @throws InvalidArgumentException When the stream ends before a byte could be read.
     */
    private static function readByte($data): int {
        $char = fgetc($data);
        if ($char === false) {
            throw new InvalidArgumentException('Unexpected end of FWIF stream.');
        }
        return ord($char);
    }

    /**
     * Reads exactly $length bytes from the stream.
     *
     * Reads in bounded chunks so that a bogus length in the stream cannot force
     * a huge allocation up front, and loops because fread() may return fewer
     * bytes than requested.
     *
     * @param resource $data   Readable stream.
     * @param int      $length Number of bytes to read; 0 yields an empty string.
     * @throws InvalidArgumentException On a negative length or a premature end of stream.
     */
    private static function readBytes($data, int $length): string {
        if ($length < 0) {
            throw new InvalidArgumentException('Negative length in FWIF stream.');
        }
        $buffer = '';
        while (($remaining = $length - strlen($buffer)) > 0) {
            $chunk = fread($data, min($remaining, self::READ_CHUNK));
            if ($chunk === false || $chunk === '') {
                throw new InvalidArgumentException('Unexpected end of FWIF stream.');
            }
            $buffer .= $chunk;
        }
        return $buffer;
    }

    /**
     * Serializes $data into a FWIF byte string.
     *
     * @param mixed $data  null, int, float, string, array, object, DateTimeInterface or FWIFSerializable.
     * @param int   $flags Bitmask of DISCARD_MILLISECONDS and EXCLUDE_VERSION.
     * @return string Encoded bytes, prefixed with the version byte unless EXCLUDE_VERSION is set.
     * @throws FWIFUnsupportedTypeException When $data (or a nested value) has no FWIF representation.
     * @throws InvalidArgumentException     When an object key is empty.
     */
    public static function encode($data, int $flags = self::DEFAULT): string {
        $encoded = self::encodeInternal($data, $flags);
        if (!($flags & self::EXCLUDE_VERSION)) {
            $encoded = chr(self::VERSION) . $encoded;
        }
        return $encoded;
    }

    /**
     * Encodes a single value including its leading type byte.
     */
    private static function encodeInternal($data, int $flags): string {
        if ($data instanceof FWIFSerializable) {
            $data = $data->fwifSerialize();
        }
        $type = self::detectType($data, $flags);
        return chr($type) . self::{'encode' . self::CODECS[$type]}($data, $flags);
    }

    /**
     * Deserializes a FWIF stream.
     *
     * @param string|resource $data  Encoded bytes or a readable stream positioned at the start of the data.
     * @param int             $flags Pass EXCLUDE_VERSION when the stream carries no version byte.
     * @return mixed The decoded value; objects come back as stdClass, dates as DateTimeImmutable in UTC.
     * @throws InvalidArgumentException        When $data is neither string nor stream, or the stream is malformed/truncated.
     * @throws FWIFUnsupportedVersionException When the stream was written by a newer format version.
     * @throws FWIFUnsupportedTypeException    When an unknown type byte is encountered.
     */
    public static function decode($data, int $flags = self::DEFAULT) {
        $fd = null;
        if (is_string($data)) {
            $fd = fopen('php://memory', 'rb+');
            fwrite($fd, $data);
            fseek($fd, 0);
            $data = $fd;
        }
        try {
            if (!is_resource($data)) {
                throw new InvalidArgumentException('$data must be either a string or a file handle.');
            }
            if (!($flags & self::EXCLUDE_VERSION)) {
                $char = fgetc($data);
                $version = $char === false ? 0 : ord($char);
                if ($version < 1 || $version > 254) {
                    throw new InvalidArgumentException('$data is not a valid FWIF serialized stream.');
                }
                if ($version > self::VERSION) {
                    throw new FWIFUnsupportedVersionException();
                }
            }
            return self::decodeInternal($data, $flags);
        } finally {
            // Only close the handle we opened ourselves, also when decoding failed.
            if ($fd !== null) {
                fclose($fd);
            }
        }
    }

    /**
     * Reads a type byte from the stream and decodes the value that follows it.
     */
    private static function decodeInternal($data, int $flags) {
        return self::decodeValue($data, self::readByte($data), $flags);
    }

    /**
     * Decodes the payload of a value whose type byte has already been consumed.
     *
     * Taking the type as an argument lets container decoders peek at the next
     * byte (to detect TRAILER) without having to seek backwards in the stream.
     *
     * @throws FWIFUnsupportedTypeException When $type is not a known type byte.
     */
    private static function decodeValue($data, int $type, int $flags) {
        if (!array_key_exists($type, self::CODECS)) {
            $hexType = dechex($type);
            $pos = (int) ftell($data);
            $hexPos = dechex($pos);
            throw new FWIFUnsupportedTypeException("Unsupported type {$type} (0x{$hexType}) at position {$pos} (0x{$hexPos})");
        }
        return self::{'decode' . self::CODECS[$type]}($data, $flags);
    }

    private static function encodeNull($data, int $flags): string {
        return '';
    }

    private static function decodeNull($data, int $flags) {
        return null;
    }

    /**
     * Encodes an integer as signed LEB128 (7 payload bits per byte, high bit = "more follows").
     */
    private static function encodeInteger(int $number, int $flags): string {
        $packed = '';
        do {
            $byte = $number & 0x7F;
            // PHP's >> is an arithmetic shift, so the sign is carried along automatically.
            $number >>= 7;
            // Finished once the remainder is pure sign extension of bit 6 of this byte.
            $done = ($number === 0 && !($byte & 0x40)) || ($number === -1 && ($byte & 0x40));
            if (!$done) {
                $byte |= 0x80;
            }
            $packed .= chr($byte);
        } while (!$done);
        return $packed;
    }

    /**
     * Decodes a signed LEB128 integer.
     *
     * @throws InvalidArgumentException When the stream is truncated or the number does not fit a native int.
     */
    private static function decodeInteger($data, int $flags): int {
        $number = 0;
        $shift = 0;
        $size = PHP_INT_SIZE * 8;
        do {
            if ($shift >= $size) {
                throw new InvalidArgumentException('Integer in FWIF stream exceeds the native integer size.');
            }
            $byte = self::readByte($data);
            $number |= ($byte & 0x7F) << $shift;
            $shift += 7;
        } while ($byte & 0x80);
        // Sign extend when the last group had its sign bit set.
        if ($shift < $size && ($byte & 0x40)) {
            $number |= (~0 << $shift);
        }
        return $number;
    }

    // I still don't like these
    /**
     * Encodes a float as 8 bytes, big endian double precision.
     */
    private static function encodeFloat(float $number, int $flags): string {
        return pack('E', $number);
    }

    /**
     * Decodes an 8 byte big endian double.
     *
     * @throws InvalidArgumentException When fewer than 8 bytes are available.
     */
    private static function decodeFloat($data, int $flags): float {
        return unpack('E', self::readBytes($data, 8))[1];
    }

    /**
     * Encodes a string as UTF-8 bytes followed by TRAILER.
     */
    private static function encodeString(string $string, int $flags): string {
        return mb_convert_encoding($string, 'utf-8', mb_internal_encoding()) . chr(self::TRAILER);
    }

    /**
     * Decodes a TRAILER terminated UTF-8 string and converts it to the internal encoding.
     *
     * @throws InvalidArgumentException When the stream ends before the terminator.
     */
    private static function decodeString($data, int $flags): string {
        $packed = '';
        for (;;) {
            $byte = self::readByte($data);
            if ($byte === self::TRAILER) {
                break;
            }
            $packed .= chr($byte);
            // Consume the continuation bytes announced by a multi-byte lead byte in one go.
            if (($byte & 0xF8) === 0xF0) {
                $packed .= self::readBytes($data, 3);
            } elseif (($byte & 0xF0) === 0xE0) {
                $packed .= self::readBytes($data, 2);
            } elseif (($byte & 0xE0) === 0xC0) {
                $packed .= self::readBytes($data, 1);
            }
        }
        return mb_convert_encoding($packed, mb_internal_encoding(), 'utf-8');
    }

    /**
     * Encodes a list as the concatenation of its encoded values followed by TRAILER.
     */
    private static function encodeArray(array $array, int $flags): string {
        $packed = '';
        foreach ($array as $value) {
            $packed .= self::encodeInternal($value, $flags);
        }
        return $packed . chr(self::TRAILER);
    }

    /**
     * Decodes values until TRAILER is found in type byte position.
     */
    private static function decodeArray($data, int $flags): array {
        $array = [];
        for (;;) {
            $type = self::readByte($data);
            if ($type === self::TRAILER) {
                break;
            }
            $array[] = self::decodeValue($data, $type, $flags);
        }
        return $array;
    }

    /**
     * Encodes an object or associative array as name/value pairs followed by TRAILER.
     *
     * Each name is converted to US-ASCII and terminated with TRAILER.
     *
     * @param object|array $object
     * @throws InvalidArgumentException When a name is empty: an empty name is
     *         indistinguishable from the end-of-object marker and would corrupt the stream.
     */
    private static function encodeObject($object, int $flags): string {
        $packed = '';
        foreach ((array) $object as $name => $value) {
            $key = mb_convert_encoding((string) $name, 'us-ascii', mb_internal_encoding());
            if ($key === '') {
                throw new InvalidArgumentException('Object keys must not be empty.');
            }
            $packed .= $key . chr(self::TRAILER) . self::encodeInternal($value, $flags);
        }
        return $packed . chr(self::TRAILER);
    }

    /**
     * Reads an object key up to its TRAILER.
     *
     * @param string $prefix Bytes of the key that the caller has already consumed.
     * @throws InvalidArgumentException When the stream ends before the terminator.
     */
    private static function decodeObjectKey($data, int $flags, string $prefix = ''): string {
        $packed = $prefix;
        for (;;) {
            $byte = self::readByte($data);
            if ($byte === self::TRAILER) {
                break;
            }
            $packed .= chr($byte);
        }
        return mb_convert_encoding($packed, mb_internal_encoding(), 'us-ascii');
    }

    /**
     * Decodes name/value pairs until TRAILER is found where a name would start.
     */
    private static function decodeObject($data, int $flags): object {
        $array = [];
        for (;;) {
            $first = self::readByte($data);
            if ($first === self::TRAILER) {
                break;
            }
            $key = self::decodeObjectKey($data, $flags, chr($first));
            $array[$key] = self::decodeInternal($data, $flags);
        }
        return (object) $array;
    }

    /**
     * Encodes binary data as a LEB128 length followed by the raw bytes.
     */
    private static function encodeBuffer(string $buffer, int $flags): string {
        return self::encodeInteger(strlen($buffer), $flags) . $buffer;
    }

    /**
     * Decodes a length prefixed binary buffer.
     *
     * @throws InvalidArgumentException When the length is negative or the stream is truncated.
     */
    private static function decodeBuffer($data, int $flags): string {
        return self::readBytes($data, self::decodeInteger($data, $flags));
    }

    private const DATETIME_FLAG_TIME = 0x40;
    private const DATETIME_FLAG_MILLI = 0x4000;

    private const DATETIME_YEAR_SIGN = 0x40000000;
    private const DATETIME_YEAR_MASK = 0x3FFF;
    private const DATETIME_YEAR_SHIFT = 16; // <<
    private const DATETIME_MONTH_MASK = 0x0F;
    private const DATETIME_MONTH_SHIFT = 12; // <<
    private const DATETIME_DAY_MASK = 0x1F;
    private const DATETIME_DAY_SHIFT = 7; // <<
    private const DATETIME_HOUR_MASK = 0x1F;
    private const DATETIME_MINS_MASK = 0x3F;
    private const DATETIME_MINS_SHIFT = 8; // <<
    private const DATETIME_SECS_MASK = 0x3F;
    private const DATETIME_SECS_SHIFT = 2; // <<
    private const DATETIME_MILLI_HI_MASK = 0x300;
    private const DATETIME_MILLI_HI_SHIFT = 8; // >>
    private const DATETIME_MILLI_LO_MASK = 0x0FF;

    /* Layout, most significant bit first. S = year sign, T = "time word follows",
     * W = "millisecond byte follows", w = millisecond bits, . = unused.
     *
     * +--------+--------+
     * |.SYYYYYY|YYYYYYYY|
     * |MMMMDDDD|DT.HHHHH|
     * |.Wmmmmmm|SSSSSSww|   (only when T is set)
     * |wwwwwwww|        |   (only when W is set)
     * +--------+--------+
     */

    /**
     * Encodes a point in time, normalised to UTC, into 4, 6 or 7 bytes.
     *
     * Minutes/seconds are only written when any of minutes, seconds or
     * milliseconds is non-zero; the millisecond byte only when milliseconds are
     * non-zero and DISCARD_MILLISECONDS is not set. Years are masked to 14 bits.
     */
    private static function encodeDateTime(DateTimeInterface $dt, int $flags): string {
        if ($dt->getTimezone()->getOffset($dt) !== 0) {
            $dt = DateTimeImmutable::createFromInterface($dt)->setTimezone(self::utc());
        }

        $year = (int) $dt->format('Y');
        $month = (int) $dt->format('n');
        $day = (int) $dt->format('j');
        $hours = (int) $dt->format('G');
        $mins = (int) $dt->format('i');
        $secs = (int) $dt->format('s');
        $millis = ($flags & self::DISCARD_MILLISECONDS) ? 0 : (int) $dt->format('v');

        // Negative years are stored as their one's complement plus the sign flag.
        $ymdfh = 0;
        if ($year < 0) {
            $year = ~$year;
            $ymdfh = self::DATETIME_YEAR_SIGN;
        }
        $ymdfh |= ($year & self::DATETIME_YEAR_MASK) << self::DATETIME_YEAR_SHIFT;
        $ymdfh |= ($month & self::DATETIME_MONTH_MASK) << self::DATETIME_MONTH_SHIFT;
        $ymdfh |= ($day & self::DATETIME_DAY_MASK) << self::DATETIME_DAY_SHIFT;
        $ymdfh |= ($hours & self::DATETIME_HOUR_MASK);

        if ($mins === 0 && $secs === 0 && $millis === 0) {
            return pack('N', $ymdfh);
        }

        $ymdfh |= self::DATETIME_FLAG_TIME;
        $msw = ($mins & self::DATETIME_MINS_MASK) << self::DATETIME_MINS_SHIFT;
        $msw |= ($secs & self::DATETIME_SECS_MASK) << self::DATETIME_SECS_SHIFT;
        $packed = '';
        if ($millis > 0) {
            // Upper two millisecond bits live in the time word, lower eight in their own byte.
            $msw |= self::DATETIME_FLAG_MILLI;
            $msw |= ($millis & self::DATETIME_MILLI_HI_MASK) >> self::DATETIME_MILLI_HI_SHIFT;
            $packed = chr($millis & self::DATETIME_MILLI_LO_MASK);
        }

        return pack('N', $ymdfh) . pack('n', $msw) . $packed;
    }

    /**
     * Decodes a date/time value into a DateTimeImmutable in UTC.
     *
     * @throws InvalidArgumentException When the stream is truncated or carries an out of range millisecond value.
     */
    private static function decodeDateTime($data, int $flags): DateTimeInterface {
        $ymdfh = unpack('N', self::readBytes($data, 4))[1];
        $hasMsw = ($ymdfh & self::DATETIME_FLAG_TIME) !== 0;
        $msw = $hasMsw ? unpack('n', self::readBytes($data, 2))[1] : 0;
        $hasW = $hasMsw && ($msw & self::DATETIME_FLAG_MILLI) !== 0;
        $w = $hasW ? self::readByte($data) : 0;

        $year = ($ymdfh >> self::DATETIME_YEAR_SHIFT) & self::DATETIME_YEAR_MASK;
        $month = ($ymdfh >> self::DATETIME_MONTH_SHIFT) & self::DATETIME_MONTH_MASK;
        $day = ($ymdfh >> self::DATETIME_DAY_SHIFT) & self::DATETIME_DAY_MASK;
        $hour = $ymdfh & self::DATETIME_HOUR_MASK;
        if ($ymdfh & self::DATETIME_YEAR_SIGN) {
            $year = ~$year;
        }

        // $msw is 0 when no time word was present, so these collapse to 0 as well.
        $mins = ($msw >> self::DATETIME_MINS_SHIFT) & self::DATETIME_MINS_MASK;
        $secs = ($msw >> self::DATETIME_SECS_SHIFT) & self::DATETIME_SECS_MASK;
        $millis = 0;
        if ($hasW) {
            $millis = (($msw << self::DATETIME_MILLI_HI_SHIFT) & self::DATETIME_MILLI_HI_MASK) | $w;
            if ($millis > 999) {
                throw new InvalidArgumentException('Millisecond component in FWIF stream is out of range.');
            }
        }

        // Set the fields directly instead of parsing a formatted string: this keeps
        // the value in UTC (matching the encoder) and copes with negative years.
        return (new DateTimeImmutable('now', self::utc()))
            ->setDate($year, $month, $day)
            ->setTime($hour, $mins, $secs, $millis * 1000);
    }
}