116 lines
3.2 KiB
PHP
116 lines
3.2 KiB
PHP
|
<?php
|
||
|
/**
 * Read-only accessor for a binary markov dictionary file.
 *
 * Layout as consumed by this class (all integers little-endian):
 *
 *  - 4 bytes   magic string "FMkD"
 *  - 12 bytes  header: uint8 version, 2 unused bytes, uint8 segmentSize,
 *              uint32 totalSegments, uint32 startSegments
 *  - segments, beginning at absolute offset 16, each made of:
 *      - 4 * segmentSize bytes of text (decoded as UTF-32LE)
 *      - 1 flag byte, non-zero when the segment may begin a sentence
 *      - uint16 number of outgoing links
 *      - per link: uint32 absolute offset of the next segment, uint16 weight
 */
class MarkovDictionary {
    private const MAGIC = 'FMkD';
    private const VERSION = 1;

    /** Size in bytes of the header that follows the magic string. */
    private const HEADER_SIZE = 12;

    /** Absolute offset of the first segment (magic + header). */
    private const DATA_OFFSET = 16;

    /** Bytes per character of segment text. */
    private const CHAR_SIZE = 4;

    /** Bytes per link entry: uint32 offset + uint16 weight. */
    private const LINK_SIZE = 6;

    /** @var resource|null Open file handle, null once close() has run. */
    private $handle;
    private int $segmentSize;
    private int $totalSegments;
    private int $startSegments;

    /**
     * Opens the dictionary at $path and validates its magic string and header.
     *
     * @param string $path Path to the dictionary file.
     * @throws InvalidArgumentException If the file is missing, cannot be opened,
     *                                  or does not carry a usable header.
     */
    public function __construct(string $path) {
        if(!is_file($path))
            throw new InvalidArgumentException('$path does not exist.');

        $handle = fopen($path, 'rb');
        if($handle === false)
            throw new InvalidArgumentException('$path could not be opened.');

        try {
            $magic = fread($handle, strlen(self::MAGIC));
            if($magic !== self::MAGIC)
                throw new InvalidArgumentException('$path is not a valid markov dictionary.');

            $header = fread($handle, self::HEADER_SIZE);
            if($header === false || strlen($header) !== self::HEADER_SIZE)
                throw new InvalidArgumentException('$path is missing header data.');

            $fields = unpack('Cversion/Cunused1/Cunused2/CsegmentSize/VtotalSegments/VstartSegments', $header);

            if($fields['version'] !== self::VERSION)
                throw new InvalidArgumentException('$path version is incompatible.');

            // A zero segment size would make every later text read a zero-length fread().
            if($fields['segmentSize'] < 1)
                throw new InvalidArgumentException('$path has an invalid segment size.');
        } catch(InvalidArgumentException $ex) {
            // Release the file right away instead of waiting for garbage collection.
            fclose($handle);
            throw $ex;
        }

        $this->handle = $handle;
        $this->segmentSize = $fields['segmentSize'];
        $this->totalSegments = $fields['totalSegments'];
        $this->startSegments = $fields['startSegments'];
    }

    /**
     * Closes the underlying file. Safe to call more than once.
     */
    public function close(): void {
        if($this->handle !== null) {
            fclose($this->handle);
            $this->handle = null;
        }
    }

    public function __destruct() {
        $this->close();
    }

    /**
     * Moves the file pointer back to the first segment.
     */
    private function reset(): void {
        fseek($this->handle, self::DATA_OFFSET, SEEK_SET);
    }

    /**
     * Guards against use after close().
     *
     * @throws RuntimeException If the dictionary has already been closed.
     */
    private function ensureOpen(): void {
        if(!is_resource($this->handle))
            throw new RuntimeException('The markov dictionary has been closed.');
    }

    /**
     * Reads a little-endian uint16 at the current file position.
     *
     * @return int|null The value, or null when fewer than two bytes remain.
     */
    private function readUInt16(): ?int {
        $raw = fread($this->handle, 2);
        if($raw === false || strlen($raw) !== 2)
            return null;

        return unpack('v', $raw)[1];
    }

    /**
     * Picks one of the segments flagged as a start segment, uniformly at random.
     *
     * The file is scanned from the first segment; the header's start segment
     * count decides how many flagged segments are skipped. If the file ends
     * before the chosen one is reached (header count larger than the data),
     * the offset of the first segment is returned instead.
     *
     * @return int Absolute file offset usable as the $start argument of generate().
     * @throws RuntimeException If the dictionary has already been closed.
     */
    public function getStartPosition(): int {
        $this->ensureOpen();

        if($this->startSegments < 1)
            return self::DATA_OFFSET;

        // Zero-based index of the flagged segment to return.
        $toSkip = mt_rand(0, $this->startSegments - 1);
        $textBytes = self::CHAR_SIZE * $this->segmentSize;
        $startPos = self::DATA_OFFSET;

        $this->reset();

        for(;;) {
            $segmentPos = ftell($this->handle);

            // Jump over the text; seeking past EOF succeeds, the read below detects it.
            fseek($this->handle, $textBytes, SEEK_CUR);

            $flag = fgetc($this->handle);
            if($flag === false)
                break;

            if($flag !== "\0") {
                if($toSkip < 1) {
                    $startPos = $segmentPos;
                    break;
                }

                --$toSkip;
            }

            $linkCount = $this->readUInt16();
            if($linkCount === null)
                break;

            fseek($this->handle, self::LINK_SIZE * $linkCount, SEEK_CUR);
        }

        $this->reset();

        return $startPos;
    }

    /**
     * Walks the chain from a segment and returns the collected text as UTF-8.
     *
     * The walk ends when a segment has no outgoing links (or only zero-weight
     * ones), when the data is truncated, or after $safety segments.
     *
     * @param int $safety Maximum number of segments to append.
     * @param int $start  Absolute offset of the first segment; a negative value
     *                    selects a random start segment.
     * @return string Generated text with surrounding whitespace trimmed.
     * @throws RuntimeException If the dictionary has already been closed.
     */
    public function generate(int $safety = 2000, int $start = -1): string {
        $this->ensureOpen();

        if($start < 0)
            $start = $this->getStartPosition();

        fseek($this->handle, $start, SEEK_SET);

        $textBytes = self::CHAR_SIZE * $this->segmentSize;
        $string = '';

        for($s = 0; $s < $safety; ++$s) {
            $text = fread($this->handle, $textBytes);
            if($text === false || strlen($text) !== $textBytes)
                break;

            $string .= $text;

            // Skip the start flag, it is irrelevant while walking the chain.
            fseek($this->handle, 1, SEEK_CUR);

            $linkCount = $this->readUInt16();
            if($linkCount === null || $linkCount < 1)
                break;

            $linkBytes = self::LINK_SIZE * $linkCount;
            $links = fread($this->handle, $linkBytes);
            if($links === false || strlen($links) !== $linkBytes)
                break;

            // Running totals of the weights: one entry per link rather than
            // one array element per unit of weight.
            $candidates = [];
            $totalWeight = 0;

            for($i = 0; $i < $linkCount; ++$i) {
                $link = unpack('Voffset/vweight', $links, self::LINK_SIZE * $i);
                if($link['weight'] < 1)
                    continue;

                $totalWeight += $link['weight'];
                $candidates[] = [$link['offset'], $totalWeight];
            }

            // Nothing selectable: treat the segment as the end of the chain.
            if($totalWeight < 1)
                break;

            $pick = mt_rand(1, $totalWeight);

            foreach($candidates as [$offset, $ceiling]) {
                if($pick <= $ceiling) {
                    fseek($this->handle, $offset, SEEK_SET);
                    break;
                }
            }
        }

        $this->reset();
        $string = mb_convert_encoding($string, 'utf-8', 'utf-32le');

        return trim($string);
    }
}
|