Fixed media type detection.
This commit is contained in:
parent
f60dcc309f
commit
9a893bb23c
2 changed files with 27 additions and 15 deletions
|
@ -1,6 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
namespace Uiharu\Lookup;
|
namespace Uiharu\Lookup;
|
||||||
|
|
||||||
|
use finfo;
|
||||||
use stdClass;
|
use stdClass;
|
||||||
use DOMDocument;
|
use DOMDocument;
|
||||||
use RuntimeException;
|
use RuntimeException;
|
||||||
|
@ -102,28 +103,38 @@ final class WebLookup implements \Uiharu\ILookup {
|
||||||
if($head === null)
|
if($head === null)
|
||||||
throw new RuntimeException('Web request timed out: ' . self::reqError($req));
|
throw new RuntimeException('Web request timed out: ' . self::reqError($req));
|
||||||
|
|
||||||
|
$mediaType = MediaType::parse('application/octet-stream');
|
||||||
|
$hasContentType = array_key_exists('content-type', $head['lines']);
|
||||||
|
|
||||||
|
if($hasContentType) {
|
||||||
try {
|
try {
|
||||||
$mediaType = MediaType::parse($head['lines']['content-type'] ?? '');
|
$mediaType = MediaType::parse($head['lines']['content-type'] ?? '');
|
||||||
} catch(InvalidArgumentException $ex) {
|
} catch(InvalidArgumentException $ex) {}
|
||||||
$mediaType = MediaType::parse('application/octet-stream');
|
|
||||||
|
if(MediaTypeExts::isMedia($mediaType)) {
|
||||||
|
self::reqClose($req);
|
||||||
|
return $this->lookupMedia($url, $mediaType);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$isXHTML = $mediaType->equals('application/xhtml+xml') || $mediaType->equals('application/xml');
|
$body = self::reqBody($req);
|
||||||
if($isXHTML || $mediaType->equals('text/html'))
|
|
||||||
return $this->lookupSite($url, $req, $mediaType, $isXHTML);
|
|
||||||
|
|
||||||
self::reqClose($req);
|
self::reqClose($req);
|
||||||
|
|
||||||
if(MediaTypeExts::isMedia($mediaType))
|
if(!$hasContentType)
|
||||||
return $this->lookupMedia($url, $mediaType);
|
try {
|
||||||
|
$finfo = new finfo(FILEINFO_MIME);
|
||||||
|
$mediaType = MediaType::parse($finfo->buffer($body));
|
||||||
|
} catch(InvalidArgumentException $ex) {}
|
||||||
|
|
||||||
|
if($mediaType->equals('text/html')
|
||||||
|
|| $mediaType->equals('application/xhtml+xml')
|
||||||
|
|| $mediaType->equals('application/xml'))
|
||||||
|
return $this->lookupSite($url, $req, $mediaType, $body);
|
||||||
|
|
||||||
return new WebLookupFallbackResult($url, $mediaType, $url->getHost() . ': ' . basename($url->getPath()));
|
return new WebLookupFallbackResult($url, $mediaType, $url->getHost() . ': ' . basename($url->getPath()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private function lookupSite(Url $url, $req, MediaType $mediaType, bool $isXHTML): WebLookupResult {
|
private function lookupSite(Url $url, $req, MediaType $mediaType, string $body): WebLookupResult {
|
||||||
$body = self::reqBody($req);
|
|
||||||
self::reqClose($req);
|
|
||||||
|
|
||||||
// ok hear me out
|
// ok hear me out
|
||||||
// there's absolutely no good html scraping libraries for PHP
|
// there's absolutely no good html scraping libraries for PHP
|
||||||
// DOMDocument Exists but kinda blows at catching weird encoding events like with pixiv
|
// DOMDocument Exists but kinda blows at catching weird encoding events like with pixiv
|
||||||
|
|
|
@ -4,6 +4,7 @@ namespace Uiharu\Lookup;
|
||||||
use RuntimeException;
|
use RuntimeException;
|
||||||
use Uiharu\Url;
|
use Uiharu\Url;
|
||||||
use Index\MediaType;
|
use Index\MediaType;
|
||||||
|
use Index\Colour\Colour;
|
||||||
|
|
||||||
class WebLookupFallbackResult extends WebLookupResult {
|
class WebLookupFallbackResult extends WebLookupResult {
|
||||||
private string $title;
|
private string $title;
|
||||||
|
@ -20,7 +21,7 @@ class WebLookupFallbackResult extends WebLookupResult {
|
||||||
public function hasColour(): bool {
|
public function hasColour(): bool {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
public function getColour(): int {
|
public function getColour(): Colour {
|
||||||
throw new RuntimeException('Unsupported.');
|
throw new RuntimeException('Unsupported.');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue