Fixed media type detection.
This commit is contained in:
parent
f60dcc309f
commit
9a893bb23c
2 changed files with 27 additions and 15 deletions
|
@ -1,6 +1,7 @@
|
|||
<?php
|
||||
namespace Uiharu\Lookup;
|
||||
|
||||
use finfo;
|
||||
use stdClass;
|
||||
use DOMDocument;
|
||||
use RuntimeException;
|
||||
|
@ -102,28 +103,38 @@ final class WebLookup implements \Uiharu\ILookup {
|
|||
if($head === null)
|
||||
throw new RuntimeException('Web request timed out: ' . self::reqError($req));
|
||||
|
||||
$mediaType = MediaType::parse('application/octet-stream');
|
||||
$hasContentType = array_key_exists('content-type', $head['lines']);
|
||||
|
||||
if($hasContentType) {
|
||||
try {
|
||||
$mediaType = MediaType::parse($head['lines']['content-type'] ?? '');
|
||||
} catch(InvalidArgumentException $ex) {
|
||||
$mediaType = MediaType::parse('application/octet-stream');
|
||||
} catch(InvalidArgumentException $ex) {}
|
||||
|
||||
if(MediaTypeExts::isMedia($mediaType)) {
|
||||
self::reqClose($req);
|
||||
return $this->lookupMedia($url, $mediaType);
|
||||
}
|
||||
}
|
||||
|
||||
$isXHTML = $mediaType->equals('application/xhtml+xml') || $mediaType->equals('application/xml');
|
||||
if($isXHTML || $mediaType->equals('text/html'))
|
||||
return $this->lookupSite($url, $req, $mediaType, $isXHTML);
|
||||
|
||||
$body = self::reqBody($req);
|
||||
self::reqClose($req);
|
||||
|
||||
if(MediaTypeExts::isMedia($mediaType))
|
||||
return $this->lookupMedia($url, $mediaType);
|
||||
if(!$hasContentType)
|
||||
try {
|
||||
$finfo = new finfo(FILEINFO_MIME);
|
||||
$mediaType = MediaType::parse($finfo->buffer($body));
|
||||
} catch(InvalidArgumentException $ex) {}
|
||||
|
||||
if($mediaType->equals('text/html')
|
||||
|| $mediaType->equals('application/xhtml+xml')
|
||||
|| $mediaType->equals('application/xml'))
|
||||
return $this->lookupSite($url, $req, $mediaType, $body);
|
||||
|
||||
return new WebLookupFallbackResult($url, $mediaType, $url->getHost() . ': ' . basename($url->getPath()));
|
||||
}
|
||||
|
||||
private function lookupSite(Url $url, $req, MediaType $mediaType, bool $isXHTML): WebLookupResult {
|
||||
$body = self::reqBody($req);
|
||||
self::reqClose($req);
|
||||
|
||||
private function lookupSite(Url $url, $req, MediaType $mediaType, string $body): WebLookupResult {
|
||||
// ok hear me out
|
||||
// there's absolutely no good html scraping libraries for PHP
|
||||
// DOMDocument Exists but kinda blows at catching weird encoding events like with pixiv
|
||||
|
|
|
@ -4,6 +4,7 @@ namespace Uiharu\Lookup;
|
|||
use RuntimeException;
|
||||
use Uiharu\Url;
|
||||
use Index\MediaType;
|
||||
use Index\Colour\Colour;
|
||||
|
||||
class WebLookupFallbackResult extends WebLookupResult {
|
||||
private string $title;
|
||||
|
@ -20,7 +21,7 @@ class WebLookupFallbackResult extends WebLookupResult {
|
|||
public function hasColour(): bool {
|
||||
return false;
|
||||
}
|
||||
public function getColour(): int {
|
||||
public function getColour(): Colour {
|
||||
throw new RuntimeException('Unsupported.');
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue