Fixed media type detection.

This commit is contained in:
Pachira 2023-11-19 04:05:48 +00:00
parent f60dcc309f
commit 9a893bb23c
2 changed files with 27 additions and 15 deletions

View file

@ -1,6 +1,7 @@
<?php
namespace Uiharu\Lookup;
use finfo;
use stdClass;
use DOMDocument;
use RuntimeException;
@ -102,28 +103,38 @@ final class WebLookup implements \Uiharu\ILookup {
if($head === null)
throw new RuntimeException('Web request timed out: ' . self::reqError($req));
try {
$mediaType = MediaType::parse($head['lines']['content-type'] ?? '');
} catch(InvalidArgumentException $ex) {
$mediaType = MediaType::parse('application/octet-stream');
$mediaType = MediaType::parse('application/octet-stream');
$hasContentType = array_key_exists('content-type', $head['lines']);
if($hasContentType) {
try {
$mediaType = MediaType::parse($head['lines']['content-type'] ?? '');
} catch(InvalidArgumentException $ex) {}
if(MediaTypeExts::isMedia($mediaType)) {
self::reqClose($req);
return $this->lookupMedia($url, $mediaType);
}
}
$isXHTML = $mediaType->equals('application/xhtml+xml') || $mediaType->equals('application/xml');
if($isXHTML || $mediaType->equals('text/html'))
return $this->lookupSite($url, $req, $mediaType, $isXHTML);
$body = self::reqBody($req);
self::reqClose($req);
if(MediaTypeExts::isMedia($mediaType))
return $this->lookupMedia($url, $mediaType);
if(!$hasContentType)
try {
$finfo = new finfo(FILEINFO_MIME);
$mediaType = MediaType::parse($finfo->buffer($body));
} catch(InvalidArgumentException $ex) {}
if($mediaType->equals('text/html')
|| $mediaType->equals('application/xhtml+xml')
|| $mediaType->equals('application/xml'))
return $this->lookupSite($url, $req, $mediaType, $body);
return new WebLookupFallbackResult($url, $mediaType, $url->getHost() . ': ' . basename($url->getPath()));
}
private function lookupSite(Url $url, $req, MediaType $mediaType, bool $isXHTML): WebLookupResult {
$body = self::reqBody($req);
self::reqClose($req);
private function lookupSite(Url $url, $req, MediaType $mediaType, string $body): WebLookupResult {
// ok hear me out
// there are absolutely no good HTML scraping libraries for PHP
// DOMDocument exists but kinda blows at catching weird encoding events like with pixiv

View file

@ -4,6 +4,7 @@ namespace Uiharu\Lookup;
use RuntimeException;
use Uiharu\Url;
use Index\MediaType;
use Index\Colour\Colour;
class WebLookupFallbackResult extends WebLookupResult {
private string $title;
@ -20,7 +21,7 @@ class WebLookupFallbackResult extends WebLookupResult {
public function hasColour(): bool {
return false;
}
public function getColour(): int {
public function getColour(): Colour {
throw new RuntimeException('Unsupported.');
}