Fixed media type detection.

This commit is contained in:
flash 2023-11-19 04:05:48 +00:00
parent f60dcc309f
commit 9a893bb23c
2 changed files with 27 additions and 15 deletions

View file

@ -1,6 +1,7 @@
<?php <?php
namespace Uiharu\Lookup; namespace Uiharu\Lookup;
use finfo;
use stdClass; use stdClass;
use DOMDocument; use DOMDocument;
use RuntimeException; use RuntimeException;
@ -102,28 +103,38 @@ final class WebLookup implements \Uiharu\ILookup {
if($head === null) if($head === null)
throw new RuntimeException('Web request timed out: ' . self::reqError($req)); throw new RuntimeException('Web request timed out: ' . self::reqError($req));
$mediaType = MediaType::parse('application/octet-stream');
$hasContentType = array_key_exists('content-type', $head['lines']);
if($hasContentType) {
try { try {
$mediaType = MediaType::parse($head['lines']['content-type'] ?? ''); $mediaType = MediaType::parse($head['lines']['content-type'] ?? '');
} catch(InvalidArgumentException $ex) { } catch(InvalidArgumentException $ex) {}
$mediaType = MediaType::parse('application/octet-stream');
if(MediaTypeExts::isMedia($mediaType)) {
self::reqClose($req);
return $this->lookupMedia($url, $mediaType);
}
} }
$isXHTML = $mediaType->equals('application/xhtml+xml') || $mediaType->equals('application/xml'); $body = self::reqBody($req);
if($isXHTML || $mediaType->equals('text/html'))
return $this->lookupSite($url, $req, $mediaType, $isXHTML);
self::reqClose($req); self::reqClose($req);
if(MediaTypeExts::isMedia($mediaType)) if(!$hasContentType)
return $this->lookupMedia($url, $mediaType); try {
$finfo = new finfo(FILEINFO_MIME);
$mediaType = MediaType::parse($finfo->buffer($body));
} catch(InvalidArgumentException $ex) {}
if($mediaType->equals('text/html')
|| $mediaType->equals('application/xhtml+xml')
|| $mediaType->equals('application/xml'))
return $this->lookupSite($url, $req, $mediaType, $body);
return new WebLookupFallbackResult($url, $mediaType, $url->getHost() . ': ' . basename($url->getPath())); return new WebLookupFallbackResult($url, $mediaType, $url->getHost() . ': ' . basename($url->getPath()));
} }
private function lookupSite(Url $url, $req, MediaType $mediaType, bool $isXHTML): WebLookupResult { private function lookupSite(Url $url, $req, MediaType $mediaType, string $body): WebLookupResult {
$body = self::reqBody($req);
self::reqClose($req);
// ok hear me out // ok hear me out
// there's absolutely no good html scraping libraries for PHP // there's absolutely no good html scraping libraries for PHP
// DOMDocument Exists but kinda blows at catching weird encoding events like with pixiv // DOMDocument Exists but kinda blows at catching weird encoding events like with pixiv

View file

@ -4,6 +4,7 @@ namespace Uiharu\Lookup;
use RuntimeException; use RuntimeException;
use Uiharu\Url; use Uiharu\Url;
use Index\MediaType; use Index\MediaType;
use Index\Colour\Colour;
class WebLookupFallbackResult extends WebLookupResult { class WebLookupFallbackResult extends WebLookupResult {
private string $title; private string $title;
@ -20,7 +21,7 @@ class WebLookupFallbackResult extends WebLookupResult {
public function hasColour(): bool { public function hasColour(): bool {
return false; return false;
} }
public function getColour(): int { public function getColour(): Colour {
throw new RuntimeException('Unsupported.'); throw new RuntimeException('Unsupported.');
} }