Rewrote YouTube lookup handling.

This commit is contained in:
Pachira 2022-07-16 20:45:56 +00:00
parent 4ad19c6363
commit 960a791394
4 changed files with 230 additions and 53 deletions

View file

@ -11,6 +11,7 @@ if(UIH_DEBUG)
$ctx->registerLookup(new \Uiharu\Lookup\EEPROMLookup('devrom', 'eeprom.edgii.net', ['i.edgii.net']));
$ctx->registerLookup(new \Uiharu\Lookup\TwitterLookup);
$ctx->registerLookup(new \Uiharu\Lookup\YouTubeLookup);
$ctx->setupHttp();

View file

@ -16,6 +16,7 @@ use Uiharu\Lookup\EEPROMLookupResult;
use Uiharu\Lookup\TwitterLookupResult;
use Uiharu\Lookup\TwitterLookupTweetResult;
use Uiharu\Lookup\TwitterLookupUserResult;
use Uiharu\Lookup\YouTubeLookupResult;
use Index\MediaType;
use Index\Data\IDbConnection;
use Index\Http\HttpFx;
@ -137,6 +138,22 @@ final class v1_0 implements \Uiharu\IApi {
$resp->dbg_twitter_info = $result->getTwitterResult();
}
if($result instanceof YouTubeLookupResult) {
$resp->youtube_video_id = $result->getYouTubeVideoId();
if($result->hasYouTubeVideoStartTime())
$resp->youtube_start_time = $result->getYouTubeVideoStartTime();
if($result->hasYouTubePlayListId())
$resp->youtube_playlist = $result->getYouTubePlayListId();
if($result->hasYouTubePlayListIndex())
$resp->youtube_playlist_index = $result->getYouTubePlayListIndex();
if(UIH_DEBUG) {
$resp->dbg_youtube_info = $result->getYouTubeVideoInfo();
$resp->dbg_youtube_query = $result->getYouTubeUrlQuery();
}
}
if($result instanceof IHasMediaInfo) {
if($result->isMedia()) {
$resp->is_image = $result->isImage();
@ -183,59 +200,7 @@ final class v1_0 implements \Uiharu\IApi {
$urlHost = strtolower($parsedUrl->getHost());
$urlPath = '/' . trim($parsedUrl->getPath(), '/');
if($urlScheme === 'http' || $urlScheme === 'https') {
switch($urlHost) {
case 'youtu.be': case 'www.youtu.be': // www. doesn't work for this, but may as well cover it
$youtubeVideoId = substr($urlPath, 1);
case 'youtube.com': case 'www.youtube.com':
case 'youtube-nocookie.com': case 'www.youtube-nocookie.com':
parse_str($parsedUrl->getQuery(), $queryString);
if(!isset($youtubeVideoId) && $urlPath === '/watch')
$youtubeVideoId = $queryString['v'] ?? null;
if(!empty($youtubeVideoId)) {
$resp->type = 'youtube:video';
$resp->color = '#f00';
$resp->youtube_video_id = $youtubeVideoId;
if(isset($queryString['t']))
$resp->youtube_start_time = $queryString['t'];
if(isset($queryString['list']))
$resp->youtube_playlist = $queryString['list'];
if(isset($queryString['index']))
$resp->youtube_playlist_index = $queryString['index'];
$curl = curl_init("https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id={$resp->youtube_video_id}&key=" . Config::get('Google', 'apiKey'));
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => false,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => false,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Accept: application/json',
],
]);
$youtubeResp = curl_exec($curl);
curl_close($curl);
$resp->youtube_video_info = json_decode($youtubeResp);
if(isset($resp->youtube_video_info->items[0]->snippet->title))
$resp->title = $resp->youtube_video_info->items[0]->snippet->title;
if(isset($resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url))
$resp->image = $resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url;
if(isset($resp->youtube_video_info->items[0]->snippet->description))
$resp->description = $resp->youtube_video_info->items[0]->snippet->description;
$resp->site_name = 'YouTube';
}
break;
}
} else {
if($urlScheme !== 'http' && $urlScheme !== 'https') {
$resp->error = 'metadata:scheme';
$response->setStatusCode(400);
return $resp;

View file

@ -0,0 +1,116 @@
<?php
namespace Uiharu\Lookup;
use RuntimeException;
use Uiharu\Config;
use Uiharu\Url;
use Index\MediaType;
/**
 * Lookup handler for YouTube video URLs.
 *
 * Recognises short-link hosts (youtu.be) and `/watch` URLs on youtube.com,
 * youtube-nocookie.com and the regional youtube.<tld> domains listed in
 * VALID_TLDS, then fetches the video's metadata from the YouTube Data API v3.
 */
final class YouTubeLookup implements \Uiharu\ILookup {
    /** Hosts whose path itself is the video id. */
    private const SHORT_DOMAINS = [
        'youtu.be', 'www.youtu.be', // www. doesn't work for this, but may as well cover it
    ];

    /** Suffixes accepted after "youtube." for hosts that do not end in plain ".com". */
    private const VALID_TLDS = [
        'ae', 'at', 'az', 'ba', 'be', 'bg', 'bh', 'bo', 'by',
        'ca', 'cat', 'ch', 'cl', 'co', 'co.ae', 'co.at', 'co.cr', 'co.hu',
        'co.id', 'co.il', 'co.in', 'co.jp', 'co.ke', 'co.kr', 'co.ma', 'co.nz',
        'co.th', 'co.tz', 'co.ug', 'co.uk', 'co.ve', 'co.za', 'co.zw',
        'com', 'com.ar', 'com.au', 'com.az', 'com.bd', 'com.bh', 'com.bo',
        'com.br', 'com.by', 'com.co', 'com.do', 'com.ec', 'com.ee', 'com.eg',
        'com.es', 'com.gh', 'com.gr', 'com.gt', 'com.hk', 'com.hn', 'com.hr',
        'com.jm', 'com.jo', 'com.kw', 'com.lb', 'com.lv', 'com.ly', 'com.mk',
        'com.mt', 'com.mx', 'com.my', 'com.ng', 'com.ni', 'com.om', 'com.pa',
        'com.pe', 'com.ph', 'com.pk', 'com.pt', 'com.py', 'com.qa', 'com.ro',
        'com.sa', 'com.sg', 'com.sv', 'com.tn', 'com.tr', 'com.tw', 'com.ua',
        'com.uy', 'com.ve', 'cr', 'cz', 'de', 'dk', 'ee', 'es', 'fi', 'fr',
        'ge', 'gr', 'gt', 'hk', 'hr', 'hu', 'ie', 'in', 'iq', 'is', 'it', 'jo',
        'jp', 'kr', 'kz', 'lk', 'lt', 'lu', 'lv', 'ly', 'ma', 'me', 'mk', 'mx',
        'my', 'net.in', 'ng', 'ni', 'nl', 'no', 'pa', 'pe', 'ph', 'pk', 'pl',
        'pr', 'pt', 'qa', 'ro', 'rs', 'ru', 'sa', 'se', 'sg', 'si', 'sk', 'sn',
        'sv', 'tn', 'ua', 'ug', 'uy', 'vn',
    ];

    /**
     * Tells whether the given host is one of the short-link domains.
     *
     * The comparison is case-insensitive because host names are.
     *
     * @param string $host Host name to test.
     * @return bool true if the host is listed in SHORT_DOMAINS.
     */
    public static function isShortDomain(string $host): bool {
        return in_array(strtolower($host), self::SHORT_DOMAINS, true);
    }

    /**
     * Decides whether this handler is responsible for the given URL.
     *
     * Short-link hosts always match. Other hosts must be (www.)youtube.<tld>
     * with a listed TLD, or end in youtube.com / youtube-nocookie.com, and the
     * path must be `/watch` or start with `/watch/`.
     *
     * @param Url $url URL to test.
     * @return bool true if lookup() should be attempted for this URL.
     */
    public function match(Url $url): bool {
        // NOTE(review): Url::getHost() may already normalise case; lower-casing again is harmless.
        $urlHost = strtolower($url->getHost());
        if(self::isShortDomain($urlHost))
            return true;

        // Reversed so that index 0 is always the right-most label.
        $parts = array_reverse(explode('.', $urlHost));
        $partsCount = count($parts);
        if($partsCount < 2 || $partsCount > 4)
            return false;

        if($parts[$partsCount - 1] === 'www')
            array_pop($parts);

        // Stripping "www" can leave a single label (e.g. "www.com"); nothing left to match against.
        if(count($parts) < 2)
            return false;

        if($parts[0] === 'com') {
            if($parts[1] !== 'youtube' && $parts[1] !== 'youtube-nocookie')
                return false;
        } else {
            // The left-most remaining label must be "youtube"; the rest is the TLD.
            if(array_pop($parts) !== 'youtube')
                return false;

            $tld = implode('.', array_reverse($parts));
            if(!in_array($tld, self::VALID_TLDS, true))
                return false;
        }

        $urlPath = $url->getPath();
        return $urlPath === '/watch'
            || str_starts_with($urlPath, '/watch/');
    }

    /**
     * Requests snippet, contentDetails and statistics for a video from the YouTube Data API.
     *
     * @param string $videoId Video id; it is percent-encoded before being placed in the request URL.
     * @return object|null Decoded JSON response, or null if the request failed
     *                     or the body was not a JSON object.
     */
    private function lookupVideo(string $videoId): ?object {
        // The id originates from user input, so encode it to keep it from adding query parameters.
        $curl = curl_init(
            'https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id='
            . rawurlencode($videoId)
            . '&key=' . Config::get('Google', 'apiKey')
        );
        if($curl === false)
            return null;

        curl_setopt_array($curl, [
            CURLOPT_AUTOREFERER => false,
            CURLOPT_CERTINFO => false,
            CURLOPT_FAILONERROR => false,
            CURLOPT_FOLLOWLOCATION => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TCP_FASTOPEN => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
            CURLOPT_TIMEOUT => 5,
            CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
            CURLOPT_HTTPHEADER => [
                'Accept: application/json',
            ],
        ]);
        $resp = curl_exec($curl);
        curl_close($curl);

        // curl_exec() yields false on transport errors.
        if(!is_string($resp))
            return null;

        // Only a JSON object satisfies the return type; arrays and scalars are treated as failure.
        $decoded = json_decode($resp);
        return is_object($decoded) ? $decoded : null;
    }

    /**
     * Resolves a YouTube URL into a lookup result.
     *
     * The video id is taken from the path for short-link hosts and `/watch/<id>`
     * URLs, otherwise from the `v` query parameter. The result carries a
     * canonical www.youtube.com watch URL with the remaining query parameters.
     *
     * @param Url $url URL previously accepted by match().
     * @return YouTubeLookupResult Result wrapping the API response and leftover query parameters.
     * @throws RuntimeException If no usable video id is present or the API response could not be obtained.
     */
    public function lookup(Url $url): YouTubeLookupResult {
        $urlPath = $url->getPath();
        parse_str($url->getQuery(), $urlQuery);

        if(self::isShortDomain($url->getHost())) {
            $videoId = substr($urlPath, 1);
        } elseif(str_starts_with($urlPath, '/watch/')) {
            $videoId = explode('/', trim($urlPath, '/'))[1] ?? '';
        } else {
            $videoId = $urlQuery['v'] ?? '';
        }

        // parse_str() produces an array for "v[]=..."; that is not a usable id either.
        if(!is_string($videoId) || empty($videoId))
            throw new RuntimeException('YouTube video id missing.');

        $videoInfo = $this->lookupVideo($videoId);
        if($videoInfo === null)
            throw new RuntimeException('YouTube video with given id could not be found.');

        // Rebuild the canonical URL with "v" first; http_build_query() encodes every component.
        unset($urlQuery['v']);
        $url = Url::parse('https://www.youtube.com/watch?' . http_build_query(['v' => $videoId] + $urlQuery));

        return new YouTubeLookupResult($url, $videoId, $videoInfo, $urlQuery);
    }
}

View file

@ -0,0 +1,95 @@
<?php
namespace Uiharu\Lookup;
use RuntimeException;
use Uiharu\Url;
use Index\MediaType;
/**
 * Result of a YouTube video lookup.
 *
 * Wraps the canonical URL, the video id, the decoded API response and the
 * query parameters left over from the original URL (start time, playlist, ...).
 */
final class YouTubeLookupResult implements \Uiharu\ILookupResult {
    /**
     * @param Url $url Canonical URL of the video.
     * @param string $videoId YouTube video id.
     * @param object $videoInfo Decoded API response; the getters read `items[0]->snippet` from it.
     * @param array $urlQuery Query parameters of the looked-up URL (without `v`).
     */
    public function __construct(
        private Url $url,
        private string $videoId,
        private object $videoInfo,
        private array $urlQuery
    ) {}

    /**
     * Narrows an arbitrary value to a string, yielding '' for anything that is not one.
     */
    private static function stringOrEmpty(mixed $value): string {
        return is_string($value) ? $value : '';
    }

    /**
     * Tells whether a query parameter is present with a scalar value.
     *
     * Array values (e.g. from `t[]=1`) are treated as absent because the
     * getters are declared to return strings.
     */
    private function hasQueryValue(string $name): bool {
        return isset($this->urlQuery[$name]) && is_scalar($this->urlQuery[$name]);
    }

    /**
     * Returns a query parameter as a string, or '' when it is absent or not scalar.
     */
    private function getQueryValue(string $name): string {
        return $this->hasQueryValue($name) ? (string)$this->urlQuery[$name] : '';
    }

    public function getUrl(): Url {
        return $this->url;
    }

    public function getObjectType(): string {
        return 'youtube:video';
    }

    public function getYouTubeVideoId(): string {
        return $this->videoId;
    }

    /** Returns the API response object exactly as passed to the constructor. */
    public function getYouTubeVideoInfo(): object {
        return $this->videoInfo;
    }

    /** Returns the query parameter array exactly as passed to the constructor. */
    public function getYouTubeUrlQuery(): array {
        return $this->urlQuery;
    }

    public function hasYouTubeVideoStartTime(): bool {
        return $this->hasQueryValue('t');
    }

    /** Value of the `t` query parameter, or '' if there is none. */
    public function getYouTubeVideoStartTime(): string {
        return $this->getQueryValue('t');
    }

    public function hasYouTubePlayListId(): bool {
        return $this->hasQueryValue('list');
    }

    /** Value of the `list` query parameter, or '' if there is none. */
    public function getYouTubePlayListId(): string {
        return $this->getQueryValue('list');
    }

    public function hasYouTubePlayListIndex(): bool {
        return $this->hasQueryValue('index');
    }

    /** Value of the `index` query parameter, or '' if there is none. */
    public function getYouTubePlayListIndex(): string {
        return $this->getQueryValue('index');
    }

    public function hasMediaType(): bool {
        return false;
    }

    /**
     * @throws RuntimeException Always; this result reports no media type.
     */
    public function getMediaType(): MediaType {
        throw new RuntimeException('Unsupported');
    }

    public function hasColour(): bool {
        return true;
    }

    /** Fixed colour 0xFF0000. */
    public function getColour(): int {
        return 0xFF0000;
    }

    public function hasTitle(): bool {
        return !empty($this->getTitle());
    }

    /** Snippet title of the first item in the API response, or '' if the response has none. */
    public function getTitle(): string {
        return self::stringOrEmpty($this->videoInfo->items[0]->snippet->title ?? null);
    }

    public function hasSiteName(): bool {
        return true;
    }

    public function getSiteName(): string {
        return 'YouTube';
    }

    public function hasDescription(): bool {
        return !empty($this->getDescription());
    }

    /** Snippet description of the first item in the API response, or '' if the response has none. */
    public function getDescription(): string {
        return self::stringOrEmpty($this->videoInfo->items[0]->snippet->description ?? null);
    }

    public function hasPreviewImage(): bool {
        return !empty($this->getPreviewImage());
    }

    /** URL of the `medium` thumbnail of the first item in the API response, or '' if the response has none. */
    public function getPreviewImage(): string {
        return self::stringOrEmpty($this->videoInfo->items[0]->snippet->thumbnails->medium->url ?? null);
    }
}