uiharu/public/index.php

586 lines
29 KiB
PHP
Raw Normal View History

2022-07-03 22:41:44 +00:00
<?php
// Release identifier; it is appended to the User-Agent of outgoing requests.
const UIH_VERSION = '20201009';

// Debug mode is enabled by the mere presence of `_debug` in the query string.
define('UIH_DEBUG', isset($_GET['_debug']));

// The cache is bypassed in debug mode or when `_skip` is present.
define('UIH_CACHE', !(UIH_DEBUG || isset($_GET['_skip'])));

// Raw probe output is attached to the response in debug mode or on request.
define('UIH_INCLUDE_RAW', isset($_GET['include_raw']) || UIH_DEBUG);

// Project identifier handed to ftok() when deriving semaphore keys.
const UIH_SEM_NAME = 'U';

// Directory holding the per-URL lock files.
define('UIH_SEM_PATH', implode(DIRECTORY_SEPARATOR, [sys_get_temp_dir(), 'uiharu']));
/**
 * Fills $resp with metadata about a file hosted on an EEPROM instance.
 *
 * The type, colour, file id and site name are always set. The file's JSON
 * description is requested from https://eeprom.{$domain}.net/uploads/{id}.json
 * and, when it decodes to an object carrying `name` / `thumb`, those values
 * become the title and image of the response.
 *
 * @param stdClass $resp         Response object, modified in place.
 * @param string   $eepromFileId File id. Stored verbatim on $resp; percent-encoded for the request.
 * @param string   $domain       Domain label inserted into the EEPROM host name.
 */
function uih_eeprom_lookup(stdClass $resp, string $eepromFileId, string $domain = 'flashii'): void {
    $resp->type = 'eeprom:file';
    $resp->color = '#8559a5';
    $resp->eeprom_file_id = $eepromFileId;

    // Some callers pass whatever followed the first slash of a URL path, so the id
    // may contain '/', '..' or other characters. Encoding it keeps it a single path
    // segment. Ids made of [A-Za-z0-9-_] are left unchanged by rawurlencode().
    $curl = curl_init("https://eeprom.{$domain}.net/uploads/" . rawurlencode($eepromFileId) . '.json');
    curl_setopt_array($curl, [
        CURLOPT_AUTOREFERER => false,
        CURLOPT_CERTINFO => false,
        CURLOPT_FAILONERROR => false,
        CURLOPT_FOLLOWLOCATION => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TCP_FASTOPEN => true,
        CURLOPT_CONNECTTIMEOUT => 2,
        CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
        CURLOPT_TIMEOUT => 5,
        CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
        CURLOPT_HTTPHEADER => [
            'Accept: application/json',
        ],
    ]);
    $eepromResp = curl_exec($curl);
    curl_close($curl);

    // curl_exec() returns false when the transfer fails; treat that like an
    // undecodable body instead of handing a boolean to json_decode().
    $resp->eeprom_file_info = is_string($eepromResp) ? json_decode($eepromResp) : null;

    if(isset($resp->eeprom_file_info->name))
        $resp->title = $resp->eeprom_file_info->name;
    if(isset($resp->eeprom_file_info->thumb))
        $resp->image = $resp->eeprom_file_info->thumb;

    $resp->site_name = 'Flashii EEPROM';
}
// Make sure the directory that holds the per-URL lock files exists.
if(!is_dir(UIH_SEM_PATH))
    mkdir(UIH_SEM_PATH, 0777, true);

require_once __DIR__ . '/../config.php';

header('X-Powered-By: Uiharu');

// PHP diagnostics are only displayed and reported in debug mode.
ini_set('display_errors', UIH_DEBUG ? 'on' : 'off');
error_reporting(UIH_DEBUG ? -1 : 0);

// Project classes are resolved from ../lib through the default SPL autoloader.
set_include_path(realpath(__DIR__ . '/../lib/') . PATH_SEPARATOR . get_include_path());
spl_autoload_extensions('.php');
spl_autoload_register();

DB::init(UIH_PDO_DSN, UIH_PDO_USER, UIH_PDO_PASS, DB::ATTRS);

// Cache rows older than seven days are purged on every request.
DB::exec('DELETE FROM `uih_metadata_cache` WHERE `metadata_created` < NOW() - INTERVAL 7 DAY');

// FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and rewrites the value
// (strips tags, encodes quotes). The method and URI are only compared against
// fixed strings below, so they are read unmodified. The casts turn a missing
// variable (null/false) into an empty string.
$reqMethod = (string)filter_input(INPUT_SERVER, 'REQUEST_METHOD', FILTER_UNSAFE_RAW);
$reqUri = (string)filter_input(INPUT_SERVER, 'REQUEST_URI', FILTER_UNSAFE_RAW);

// parse_url() returns null when there is no path and false for a malformed URI.
$reqPath = '/' . trim((string)parse_url($reqUri, PHP_URL_PATH), '/');

// HEAD is routed like GET; $reqHead tells the handlers to omit the body.
$reqHead = $reqMethod === 'HEAD';
if($reqHead)
    $reqMethod = 'GET';
// Cross-origin gate: a request that carries an Origin header is only served
// when that origin belongs to flashii.net or edgii.net (the bare domain or any
// subdomain); every other origin gets an empty 403.
$requestOrigin = $_SERVER['HTTP_ORIGIN'] ?? null;
if(!empty($requestOrigin)) {
    $allowedOrigins = [
        'https://flashii.net',
        'http://flashii.net',
        'https://edgii.net',
        'http://edgii.net',
    ];
    $allowedSuffixes = [
        '/flashii.net',
        '.flashii.net',
        '/edgii.net',
        '.edgii.net',
    ];

    $originAllowed = in_array($requestOrigin, $allowedOrigins, true);
    foreach($allowedSuffixes as $suffix) {
        if($originAllowed)
            break;
        // Compare the tail of the origin against the suffix.
        $originAllowed = substr($requestOrigin, -strlen($suffix)) === $suffix;
    }

    if(!$originAllowed) {
        http_response_code(403);
        return;
    }

    // Echo the accepted origin back and tell caches the response varies by it.
    header('Access-Control-Allow-Origin: ' . $requestOrigin);
    header('Vary: Origin');
}
// CORS preflight: reply with an empty 204 that lists the supported methods.
if('OPTIONS' === $reqMethod) {
    header('Access-Control-Allow-Methods: OPTIONS, GET, POST');
    http_response_code(204);
    // Credentials and the Authorization header are not enabled:
    //header('Access-Control-Allow-Credentials: true');
    //header('Access-Control-Allow-Headers: Authorization');
    return;
}
// ---- /metadata: look up descriptive metadata for a URL and return it as JSON ----
if($reqPath === '/metadata') {
// POST is accepted in addition to GET so that very long URLs can be sent in the request body.
if($reqMethod !== 'GET' && $reqMethod !== 'POST') {
http_response_code(405);
return;
}
header('Content-Type: application/json; charset=utf-8');
// HEAD requests stop here: headers only, no lookup is performed.
if($reqHead)
return;
Stopwatch::start();
$resp = new stdClass;
if($reqMethod === 'POST') {
// The raw request body is the URL; only its first 1000 bytes are used.
$targetUrl = substr((string)file_get_contents('php://input'), 0, 1000);
} else {
$targetUrl = (string)filter_input(INPUT_GET, 'url');
}
try {
$resp->uri = $parsedUrl = new Uri($targetUrl);
} catch(InvalidArgumentException $ex) {
// A URL the Uri class rejects yields a 400 with an error code and nothing else.
http_response_code(400);
$resp->error = 'metadata:uri';
echo json_encode($resp);
return;
}
// if no scheme is specified, try https
// (only $parsedUrl is replaced here; $resp->uri keeps the Uri built from the input as given)
if($parsedUrl->getScheme() === '')
$parsedUrl = new Uri('https://' . (string)$parsedUrl);
// The hash names the lock file and keys the cache row; it is passed to UNHEX() in the
// queries, so it is presumably a hex digest - verify against Uri::getHash().
$urlHash = $parsedUrl->getHash();
try {
// Serialise concurrent lookups of the same URL: a lock file named after the hash is
// turned into a System V semaphore key via ftok(), and the request loops until it
// has acquired that semaphore.
$semPath = UIH_SEM_PATH . DIRECTORY_SEPARATOR . $urlHash;
if(!is_file($semPath))
touch($semPath);
$ftok = ftok($semPath, UIH_SEM_NAME);
$semaphore = sem_get($ftok, 1);
// NOTE(review): the result of sem_get() is passed to sem_acquire() unchecked - confirm
// how a failed sem_get() should be handled here.
while(!sem_acquire($semaphore)) usleep(100);
if(UIH_CACHE) {
// Reuse a stored response for this hash if it is less than ten minutes old.
$loadCache = DB::prepare('SELECT `metadata_resp` AS `resp` FROM `uih_metadata_cache` WHERE `metadata_url` = UNHEX(:hash) AND `metadata_created` > NOW() - INTERVAL 10 MINUTE')
->bind('hash', $urlHash)
->fetchObject();
if(isset($loadCache->resp)) {
$cacheResp = json_decode($loadCache->resp);
// An undecodable cache entry is ignored and the lookup proceeds as on a miss.
if($cacheResp !== null)
$resp = $cacheResp;
}
}
// No usable type yet (cache miss, cache disabled, or a cached entry without a type):
// perform the actual lookup.
if(empty($resp->type)) {
$urlScheme = strtolower($parsedUrl->getScheme());
$urlHost = strtolower($parsedUrl->getHost());
// Path normalised to exactly one leading slash and no trailing slash.
$urlPath = '/' . trim($parsedUrl->getPath(), '/');
if($urlScheme === 'eeprom') {
// eeprom:<id> is shorthand for a file on i.fii.moe. The URL is rewritten to its https
// form and $continueRaw makes the generic probe further down run as well.
if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) {
$resp->uri = $parsedUrl = new Uri('https://i.fii.moe/' . $matches[1]);
$continueRaw = true;
uih_eeprom_lookup($resp, $matches[1]);
}
} elseif($urlScheme === 'devrom') {
// devrom:<id> is the same shorthand for the edgii.net instance.
if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) {
$resp->uri = $parsedUrl = new Uri('https://i.edgii.net/' . $matches[1]);
$continueRaw = true;
uih_eeprom_lookup($resp, $matches[1], 'edgii');
}
} elseif($urlScheme === 'http' || $urlScheme === 'https') {
// Host-specific handlers; hosts without a case fall through to the generic probe.
switch($urlHost) {
case 'i.flashii.net':
case 'i.fii.moe':
// Short links: everything after the leading slash is taken as the file id.
$eepromFileId = substr($urlPath, 1);
// deliberate fall-through into the eeprom.flashii.net handling
case 'eeprom.flashii.net':
if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
$eepromFileId = $matches[1];
if(!empty($eepromFileId)) {
$continueRaw = true;
uih_eeprom_lookup($resp, $eepromFileId);
}
break;
case 'i.edgii.net':
// Same as above for the edgii.net instance.
$eepromFileId = substr($urlPath, 1);
// deliberate fall-through into the eeprom.edgii.net handling
case 'eeprom.edgii.net':
if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
$eepromFileId = $matches[1];
if(!empty($eepromFileId)) {
$continueRaw = true;
uih_eeprom_lookup($resp, $eepromFileId, 'edgii');
}
break;
case 'twitter.com': case 'www.twitter.com':
case 'm.twitter.com': case 'mobile.twitter.com':
case 'nitter.net': case 'www.nitter.net':
    // Issues an authenticated GET against the Twitter API and returns the raw
    // curl_exec() result: the response body, or false when the transfer failed.
    $twitterApiFetch = static function(string $endpoint) {
        $handle = curl_init($endpoint);
        curl_setopt_array($handle, [
            CURLOPT_AUTOREFERER => false,
            CURLOPT_CERTINFO => false,
            CURLOPT_FAILONERROR => false,
            CURLOPT_FOLLOWLOCATION => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TCP_FASTOPEN => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
            CURLOPT_TIMEOUT => 5,
            CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
            CURLOPT_HTTPHEADER => [
                'Authorization: Bearer ' . TWITTER_API_TOKEN,
                'Accept: application/json',
            ],
        ]);
        $payload = curl_exec($handle);
        curl_close($handle);
        return $payload;
    };

    // A single tweet: /<user>/status/<id> or /<user>/statuses/<id>, optional leading @.
    if(preg_match('#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#', $urlPath, $matches)) {
        $resp->type = 'twitter:tweet';
        $resp->color = '#1da1f2';
        $resp->tweet_id = (string)($matches[1] ?? '0');
        $resp->tweet_info = json_decode($twitterApiFetch("https://api.twitter.com/2/tweets?ids={$resp->tweet_id}&expansions=attachments.media_keys,author_id,entities.mentions.username,referenced_tweets.id,referenced_tweets.id.author_id&media.fields=height,width,media_key,preview_image_url,url,type&tweet.fields=attachments,conversation_id,text,source,possibly_sensitive,created_at&user.fields=id,name,profile_image_url,protected,username,verified"));

        // The first expanded user supplies title and image; the tweet text is the description.
        $tweetAuthor = $resp->tweet_info->includes->users[0] ?? null;
        if(isset($tweetAuthor->name))
            $resp->title = $tweetAuthor->name;
        if(isset($tweetAuthor->profile_image_url))
            $resp->image = $tweetAuthor->profile_image_url;

        $tweetText = $resp->tweet_info->data[0]->text ?? null;
        if($tweetText !== null)
            $resp->description = $tweetText;

        $resp->site_name = 'Twitter';
        break;
    }

    // A profile: /<user>, optional leading @.
    if(preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $urlPath, $matches)) {
        $resp->type = 'twitter:user';
        $resp->color = '#1da1f2';
        $resp->twitter_user_name = (string)($matches[1] ?? '');
        $resp->twitter_user_info = json_decode($twitterApiFetch("https://api.twitter.com/2/users/by?usernames={$resp->twitter_user_name}&user.fields=description,entities,id,name,profile_image_url,protected,url,username,verified"));

        $twitterUser = $resp->twitter_user_info->data[0] ?? null;
        if(isset($twitterUser->name))
            $resp->title = $twitterUser->name;
        if(isset($twitterUser->profile_image_url))
            $resp->image = $twitterUser->profile_image_url;
        if(isset($twitterUser->description))
            $resp->description = $twitterUser->description;

        $resp->site_name = 'Twitter';
        break;
    }

    // Any other path on these hosts is left to the generic probe.
    break;
case 'youtu.be': case 'www.youtu.be': // www. doesn't work for this, but may as well cover it
    // Short links carry the video id as the path.
    $youtubeVideoId = substr($urlPath, 1);
    // deliberate fall-through into the shared YouTube handling
case 'youtube.com': case 'www.youtube.com':
case 'youtube-nocookie.com': case 'www.youtube-nocookie.com':
    parse_str($parsedUrl->getQuery(), $queryString);
    if(!isset($youtubeVideoId) && $urlPath === '/watch')
        $youtubeVideoId = $queryString['v'] ?? null;

    // parse_str() produces an array for `v[]=...`; only a non-empty string is a usable id.
    if(is_string($youtubeVideoId) && !empty($youtubeVideoId)) {
        $resp->type = 'youtube:video';
        $resp->color = '#f00';
        $resp->youtube_video_id = $youtubeVideoId;

        // Playback hints from the query string are passed through as given.
        if(isset($queryString['t']))
            $resp->youtube_start_time = $queryString['t'];
        if(isset($queryString['list']))
            $resp->youtube_playlist = $queryString['list'];
        if(isset($queryString['index']))
            $resp->youtube_playlist_index = $queryString['index'];

        // The id is caller-controlled and already URL-decoded, so it may contain '&' or
        // '='; encode it so it cannot add or override parameters of the API request.
        $curl = curl_init('https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id=' . rawurlencode($youtubeVideoId) . '&key=' . GOOGLE_API_KEY);
        curl_setopt_array($curl, [
            CURLOPT_AUTOREFERER => false,
            CURLOPT_CERTINFO => false,
            CURLOPT_FAILONERROR => false,
            CURLOPT_FOLLOWLOCATION => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TCP_FASTOPEN => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
            CURLOPT_TIMEOUT => 5,
            CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
            CURLOPT_HTTPHEADER => [
                'Accept: application/json',
            ],
        ]);
        $youtubeResp = curl_exec($curl);
        curl_close($curl);

        // curl_exec() returns false on failure; treat that like an undecodable body.
        $resp->youtube_video_info = is_string($youtubeResp) ? json_decode($youtubeResp) : null;

        // The first returned item supplies title, thumbnail and description.
        if(isset($resp->youtube_video_info->items[0]->snippet->title))
            $resp->title = $resp->youtube_video_info->items[0]->snippet->title;
        if(isset($resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url))
            $resp->image = $resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url;
        if(isset($resp->youtube_video_info->items[0]->snippet->description))
            $resp->description = $resp->youtube_video_info->items[0]->snippet->description;

        $resp->site_name = 'YouTube';
    }
    break;
}
} else {
// Any scheme other than eeprom, devrom, http or https is rejected.
http_response_code(404);
$resp->error = 'metadata:scheme';
}
// Generic probe: runs when no handler above set a type, or when a handler asked for it
// through $continueRaw. Note that the scheme is compared here as returned by getScheme(),
// without the lower-casing applied to $urlScheme.
if((empty($resp->type) || isset($continueRaw)) && in_array($parsedUrl->getScheme(), ['http', 'https'])) {
$curl = curl_init((string)$parsedUrl);
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => true,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 5,
CURLOPT_PATH_AS_IS => true,
// First pass fetches headers only: no body is requested and the headers are returned as the result.
CURLOPT_NOBODY => true,
CURLOPT_HEADER => true,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_DEFAULT_PROTOCOL => 'https',
CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible) Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Accept: text/html,application/xhtml+xml',
],
]);
$headers = curl_exec($curl);
if($headers === false) {
// Every transfer failure is reported under the 'metadata:timeout' code; the cURL
// message is included alongside it.
// NOTE(review): the handle is not closed on this path.
$resp->error = 'metadata:timeout';
$resp->errorMessage = curl_error($curl);
} else {
// Parse the returned header text into a lower-cased name => value map.
$headersRaw = explode("\r\n", trim($headers));
$statusCode = 200;
$headers = [];
foreach($headersRaw as $header) {
if(empty($header))
continue;
// A line without a colon is treated as a status line. Redirects are followed, so
// several responses can appear in sequence; the map is reset at each status line and
// ends up holding only the headers of the last response.
if(strpos($header, ':') === false) {
$headParts = explode(' ', $header);
if(isset($headParts[1]) && is_numeric($headParts[1]))
// $statusCode is recorded but not read anywhere in the visible code.
$statusCode = (int)$headParts[1];
$headers = [];
continue;
}
$headerParts = explode(':', $header, 2);
$headerParts[0] = mb_strtolower($headerParts[0]);
// Repeated headers are merged into one comma-separated value.
if(isset($headers[$headerParts[0]]))
$headers[$headerParts[0]] .= ', ' . trim($headerParts[1] ?? '');
else
$headers[$headerParts[0]] = trim($headerParts[1] ?? '');
}
// A Content-Type that MediaType rejects is replaced by application/octet-stream.
try {
$contentType = new MediaType($headers['content-type'] ?? '');
} catch(InvalidArgumentException $ex) {
$contentType = new MediaType('application/octet-stream');
}
$resp->content_type = $contentType;
$isHTML = $contentType->match('text/html');
$isXHTML = $contentType->match('application/xhtml+xml');
// HTML and XHTML documents are downloaded and parsed; other types are handled in the else branch.
if($isHTML || $isXHTML) {
// The header probe ran without a body on this handle; reuse it to download the document.
curl_setopt_array($curl, [
    CURLOPT_NOBODY => false,
    CURLOPT_HEADER => false,
]);
$body = curl_exec($curl);
curl_close($curl);

// curl_exec() returns false when this second transfer fails. Normalise to a string so
// the parsers below always receive one.
if(!is_string($body))
    $body = '';

$document = new DOMDocument;
if($isXHTML) {
    // DOMDocument::loadXML() rejects an empty string (warning on PHP 7, ValueError on
    // PHP 8), so an empty body leaves the document empty instead.
    if($body !== '')
        $document->loadXML($body);
} else {
    // Prefix an XML declaration carrying the charset from the Content-Type header so the
    // HTML parser decodes the body with it, then remove the resulting processing
    // instruction node again.
    @$document->loadHTML('<?xml encoding="' . $contentType->getCharset() . '">' . $body);
    foreach($document->childNodes as $child)
        if($child->nodeType === XML_PI_NODE) {
            $document->removeChild($child);
            break;
        }
    $document->encoding = $contentType->getCharset();
}

// NOTE(review): the strings read from the DOM below are converted from the document
// encoding to UTF-8 - confirm this is still needed, DOM accessors may already return UTF-8.
$charSet = $document->encoding;

$resp->type = 'website';
$resp->title = '';
$isMetaTitle = false;

// Start with the first <title> element; an og:title / twitter:title meta tag overrides it.
$titleTag = $document->getElementsByTagName('title');
foreach($titleTag as $tag) {
    $resp->title = trim(mb_convert_encoding($tag->textContent, 'utf-8', $charSet));
    break;
}

$metaTags = $document->getElementsByTagName('meta');
foreach($metaTags as $tag) {
    // The key is taken from `name`, else `property`; the value from `value`, else `content`.
    $nameAttr = $tag->hasAttribute('name') ? $tag->getAttribute('name') : (
        $tag->hasAttribute('property') ? $tag->getAttribute('property') : ''
    );
    $valueAttr = $tag->hasAttribute('value') ? $tag->getAttribute('value') : (
        $tag->hasAttribute('content') ? $tag->getAttribute('content') : ''
    );

    $nameAttr = trim(mb_convert_encoding($nameAttr, 'utf-8', $charSet));
    $valueAttr = trim(mb_convert_encoding($valueAttr, 'utf-8', $charSet));

    if(empty($nameAttr) || empty($valueAttr))
        continue;

    switch($nameAttr) {
        case 'og:title':
        case 'twitter:title':
            // Only the first meta title is used.
            if(!$isMetaTitle) {
                $isMetaTitle = true;
                $resp->title = $valueAttr;
            }
            break;

        case 'description':
        case 'og:description':
        case 'twitter:description':
            // The first description found is kept.
            if(!isset($resp->description))
                $resp->description = $valueAttr;
            break;

        case 'og:site_name':
            $resp->site_name = $valueAttr;
            break;

        case 'og:image':
        case 'twitter:image':
            $resp->image = $valueAttr;
            break;

        case 'theme-color':
            $resp->color = $valueAttr;
            break;

        case 'og:type':
            $resp->type = $valueAttr;
            break;
    }
}
} else {
// Not an HTML document: classify by the top-level media type.
$resp->is_image = $isImage = $contentType->match('image/*');
$resp->is_audio = $isAudio = $contentType->match('audio/*');
$resp->is_video = $isVideo = $contentType->match('video/*');
if($isImage || $isAudio || $isVideo) {
curl_close($curl);
$resp->media = new stdClass;
// ffprobe is run on the URL and its JSON report is decoded. The URL passed is
// $parsedUrl as requested, shell-escaped.
// NOTE(review): the result of shell_exec() is handed to json_decode() unchecked - confirm
// how a failed or missing ffprobe should be handled.
$ffmpeg = json_decode(shell_exec(sprintf('ffprobe -show_streams -show_format -print_format json -v quiet -i %s', escapeshellarg((string)$parsedUrl))));
if(!empty($ffmpeg)) {
if(!empty($ffmpeg->format)) {
// probe_score is divided by 100 to form the confidence; a missing score gives 0.
$resp->media->confidence = empty($ffmpeg->format->probe_score) ? 0 : (intval($ffmpeg->format->probe_score) / 100);
if(!empty($ffmpeg->format->duration))
$resp->media->duration = floatval($ffmpeg->format->duration);
if(!empty($ffmpeg->format->size))
$resp->media->size = intval($ffmpeg->format->size);
if(!empty($ffmpeg->format->bit_rate))
$resp->media->bitrate = intval($ffmpeg->format->bit_rate);
if($isVideo || $isImage) {
if(!empty($ffmpeg->streams)) {
// Dimensions are read from the streams ffprobe reports with codec_type 'video'.
foreach($ffmpeg->streams as $stream) {
if(($stream->codec_type ?? null) !== 'video')
continue;
// Coded dimensions are preferred over display dimensions; -1 when neither is present.
$resp->width = intval($stream->coded_width ?? $stream->width ?? -1);
$resp->height = intval($stream->coded_height ?? $stream->height ?? -1);
if(!empty($stream->display_aspect_ratio))
$resp->media->aspect_ratio = $stream->display_aspect_ratio;
// Images stop at the first such stream. For videos the loop continues, so values
// from a later video stream overwrite earlier ones.
if($isImage)
break;
}
}
}
// Audio: collect descriptive tags and derive title and description from them.
if($isAudio) {
/**
 * Copies the descriptive tags (title, artist, album, date, comment, genre)
 * from an ffprobe tag set onto $dest.
 *
 * Each tag is looked up under its lowercase name first and under its uppercase
 * name second; the first non-empty value is stored on $dest under the lowercase
 * name. Tags that are missing or empty in both spellings are left untouched.
 *
 * @param stdClass $dest   Receives the tags, modified in place.
 * @param stdClass $source Tag object as decoded from ffprobe's JSON output.
 */
function eat_tags(stdClass $dest, stdClass $source): void {
    foreach(['title', 'artist', 'album', 'date', 'comment', 'genre'] as $tagName) {
        $upperName = strtoupper($tagName);

        // Check each spelling for emptiness separately. `$lower ?? $upper` would pick a
        // present-but-empty lowercase tag over a populated uppercase one.
        if(!empty($source->{$tagName}))
            $dest->{$tagName} = $source->{$tagName};
        elseif(!empty($source->{$upperName}))
            $dest->{$tagName} = $source->{$upperName};
    }
}
if(!empty($ffmpeg->format->tags)) {
    // Container-level tags take precedence.
    $resp->media->tags = new stdClass;
    eat_tags($resp->media->tags, $ffmpeg->format->tags);
} elseif(!empty($ffmpeg->streams)) {
    // Some containers (Ogg) carry the tags on the stream instead of the format, so
    // walk the audio streams until one of them yields a recognised tag.
    $resp->media->tags = new stdClass;
    foreach($ffmpeg->streams as $stream) {
        if(($stream->codec_type ?? null) === 'audio' && !empty($stream->tags)) {
            eat_tags($resp->media->tags, $stream->tags);
            // empty() is always false for an object, so count the copied properties
            // to find out whether anything was actually extracted.
            if(count(get_object_vars($resp->media->tags)) > 0)
                break;
        }
    }
}

// Without a title so far, build one from the tags: "<artist> - <title> (<date>)",
// each part included only when its tag is present.
if(empty($resp->title)) {
    $audioTitle = '';
    if(!empty($resp->media->tags->artist))
        $audioTitle .= $resp->media->tags->artist . ' - ';
    if(!empty($resp->media->tags->title))
        $audioTitle .= $resp->media->tags->title;
    if(!empty($resp->media->tags->date))
        $audioTitle .= ' (' . $resp->media->tags->date . ')';
    if(!empty($audioTitle))
        $resp->title = $audioTitle;
}

// The comment tag serves as description when none was set before.
if(empty($resp->description) && !empty($resp->media->tags->comment))
    $resp->description = $resp->media->tags->comment;
}
}
}
// The complete ffprobe report is attached when raw output was requested.
if(UIH_INCLUDE_RAW)
$resp->ffmpeg = $ffmpeg;
// Neither HTML nor image/audio/video: nothing further to probe, just release the handle.
} else curl_close($curl);
}
}
}
// Record how long the lookup took and store the response for this URL hash. This runs
// for every lookup outcome, including responses that carry an error.
Stopwatch::stop();
$resp->took = Stopwatch::elapsed();
$respJson = json_encode($resp);
DB::prepare('REPLACE INTO `uih_metadata_cache` (`metadata_url`, `metadata_resp`) VALUES (UNHEX(:hash), :resp)')
->bind('hash', $urlHash)
->bind('resp', $respJson)
->execute();
}
} finally {
// Always release the per-URL lock and remove its lock file, also when the lookup threw.
// NOTE(review): the lock file is removed while other requests may still be waiting on the
// semaphore derived from it; a request arriving afterwards recreates the file and
// presumably obtains a different ftok() key - confirm this is acceptable.
if(!empty($semaphore))
sem_release($semaphore);
if(is_file($semPath))
unlink($semPath);
}
// On a cache hit $respJson was never assigned, so the cached object is encoded here.
echo $respJson ?? json_encode($resp);
return;
}
// Service root: a plain-text liveness message for GET (and HEAD, without body).
if('/' === $reqPath) {
    if('GET' !== $reqMethod) {
        http_response_code(405);
    } else {
        header('Content-Type: text/plain');
        if(!$reqHead)
            echo 'Metadata lookup service - OK';
    }
    return;
}

// No route matched.
http_response_code(404);