Added ffprobe stuff back, also Link header support for OEmbed.
This commit is contained in:
parent
e0fa91d049
commit
148a136bd2
1 changed files with 258 additions and 37 deletions
295
uiharu.ts
295
uiharu.ts
|
@ -9,8 +9,9 @@ import { parseMediaType } from "jsr:@std/media-types";
|
|||
import { Color } from "https://deno.land/x/color@v0.3.0/mod.ts";
|
||||
|
||||
// todo: these should not be hardcoded lol
|
||||
// Public host name this service is reachable under; used when building
// absolute thumbnail URLs.
const hostName: string = 'uiharu.edgii.net';

// TCP port the HTTP server listens on.
const port: number = 3009;

// host:port of the memcached instance used for caching metadata responses.
const memcacheServer: string = '127.0.0.1:11211';
|
||||
const allowedOrigins: String[] = [
|
||||
'edgii.net',
|
||||
'chat.edgii.net',
|
||||
|
@ -78,12 +79,22 @@ const allowOEmbed: String[] = [ // copied from wordpress source sorta
|
|||
'.pinterest.com.au',
|
||||
'.pinterest.com.mx',
|
||||
'.wolframcloud.com',
|
||||
'.instagram.com',
|
||||
'.facebook.com',
|
||||
'.pca.st',
|
||||
'.anghami.com',
|
||||
'.bsky.app',
|
||||
'.apple.com',
|
||||
'.flashii.net',
|
||||
'.fii.moe',
|
||||
'.tako.zone',
|
||||
'.patchii.net',
|
||||
'.railgun.sh',
|
||||
'.flash.moe',
|
||||
'.edgii.net',
|
||||
];
|
||||
|
||||
// Version stamp of this build; interpolated into the outgoing User-Agent.
const appVersion: string = '20241028';

// Debug mode is switched on by the presence of a `.debug` file next to this module.
const isDebug: boolean = existsSync(pathJoin(import.meta.dirname, '.debug'));
|
||||
const cache: MemcacheClient = new MemcacheClient({
|
||||
server: memcacheServer,
|
||||
|
@ -94,6 +105,21 @@ const cache: MemcacheClient = new MemcacheClient({
|
|||
},
|
||||
});
|
||||
|
||||
// Wrapper around fetch() that fills in browser-like default request headers
// (Accept, Accept-Language, User-Agent) unless the caller already supplied a
// non-empty value for them.
//
// url:  anything fetch() accepts as its first argument.
// init: optional fetch() options; it is NOT mutated, a copy with the merged
//       headers is passed on instead.
//
// Returns the Response produced by fetch(); rejections from fetch() propagate.
const uiharuFetch = async (url: string | URL | Request, init?: RequestInit): Promise<Response> => {
    // Normalise through Headers so that plain objects, tuple arrays and
    // Headers instances are all handled, and so that header names are
    // matched case-insensitively ("accept" counts as "Accept").
    const headers = new Headers(init?.headers);

    const defaults: [string, string][] = [
        ['Accept', 'text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8'],
        ['Accept-Language', 'en-GB, en;q=0.9, ja-jp;q=0.6, *;q=0.5'],
        ['User-Agent', `Mozilla/5.0 (compatible; Uiharu/${appVersion}; +http://fii.moe/uiharu)`],
    ];

    for(const [name, value] of defaults)
        // missing or empty values are replaced by the default
        if(!headers.get(name))
            headers.set(name, value);

    return await fetch(url, { ...init, headers });
};
|
||||
|
||||
const readableStreamToString = async (stream?: ReadableStream): string => {
|
||||
if(stream === null)
|
||||
return '';
|
||||
|
@ -313,31 +339,69 @@ const extractLinkedData = (html) => {
|
|||
return values;
|
||||
};
|
||||
|
||||
const extractOEmbedData = async (html, url: string, urlInfo: URL) => {
|
||||
// TODO: this should also support header discovery
|
||||
// Parses the value of an HTTP Link header into a list of plain objects.
//
// Every entry of the form `<target>; name=value; name="value"` becomes one
// object whose keys are the lower-cased parameter names and whose `href` key
// holds the (URI-decoded when possible) target. Entries that do not start
// with a `<...>` target are skipped. A parameter without `=` is stored with
// an empty string value. Never throws on malformed input.
//
// header: raw header value, may be empty.
// Returns an array of link objects, empty when nothing could be parsed.
const parseLinkHeader = (header: string): Record<string, string>[] => {
    // Splits text on a separator character, but only where the separator is
    // neither inside a <...> target nor inside a "..." quoted value, since
    // both may legitimately contain ',' and ';'.
    const splitTopLevel = (text: string, separator: string): string[] => {
        const pieces: string[] = [];
        let current = '';
        let inQuotes = false;
        let inAngles = false;

        for(let i = 0; i < text.length; ++i) {
            const char = text.charAt(i);

            if(inQuotes) {
                // keep backslash escapes intact so \" does not end the quoted value
                if(char === '\\' && i + 1 < text.length) {
                    current += char + text.charAt(++i);
                    continue;
                }
                if(char === '"')
                    inQuotes = false;
            } else if(inAngles) {
                if(char === '>')
                    inAngles = false;
            } else if(char === '"')
                inQuotes = true;
            else if(char === '<')
                inAngles = true;
            else if(char === separator) {
                pieces.push(current);
                current = '';
                continue;
            }

            current += char;
        }

        pieces.push(current);
        return pieces;
    };

    const links: Record<string, string>[] = [];

    for(const entry of splitTopLevel(header, ',')) {
        const parts = splitTopLevel(entry, ';').map(part => part.trim());

        const target = parts.shift();
        if(target === undefined || !target.startsWith('<') || !target.endsWith('>'))
            continue;

        let href = target.slice(1, -1);
        try {
            href = decodeURI(href);
        } catch {
            // malformed percent-escape: keep the raw target instead of
            // discarding the whole header
        }

        const link: Record<string, string> = {};

        for(const part of parts) {
            // split on the FIRST '=' only so values containing '=' survive
            const separatorAt = part.indexOf('=');
            const name = (separatorAt < 0 ? part : part.slice(0, separatorAt)).trim().toLowerCase();
            if(name === '')
                continue;

            let value = separatorAt < 0 ? '' : part.slice(separatorAt + 1).trim();
            if(value.startsWith('"') && value.endsWith('"'))
                value = value.slice(1, -1).replace(/\\(.)/g, '$1');

            link[name] = value;
        }

        // applying this last so a parameter literally named "href" cannot
        // override the real target
        link.href = href;
        links.push(link);
    }

    return links;
};
|
||||
|
||||
// Discovers and downloads the oEmbed document belonging to a page.
//
// Discovery order:
//   1. x.com / twitter.com use the hardcoded publish.twitter.com endpoint.
//   2. otherwise, if a parsed document is available, the first
//      <link rel="alternate" type="application/json+oembed"> element.
//   3. if nothing was found yet, a matching entry in the Link response header.
//
// response: response of the page request, used for its Link header and final URL.
// html:     cheerio-style selector function for the page, or undefined when
//           the resource was not an HTML document.
// url:      the URL that was requested.
// urlInfo:  parsed form of url.
//
// Returns the parsed oEmbed JSON object, or {} when no endpoint was found or
// when discovery, the request or JSON decoding failed. Never rejects for
// those reasons; failures are logged.
const extractOEmbedData = async (response: Response, html, url: string, urlInfo: URL) => {
    let oEmbedUrl: string = '';

    // idk how long i'll bother with this for
    if(urlInfo.host === 'x.com' || urlInfo.host === 'twitter.com')
        oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodeURIComponent(url)}`;
    else if(html !== undefined)
        oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first()?.attr('href')?.trim() ?? '';

    if(oEmbedUrl === '') {
        try {
            const links = parseLinkHeader(response.headers.get('link') ?? '');
            for(const link of links)
                if(link.rel === 'alternate' && link.type === 'application/json+oembed') {
                    oEmbedUrl = link.href;
                    break;
                }
        } catch(ex) {
            // a broken Link header must not take down the whole lookup
            console.error(ex);
        }
    }

    if(oEmbedUrl === '')
        return {};

    try {
        // discovery links may be relative; resolve against the final page URL
        const endpoint = new URL(oEmbedUrl, response.url || url).toString();

        // awaiting here (rather than returning the promise) keeps JSON
        // decoding failures inside this try/catch
        const data = await (await uiharuFetch(endpoint)).json();

        // callers read properties off the result, so never hand back null/primitives
        return typeof data === 'object' && data !== null ? data : {};
    } catch(ex) {
        console.error(ex);
        return {};
    }
};
|
||||
|
||||
const extractMetadata = async (url: string, urlInfo: URL) => {
|
||||
const data = await fetch(url);
|
||||
const contentTypeRaw = data.headers.get('content-type') ?? '';
|
||||
const extractMetadata = async (version: number, url: string, urlInfo: URL) => {
|
||||
const response = await uiharuFetch(url);
|
||||
const contentTypeRaw = response.headers.get('content-type') ?? '';
|
||||
const contentType = parseMediaType(contentTypeRaw);
|
||||
|
||||
const info = {};
|
||||
|
@ -353,8 +417,10 @@ const extractMetadata = async (url: string, urlInfo: URL) => {
|
|||
if(contentType[0])
|
||||
info.media_type = contentType[0];
|
||||
|
||||
let html = undefined;
|
||||
|
||||
if(['text/html', 'application/xhtml+xml'].includes(contentType[0])) {
|
||||
const html = cheerio.load(await readableStreamToString(data.body));
|
||||
html = cheerio.load(await readableStreamToString(response.body));
|
||||
|
||||
const metaData = extractHtmlMetaData(html);
|
||||
const ogData = extractOpenGraphData(html);
|
||||
|
@ -368,7 +434,7 @@ const extractMetadata = async (url: string, urlInfo: URL) => {
|
|||
|
||||
if(ogData.images?.length > 0) {
|
||||
const image = ogData.images[0];
|
||||
info.image = info.image_url = image.secure_url ?? image.url;
|
||||
info.image_url = image.secure_url ?? image.url;
|
||||
if(image.width > 0)
|
||||
info.image_width = image.width;
|
||||
if(image.height > 0)
|
||||
|
@ -377,15 +443,9 @@ const extractMetadata = async (url: string, urlInfo: URL) => {
|
|||
info.image_type = image.type;
|
||||
if(image.alt)
|
||||
info.image_alt = image.alt;
|
||||
if(info.image_width > 0)
|
||||
info.width = info.image_width;
|
||||
if(info.image_height > 0)
|
||||
info.height = info.image_height;
|
||||
} else {
|
||||
addInfoOrDont('image_url', twitterData.image ?? metaData.image ?? metaData.thumbnail);
|
||||
addInfoOrDont('image_alt', twitterData.image_alt);
|
||||
if(info.image_url)
|
||||
info.image = info.image_url;
|
||||
}
|
||||
|
||||
if(ogData.audios?.length > 0) {
|
||||
|
@ -406,36 +466,191 @@ const extractMetadata = async (url: string, urlInfo: URL) => {
|
|||
info.video_type = video.type;
|
||||
if(video.tags?.length > 0)
|
||||
info.video_tags = video.tags;
|
||||
if(info.video_width > 0)
|
||||
info.width = info.video_width;
|
||||
if(info.video_height > 0)
|
||||
info.height = info.video_height;
|
||||
} else {
|
||||
addInfoOrDont('video_url', twitterData.player);
|
||||
addInfoOrDont('video_width', twitterData.player_width);
|
||||
addInfoOrDont('video_height', twitterData.player_height);
|
||||
}
|
||||
|
||||
if(version < 2) {
|
||||
info.image = info.image_url;
|
||||
if(info.video_width > 0)
|
||||
info.width = info.video_width;
|
||||
else if(info.image_width > 0)
|
||||
info.width = info.image_width;
|
||||
if(info.video_height > 0)
|
||||
info.height = info.video_height;
|
||||
else if(info.image_height > 0)
|
||||
info.height = info.image_height;
|
||||
}
|
||||
|
||||
const linkedDatas = extractLinkedData(html);
|
||||
if(linkedDatas.length > 0)
|
||||
info.lds = linkedDatas;
|
||||
|
||||
if(isAllowedOEmbedDomain(urlInfo.host)) {
|
||||
const oEmbedData = await extractOEmbedData(html, url, urlInfo);
|
||||
if(oEmbedData.version)
|
||||
info.oembed = oEmbedData;
|
||||
}
|
||||
// idk what to do with this yet, only including this in debug mode for now
|
||||
if(isDebug && linkedDatas.length > 0)
|
||||
info._lds = linkedDatas;
|
||||
} else {
|
||||
if(contentType[0].startsWith('image/')) {
|
||||
//
|
||||
} else if(contentType[0].startsWith('video/')) {
|
||||
//
|
||||
} else if(contentType[0].startsWith('audio/')) {
|
||||
//
|
||||
const isAudio = contentType[0].startsWith('audio/');
|
||||
const isImage = contentType[0].startsWith('image/');
|
||||
const isVideo = contentType[0].startsWith('video/');
|
||||
|
||||
if(isAudio || isImage || isVideo) {
|
||||
// this still seems like a terrible idea lol
|
||||
const { code, stdout, stderr } = await (new Deno.Command('ffprobe', {
|
||||
stdin: 'null',
|
||||
stdout: 'piped',
|
||||
stderr: 'piped',
|
||||
args: [
|
||||
'-show_streams',
|
||||
'-show_format',
|
||||
'-print_format', 'json',
|
||||
'-v', 'quiet',
|
||||
'-i', url
|
||||
],
|
||||
})).output();
|
||||
|
||||
if(code !== 0) {
|
||||
console.error(new TextDecoder().decode(stderr));
|
||||
} else {
|
||||
const probe = JSON.parse(new TextDecoder().decode(stdout).trim());
|
||||
if(isDebug)
|
||||
info._ffprobe = probe;
|
||||
|
||||
if(typeof probe?.format === 'object') {
|
||||
const media = {};
|
||||
info.media = media;
|
||||
media.confidence = Math.min(1, Math.max(0, probe.format.probe_score / 100.0));
|
||||
|
||||
const pfDuration = parseFloat(probe.format.duration);
|
||||
if(!isNaN(pfDuration))
|
||||
media.duration = pfDuration;
|
||||
|
||||
const pfSize = parseInt(probe.format.size);
|
||||
if(!isNaN(pfSize))
|
||||
media.size = pfSize;
|
||||
|
||||
const pfBitRate = parseInt(probe.format.bit_rate);
|
||||
if(!isNaN(pfBitRate)) {
|
||||
if(version < 2)
|
||||
media.bitRate = pfBitRate;
|
||||
else
|
||||
media.bitrate = pfBitRate;
|
||||
}
|
||||
|
||||
// in Title case cus JS doesnt have an accessible lcfirst equivalent :p
|
||||
const pftFields = ['Title', 'Artist', 'Album', 'Date', 'Comment', 'Genre'];
|
||||
|
||||
if(Array.isArray(probe.streams))
|
||||
for(const stream of probe.streams)
|
||||
if(stream.codec_type === 'video') {
|
||||
media.width = stream.coded_width ?? stream.width ?? 0;
|
||||
media.height = stream.coded_height ?? stream.height ?? 0;
|
||||
|
||||
if(typeof stream.display_aspect_ratio === 'string') {
|
||||
if(version < 2)
|
||||
media.aspectRatio = stream.display_aspect_ratio;
|
||||
else
|
||||
media.aspect_ratio = stream.display_aspect_ratio;
|
||||
}
|
||||
} else if(stream.codec_type === 'audio') {
|
||||
if(typeof stream.tags === 'object')
|
||||
for(const pftFieldName of pftFields) {
|
||||
const pftFieldValue = stream.tags[pftFieldName]
|
||||
?? probe.format.tags[pftFieldName.toLowerCase()]
|
||||
?? probe.format.tags[pftFieldName.toUpperCase()];
|
||||
|
||||
if(typeof pftFieldValue === 'string') {
|
||||
if(typeof media.tags !== 'object')
|
||||
media.tags = {};
|
||||
|
||||
media.tags[pftFieldName.toLowerCase()] = pftFieldValue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(typeof probe.format.tags === 'object')
|
||||
for(const pftFieldName of pftFields) {
|
||||
const pftFieldValue = probe.format.tags[pftFieldName]
|
||||
?? probe.format.tags[pftFieldName.toLowerCase()]
|
||||
?? probe.format.tags[pftFieldName.toUpperCase()];
|
||||
|
||||
if(typeof pftFieldValue === 'string') {
|
||||
if(typeof media.tags !== 'object')
|
||||
media.tags = {};
|
||||
|
||||
media.tags[pftFieldName.toLowerCase()] = pftFieldValue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(isAudio) {
|
||||
info.audio_url = url;
|
||||
info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/audio?url=${encodeURIComponent(url)}`;
|
||||
info.image_type = 'image/png';
|
||||
|
||||
let title = '';
|
||||
if(typeof info.media.tags.artist === 'string')
|
||||
title += `${info.media.tags.artist} - `;
|
||||
if(typeof info.media.tags.title === 'string')
|
||||
title += info.media.tags.title;
|
||||
if(typeof info.media.tags.date === 'string')
|
||||
title += ` (${info.media.tags.date})`;
|
||||
title = title.trim();
|
||||
if(title !== '')
|
||||
info.title = title;
|
||||
|
||||
if(typeof info.media.tags.comment === 'string')
|
||||
info.description = info.media.tags.comment.trim();
|
||||
} else if(isImage) {
|
||||
info.image_url = url;
|
||||
info.image_type = info.media_type;
|
||||
|
||||
if(info.media.width > 0)
|
||||
info.width = info.image_width = info.media.width;
|
||||
if(info.media.height > 0)
|
||||
info.height = info.image_height = info.media.height;
|
||||
} else if(isVideo) {
|
||||
info.video_url = url;
|
||||
info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/video?url=${encodeURIComponent(url)}`;
|
||||
info.image_type = 'image/png';
|
||||
|
||||
if(info.media.width > 0)
|
||||
info.image_width = info.width = info.video_width = info.media.width;
|
||||
if(info.media.height > 0)
|
||||
info.image_height = info.height = info.video_height = info.media.height;
|
||||
}
|
||||
|
||||
if(version < 2) {
|
||||
info.image = info.image_url;
|
||||
|
||||
if(isAudio)
|
||||
info.is_audio = true;
|
||||
else if(isImage)
|
||||
info.is_image = true;
|
||||
else if(isVideo)
|
||||
info.is_video = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(isAllowedOEmbedDomain(urlInfo.host)) {
|
||||
const oEmbedData = await extractOEmbedData(response, html, url, urlInfo);
|
||||
if(oEmbedData.version)
|
||||
info.oembed = oEmbedData;
|
||||
}
|
||||
|
||||
if(version < 2 && info.video_url) {
|
||||
if(info.video_url.startsWith('https://www.youtube.com/')) {
|
||||
const ytVidUrl = new URL(info.video_url);
|
||||
const ytVidUrlParams = new URLSearchParams(ytVidUrl.search);
|
||||
info.type = 'youtube:video';
|
||||
info.youtube_video_id = basename(ytVidUrl.pathname);
|
||||
if(ytVidUrlParams.has('list'))
|
||||
info.youtube_playlist = ytVidUrlParams.get('list');
|
||||
} else if(info.video_url.startsWith('https://embed.nicovideo.jp/')) {
|
||||
const nndVidUrl = new URL(info.video_url);
|
||||
info.type = 'niconico:video';
|
||||
info.nicovideo_video_id = basename(nndVidUrl.pathname);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -470,6 +685,7 @@ const requestHandler = async (req: Request): Response => {
|
|||
return new Response('', { status: 405, headers });
|
||||
|
||||
const started = performance.now();
|
||||
const urlParams = new URLSearchParams(url.search);
|
||||
|
||||
headers['Content-Type'] = 'application/json;charset=utf-8';
|
||||
|
||||
|
@ -477,7 +693,7 @@ const requestHandler = async (req: Request): Response => {
|
|||
if(req.method === 'POST')
|
||||
urlParamRaw = (await readableStreamToString(req.body)).trim();
|
||||
else
|
||||
urlParamRaw = (new URLSearchParams(url.search)).get('url')?.trim() ?? '';
|
||||
urlParamRaw = urlParams.get('url')?.trim() ?? '';
|
||||
|
||||
if(urlParamRaw === '')
|
||||
return new Response('{"error":"metadata:uri"}', { status: 400, headers });
|
||||
|
@ -493,10 +709,15 @@ const requestHandler = async (req: Request): Response => {
|
|||
|
||||
urlParamRaw = urlParam.toString();
|
||||
|
||||
const formatVersion = parseInt(urlParams.get('fv')) || 1;
|
||||
|
||||
if(formatVersion < 1 || formatVersion > 2)
|
||||
return new Response('{"error":"metadata:version"}', { status: 400, headers });
|
||||
|
||||
const urlHash = encodeBase64Url(
|
||||
await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw))
|
||||
);
|
||||
const cacheKey = `uiharu:metadata:${urlHash}`;
|
||||
const cacheKey = `uiharu:metadata:fv${formatVersion}:${urlHash}`;
|
||||
// const cacheInfo = await cache.get(cacheKey);
|
||||
// if(cacheInfo !== undefined)
|
||||
// return new Response(
|
||||
|
@ -515,7 +736,7 @@ const requestHandler = async (req: Request): Response => {
|
|||
|
||||
try {
|
||||
const json = JSON.stringify(
|
||||
await extractMetadata(urlParamRaw, urlParam)
|
||||
await extractMetadata(formatVersion, urlParamRaw, urlParam)
|
||||
);
|
||||
|
||||
cache.set(cacheKey, brotliCompressSync(json), {
|
||||
|
|
Loading…
Reference in a new issue