2024-10-28 03:40:26 +00:00
|
|
|
import * as cheerio from 'npm:cheerio@^1.0.0';
|
|
|
|
import { MemcacheClient } from 'npm:memcache-client@^1.0.5';
|
|
|
|
import { existsSync } from "jsr:@std/fs";
|
|
|
|
import { basename, join as pathJoin } from "jsr:@std/path";
|
|
|
|
import { normalize as pathNormalize } from "jsr:@std/path/normalize";
|
|
|
|
import { encodeBase64Url } from "jsr:@std/encoding/base64url";
|
|
|
|
import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
|
|
|
|
import { parseMediaType } from "jsr:@std/media-types";
|
|
|
|
import { Color } from "https://deno.land/x/color@v0.3.0/mod.ts";
|
|
|
|
|
|
|
|
// TODO: these deployment settings should not be hardcoded.

/** Public host name of this service; used when building absolute thumbnail URLs. */
const hostName: string = 'uiharu.edgii.net';

/** TCP port the HTTP server listens on. */
const port: number = 3009;

/** `host:port` address of the memcached server backing the metadata cache. */
const memcacheServer: string = '127.0.0.1:11211';

/** Hosts whose pages may call this service cross-origin (compared with the host of the `Origin` header). */
const allowedOrigins: string[] = [
    'edgii.net',
    'chat.edgii.net',
    'sockchat.edgii.net',
    'ajaxchat.edgii.net',
];
|
|
|
|
/**
 * Domain suffixes for which oEmbed discovery is attempted (list loosely copied from the WordPress source).
 * Every entry starts with a dot so that it matches the domain itself and any of its subdomains.
 */
const allowOEmbed: string[] = [
    '.youtube.com',
    '.youtu.be',
    '.vimeo.com',
    '.dailymotion.com',
    '.dai.ly',
    '.flickr.com',
    '.flic.kr',
    '.smugmug.com',
    '.scribd.com',
    '.wordpress.tv',
    '.crowdsignal.net',
    '.polldaddy.com',
    '.poll.fm',
    '.survey.fm',
    '.twitter.com',
    // x.com has a dedicated branch in extractOEmbedData, which is only reachable if the domain is listed here
    '.x.com',
    '.soundcloud.com',
    '.spotify.com',
    '.imgur.com',
    '.issuu.com',
    '.mixcloud.com',
    '.ted.com',
    '.animoto.com',
    '.video214.com',
    '.tumblr.com',
    '.kickstarter.com',
    '.kck.st',
    '.cloudup.com',
    '.reverbnation.com',
    '.videopress.com',
    '.reddit.com',
    '.speakerdeck.com',
    '.screencast.com',
    '.amazon.com',
    '.amazon.com.mx',
    '.amazon.com.br',
    '.amazon.ca',
    '.amazon.co.uk',
    '.amazon.de',
    '.amazon.fr',
    '.amazon.it',
    '.amazon.es',
    '.amazon.in',
    '.amazon.nl',
    '.amazon.ru',
    '.amazon.co.jp',
    '.amazon.com.au',
    '.amazon.cn',
    '.a.co',
    '.amzn.to',
    '.amzn.eu',
    '.amzn.in',
    '.amzn.asia',
    '.z.cn',
    '.somecards.com',
    '.some.ly',
    '.tiktok.com',
    '.pinterest.com',
    '.pinterest.com.au',
    '.pinterest.com.mx',
    '.wolframcloud.com',
    '.instagram.com',
    '.facebook.com',
    '.pca.st',
    '.anghami.com',
    '.bsky.app',
    '.apple.com',
    '.flashii.net',
    '.fii.moe',
    '.tako.zone',
    '.patchii.net',
    '.railgun.sh',
    '.flash.moe',
    '.edgii.net',
];
|
|
|
|
|
2024-10-28 18:32:02 +00:00
|
|
|
/** Version string advertised in the User-Agent header of outgoing requests. */
const appVersion: string = '20241028';

/** Debug mode is enabled by the presence of a `.debug` file next to this module. */
const isDebug: boolean = existsSync(pathJoin(import.meta.dirname, '.debug'));

/** Memcached client used to cache serialised metadata responses. */
const cache: MemcacheClient = new MemcacheClient({
    server: memcacheServer,
    compressor: {
        // Identity functions: the client never transforms values itself.
        // The request handler brotli-compresses payloads before storing them.
        compressSync: buffer => buffer,
        decompressSync: buffer => buffer,
    },
});
|
|
|
|
|
2024-10-28 18:32:02 +00:00
|
|
|
/**
 * `fetch` wrapper that fills in default `Accept`, `Accept-Language` and `User-Agent` request headers
 * whenever the caller did not provide a (non-empty) value for them.
 *
 * @param url  Resource to request; forwarded to `fetch` unchanged.
 * @param init Optional `fetch` options. The object itself is never modified. Headers may be supplied in
 *             any form the `Headers` constructor accepts (plain object, tuple array or `Headers`).
 * @returns The response produced by `fetch`.
 */
const uiharuFetch = async (url: string | URL | Request, init?: RequestInit): Promise<Response> => {
    // Work on a copy: header lookups become case-insensitive and the caller's object stays untouched.
    const headers = new Headers(init?.headers);

    if(!headers.get('Accept'))
        headers.set('Accept', 'text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8');
    if(!headers.get('Accept-Language'))
        headers.set('Accept-Language', 'en-GB, en;q=0.9, ja-jp;q=0.6, *;q=0.5');
    if(!headers.get('User-Agent'))
        headers.set('User-Agent', `Mozilla/5.0 (compatible; Uiharu/${appVersion}; +http://fii.moe/uiharu)`);

    return await fetch(url, { ...init, headers });
};
|
|
|
|
|
2024-10-28 03:40:26 +00:00
|
|
|
/**
 * Reads a byte stream to its end and decodes it as UTF-8 text.
 *
 * @param stream Stream to drain; `null` and `undefined` (e.g. a body-less request or response) yield `''`.
 * @returns The decoded text. Multi-byte sequences split across chunk boundaries are decoded correctly.
 */
const readableStreamToString = async (stream?: ReadableStream | null): Promise<string> => {
    // `== null` deliberately covers both null and undefined
    if(stream == null)
        return '';

    const reader = stream.getReader();
    const decoder = new TextDecoder();
    let result = '';

    for(;;) {
        const { done, value } = await reader.read();
        if(done) break;
        // stream mode keeps incomplete trailing bytes buffered until the next chunk arrives
        result += decoder.decode(value, { stream: true });
    }

    // flush whatever the decoder still holds
    result += decoder.decode();

    return result;
};
|
|
|
|
|
|
|
|
/**
 * Tells whether oEmbed discovery is permitted for a domain.
 *
 * @param domain Host name, with or without a leading dot.
 * @returns `true` when the domain equals, or is a subdomain of, an entry in `allowOEmbed`.
 */
const isAllowedOEmbedDomain = (domain: string): boolean => {
    // allowlist entries start with a dot, so prefixing one makes "example.com" match ".example.com"
    // without letting "notexample.com" slip through
    const dotted = domain.startsWith('.') ? domain : `.${domain}`;
    return allowOEmbed.some(suffix => dotted.endsWith(suffix));
};
|
|
|
|
|
|
|
|
/**
 * Collects generic metadata from a parsed HTML document: `<title>`, a few `<meta name=…>` tags and
 * the `image_src` / `canonical` `<link>` tags.
 *
 * @param html Selector function of the loaded document (called as `html(selector)`).
 * @returns Object holding only the fields that were present and non-empty:
 *          `title`, `description`, `thumbnail`, `theme_color` (hex string), `image`, `canonical_url`.
 */
const extractHtmlMetaData = (html) => {
    const values: Record<string, string> = {};

    // trimmed attribute value of the first element matching the selector, or '' when absent
    const attrOf = (selector: string, attribute: string): string =>
        html(selector).first()?.attr(attribute)?.trim() ?? '';

    const titleTag = html('title')?.first()?.text().trim() ?? '';
    if(titleTag.length > 0)
        values.title = titleTag;

    const metaDescriptionTag = attrOf('meta[name="description"]', 'content');
    if(metaDescriptionTag.length > 0)
        values.description = metaDescriptionTag;

    const metaThumbnailTag = attrOf('meta[name="thumbnail"]', 'content');
    if(metaThumbnailTag.length > 0)
        values.thumbnail = metaThumbnailTag;

    const metaThemeColorTag = attrOf('meta[name="theme-color"]', 'content');
    if(metaThemeColorTag.length > 0)
        try {
            values.theme_color = Color.string(metaThemeColorTag).hex();
        } catch(ex) {
            // The value is page-controlled; a colour that cannot be converted only costs us
            // this one field instead of failing the whole extraction.
            console.error(ex);
        }

    const linkImageSrcTag = attrOf('link[rel="image_src"]', 'href');
    if(linkImageSrcTag.length > 0)
        values.image = linkImageSrcTag;

    const linkCanonicalTag = attrOf('link[rel="canonical"]', 'href');
    if(linkCanonicalTag.length > 0)
        values.canonical_url = linkCanonicalTag;

    return values;
};
|
|
|
|
|
|
|
|
/**
 * Collects Open Graph (`<meta property="og:…">`) data from a parsed HTML document.
 *
 * Simple properties become plain fields (first occurrence wins). Repeatable properties become arrays
 * (`locales`). Structured properties become arrays of objects (`images`, `videos`, `audios`), where a
 * new object is started whenever a sub-property repeats, e.g. a second `og:image`.
 *
 * @param html Selector function of the loaded document (called as `html(selector)` / `html(node)`).
 * @returns Object with the recognised, validated properties; invalid or empty values are omitted.
 */
const extractOpenGraphData = (html) => {
    const values: Record<string, any> = {};

    // Property table. Per entry:
    //   type   - validation applied to the content ('str', 'int', 'url' or 'mime')
    //   alias  - the bare property is shorthand for this sub-property (og:image == og:image:url)
    //   of     - name of the structured object this sub-property belongs to
    //   array  - values (or objects, for structured properties) are collected in this array field
    //   protos - URL protocols accepted for 'url' values (default: https and http)
    const properties: Record<string, { type?: string, alias?: string, of?: string, array?: string, protos?: string[] }> = {
        'url': { type: 'url' },
        'type': { type: 'str' },
        'title': { type: 'str' },
        'locale': { type: 'str' },
        'locale:alternate': { type: 'str', array: 'locales' },
        'description': { type: 'str' },
        'determiner': { type: 'str' },
        'site_name': { type: 'str' },

        'image': { alias: 'image:url', array: 'images' },
        'image:url': { of: 'image', type: 'url' },
        'image:secure_url': { of: 'image', type: 'url', protos: ['https:'] },
        'image:type': { of: 'image', type: 'mime' },
        'image:width': { of: 'image', type: 'int' },
        'image:height': { of: 'image', type: 'int' },
        // must be 'str': any type tag unknown to the validation below causes the value to be dropped
        'image:alt': { of: 'image', type: 'str' },

        'video': { alias: 'video:url', array: 'videos' },
        'video:url': { of: 'video', type: 'url' },
        'video:secure_url': { of: 'video', type: 'url', protos: ['https:'] },
        'video:type': { of: 'video', type: 'mime' },
        'video:width': { of: 'video', type: 'int' },
        'video:height': { of: 'video', type: 'int' },
        'video:tag': { of: 'video', type: 'str', array: 'tags' },

        'audio': { alias: 'audio:url', array: 'audios' },
        'audio:url': { of: 'audio', type: 'url' },
        'audio:secure_url': { of: 'audio', type: 'url', protos: ['https:'] },
        'audio:type': { of: 'audio', type: 'mime' },
    };

    const tags = html('meta[property^="og:"]');
    for(const tagInfo of tags) {
        const tag = html(tagInfo);

        // strip the "og:" prefix; own-property check so names like "constructor" are not treated as known
        let name = (tag.attr('property')?.trim() ?? '').substring(3);
        if(!Object.prototype.hasOwnProperty.call(properties, name))
            continue;

        let value = tag.attr('content')?.trim() ?? '';

        let propInfo = properties[name];
        let target: any = values;

        if(propInfo.alias) {
            name = propInfo.alias;
            propInfo = properties[name];
        }

        if(propInfo.of) {
            // "image:width" -> "width", stored on the current object of the owning structure
            name = name.substring(propInfo.of.length + 1);
            const objInfo = properties[propInfo.of];

            if(objInfo.array) {
                if(objInfo.array in target)
                    target = target[objInfo.array];
                else
                    target = target[objInfo.array] = [];

                // a repeated sub-property marks the start of the next object
                const lastItem = target[target.length - 1];
                if(lastItem === undefined || name in lastItem) {
                    const newItem = {};
                    target.push(newItem);
                    target = newItem;
                } else
                    target = lastItem;
            } else {
                if(!(name in target))
                    target[name] = {};

                target = target[name];
            }
        }

        if(propInfo.array) {
            if(propInfo.array in target)
                target = target[propInfo.array];
            else
                target = target[propInfo.array] = [];
        } else if(name in target)
            continue; // first occurrence wins

        if(propInfo.type === 'int')
            value = parseInt(value, 10);
        else {
            if(propInfo.type === 'mime') {
                // world's most naive validation
                if(value.indexOf('/') < 0)
                    value = undefined;
            } else if(propInfo.type === 'url') {
                try {
                    const protos = propInfo.protos ?? ['https:', 'http:'];
                    if(!protos.includes(new URL(value).protocol))
                        value = undefined;
                } catch(ex) {
                    console.error(ex);
                    value = undefined;
                }
            } else if(propInfo.type !== 'str')
                value = undefined;
        }

        // drops rejected values as well as empty strings, NaN and 0
        if(value) {
            if(propInfo.array)
                target.push(value);
            else
                target[name] = value;
        }
    }

    return values;
};
|
|
|
|
|
|
|
|
/**
 * Gathers Twitter card metadata (`<meta name="twitter:…">`) from a parsed HTML document.
 *
 * @param html Selector function of the loaded document.
 * @returns Object keyed by card field, with the first ':' of the field name replaced by '_'
 *          (e.g. `image_alt`); fields that are missing or blank are left out.
 */
const extractTwitterData = (html) => {
    const cardFields = [
        'card', 'site', 'site:id', 'creator', 'creator:id',
        'description', 'title', 'image', 'image:alt',
        'player', 'player:width', 'player:height', 'player:stream',
    ];

    const collected = {};

    cardFields.forEach(field => {
        const content = html(`meta[name="twitter:${field}"]`)?.first()?.attr('content')?.trim() ?? '';
        if(content.length === 0)
            return;

        const key = field.replace(':', '_');
        collected[key] = content;
    });

    return collected;
};
|
|
|
|
|
|
|
|
/**
 * Parses every JSON-LD block (`<script type="application/ld+json">`) of a parsed HTML document.
 *
 * @param html Selector function of the loaded document.
 * @returns The parsed documents in page order; blocks that fail to parse are logged and skipped.
 */
const extractLinkedData = (html) => {
    const documents = [];

    Array.from(html('script[type="application/ld+json"]')).forEach(scriptNode => {
        try {
            const rawJson = html(scriptNode).text().trim();
            documents.push(JSON.parse(rawJson));
        } catch(ex) {
            console.error(ex);
        }
    });

    return documents;
};
|
|
|
|
|
2024-10-28 18:32:02 +00:00
|
|
|
/**
 * Parses the value of an HTTP `Link` header into a list of link objects.
 *
 * Each returned object carries the link parameters as string fields (`rel`, `type`, …) plus `href`,
 * which is assigned last so that a parameter literally named "href" cannot override the real target.
 * Parameters without a value (e.g. `crossorigin`) are stored as `''`.
 *
 * Limitation: entries are split on ',' and parameters on ';' without regard for quoting, so targets
 * or quoted values containing those characters are not parsed correctly.
 *
 * @param header Raw header value; `''` yields an empty list.
 * @returns One object per well-formed `<target>; param=value; …` entry; malformed entries are skipped.
 */
const parseLinkHeader = (header: string) => {
    const links: Record<string, string>[] = [];

    for(const line of header.split(',')) {
        const parts = line.trim().split(';').map(part => part.trim());

        const target = parts.shift();
        if(typeof target !== 'string' || !target.startsWith('<') || !target.endsWith('>'))
            continue;

        let href = target.slice(1, -1);
        try {
            href = decodeURI(href);
        } catch {
            // malformed percent-encoding: keep the target exactly as it was sent
        }

        const link: Record<string, string> = {};
        links.push(link);

        for(const part of parts) {
            // produced by a trailing or doubled ';'
            if(part === '')
                continue;

            // split on the first '=' only, values may legitimately contain more of them
            const equals = part.indexOf('=');
            const name = equals < 0 ? part : part.slice(0, equals).trim();
            let value = equals < 0 ? '' : part.slice(equals + 1).trim();
            if(value.length >= 2 && value.startsWith('"') && value.endsWith('"'))
                value = value.slice(1, -1);

            link[name] = value;
        }

        // applying this last to avoid tomfoolery :3
        link.href = href;
    }

    return links;
};
|
2024-10-28 03:40:26 +00:00
|
|
|
|
2024-10-28 18:32:02 +00:00
|
|
|
/**
 * Discovers and downloads the oEmbed document for a page.
 *
 * Discovery order: a fixed publish.twitter.com endpoint for x.com / twitter.com, then the page's
 * `<link rel="alternate" type="application/json+oembed">` tag, then the response's `Link` header.
 *
 * @param response Response of the page request; only its `Link` header is read.
 * @param html     Selector function of the loaded document, or `undefined` when the page is not HTML.
 * @param url      URL of the page as a string.
 * @param urlInfo  The same URL, parsed.
 * @returns The decoded oEmbed JSON object, or `{}` when none was found or it could not be retrieved.
 */
const extractOEmbedData = async (response: Response, html, url: string, urlInfo: URL) => {
    let oEmbedUrl: string = '';

    // idk how long i'll bother with this for
    if(urlInfo.host === 'x.com' || urlInfo.host === 'twitter.com')
        oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodeURIComponent(url)}`;
    else if(html !== undefined)
        oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first()?.attr('href')?.trim() ?? '';

    if(oEmbedUrl === '') {
        const links = parseLinkHeader(response.headers.get('link') ?? '');
        const discovered = links.find(link => link.rel === 'alternate' && link.type === 'application/json+oembed');
        if(discovered !== undefined)
            oEmbedUrl = discovered.href;
    }

    if(oEmbedUrl === '')
        return {};

    try {
        // The endpoint is page-controlled: resolve relative references against the page URL
        // and refuse anything that is not plain HTTP(S).
        const endpoint = new URL(oEmbedUrl, url);
        if(!['http:', 'https:'].includes(endpoint.protocol))
            return {};

        // awaited inside the try block so that body/JSON errors end up in the catch below
        const data = await (await uiharuFetch(endpoint.toString())).json();

        // callers read properties off the result, so never hand back null or a primitive
        return typeof data === 'object' && data !== null ? data : {};
    } catch(ex) {
        console.error(ex);
        return {};
    }
};
|
|
|
|
|
2024-10-28 18:32:02 +00:00
|
|
|
// ffprobe tag names copied into `media.tags`; Title case because every name is also tried in lower and UPPER case
const uiharuProbeTagFields = ['Title', 'Artist', 'Album', 'Date', 'Comment', 'Genre'];

/** Copies the recognised string tags of one ffprobe `tags` object into `media.tags` (lower-case keys). */
const uiharuCopyProbeTags = (media: Record<string, any>, tags: unknown): void => {
    if(typeof tags !== 'object' || tags === null)
        return;

    const source = tags as Record<string, unknown>;
    for(const fieldName of uiharuProbeTagFields) {
        const fieldValue = source[fieldName]
            ?? source[fieldName.toLowerCase()]
            ?? source[fieldName.toUpperCase()];

        if(typeof fieldValue === 'string') {
            if(typeof media.tags !== 'object')
                media.tags = {};

            media.tags[fieldName.toLowerCase()] = fieldValue;
        }
    }
};

/**
 * Runs ffprobe on a URL and condenses its report.
 * Returns `probe` (the raw report) when ffprobe succeeded, and `media` when the report has a format section.
 */
const uiharuProbeMedia = async (version: number, url: string): Promise<{ probe?: any, media?: Record<string, any> }> => {
    // this still seems like a terrible idea lol
    const { code, stdout, stderr } = await (new Deno.Command('ffprobe', {
        stdin: 'null',
        stdout: 'piped',
        stderr: 'piped',
        args: [
            '-show_streams',
            '-show_format',
            '-print_format', 'json',
            '-v', 'quiet',
            '-i', url
        ],
    })).output();

    if(code !== 0) {
        console.error(new TextDecoder().decode(stderr));
        return {};
    }

    const probe = JSON.parse(new TextDecoder().decode(stdout).trim());
    if(typeof probe?.format !== 'object' || probe.format === null)
        return { probe };

    const media: Record<string, any> = {};
    media.confidence = Math.min(1, Math.max(0, probe.format.probe_score / 100.0));

    const pfDuration = parseFloat(probe.format.duration);
    if(!isNaN(pfDuration))
        media.duration = pfDuration;

    const pfSize = parseInt(probe.format.size);
    if(!isNaN(pfSize))
        media.size = pfSize;

    // format version 1 uses camelCase field names, later versions snake_case/lowercase
    const pfBitRate = parseInt(probe.format.bit_rate);
    if(!isNaN(pfBitRate)) {
        if(version < 2)
            media.bitRate = pfBitRate;
        else
            media.bitrate = pfBitRate;
    }

    if(Array.isArray(probe.streams))
        for(const stream of probe.streams)
            if(stream.codec_type === 'video') {
                media.width = stream.coded_width ?? stream.width ?? 0;
                media.height = stream.coded_height ?? stream.height ?? 0;

                if(typeof stream.display_aspect_ratio === 'string') {
                    if(version < 2)
                        media.aspectRatio = stream.display_aspect_ratio;
                    else
                        media.aspect_ratio = stream.display_aspect_ratio;
                }
            } else if(stream.codec_type === 'audio')
                uiharuCopyProbeTags(media, stream.tags);

    // container-level tags are applied last, so they override tags found on audio streams
    uiharuCopyProbeTags(media, probe.format.tags);

    return { probe, media };
};

/**
 * Builds the metadata document for a URL.
 *
 * HTML pages are described from their meta / Open Graph / Twitter card tags. Audio, image and video
 * resources are described from an ffprobe run against the URL. For allowlisted domains the oEmbed
 * document is attached as `oembed`. Format version 1 additionally receives legacy fields
 * (`image`, `width`, `height`, `is_audio` / `is_image` / `is_video`, `type` and the video ids).
 *
 * @param version Response format version; values below 2 enable the legacy fields.
 * @param url     URL to inspect, as a string. It is handed to ffprobe for media resources, so callers
 *                should restrict it to trusted schemes.
 * @param urlInfo The same URL, parsed.
 * @returns The metadata object. Network and JSON errors of the main request propagate to the caller.
 */
const extractMetadata = async (version: number, url: string, urlInfo: URL) => {
    const response = await uiharuFetch(url);
    const contentTypeRaw = response.headers.get('content-type') ?? '';

    let contentType;
    try {
        contentType = parseMediaType(contentTypeRaw);
    } catch(ex) {
        // a header with malformed parameters should not fail the lookup; fall back to the bare type
        console.error(ex);
        contentType = [contentTypeRaw.split(';')[0].trim().toLowerCase(), undefined];
    }

    const info: Record<string, any> = {};
    const addInfoOrDont = (prop: string, value: unknown) => {
        if(value !== null && value !== undefined)
            info[prop] = value;
    };

    // defaults, overridden further down when the resource offers something better
    info.url = url;
    const fileName = basename(urlInfo.pathname);
    try {
        info.title = decodeURIComponent(fileName);
    } catch {
        // malformed percent-encoding in the path
        info.title = fileName;
    }
    info.site_name = urlInfo.host;

    if(contentType[0])
        info.media_type = contentType[0];

    let html = undefined;

    if(['text/html', 'application/xhtml+xml'].includes(contentType[0])) {
        html = cheerio.load(await readableStreamToString(response.body));

        const metaData = extractHtmlMetaData(html);
        const ogData = extractOpenGraphData(html);
        const twitterData = extractTwitterData(html);

        addInfoOrDont('url', ogData.url ?? metaData.canonical_url);
        addInfoOrDont('title', ogData.title ?? twitterData.title ?? metaData.title);
        addInfoOrDont('site_name', ogData.site_name);
        addInfoOrDont('description', ogData.description ?? twitterData.description ?? metaData.description);
        addInfoOrDont('color', metaData.theme_color);

        if(ogData.images?.length > 0) {
            const image = ogData.images[0];
            info.image_url = image.secure_url ?? image.url;
            if(image.width > 0)
                info.image_width = image.width;
            if(image.height > 0)
                info.image_height = image.height;
            if(image.type)
                info.image_type = image.type;
            if(image.alt)
                info.image_alt = image.alt;
        } else {
            addInfoOrDont('image_url', twitterData.image ?? metaData.image ?? metaData.thumbnail);
            addInfoOrDont('image_alt', twitterData.image_alt);
        }

        if(ogData.audios?.length > 0) {
            const audio = ogData.audios[0];
            info.audio_url = audio.secure_url ?? audio.url;
            if(audio.type)
                info.audio_type = audio.type;
        }

        if(ogData.videos?.length > 0) {
            const video = ogData.videos[0];
            info.video_url = video.secure_url ?? video.url;
            if(video.width > 0)
                info.video_width = video.width;
            if(video.height > 0)
                info.video_height = video.height;
            if(video.type)
                info.video_type = video.type;
            if(video.tags?.length > 0)
                info.video_tags = video.tags;
        } else {
            addInfoOrDont('video_url', twitterData.player);
            addInfoOrDont('video_width', twitterData.player_width);
            addInfoOrDont('video_height', twitterData.player_height);
        }

        if(version < 2) {
            info.image = info.image_url;

            if(info.video_width > 0)
                info.width = info.video_width;
            else if(info.image_width > 0)
                info.width = info.image_width;

            if(info.video_height > 0)
                info.height = info.video_height;
            else if(info.image_height > 0)
                info.height = info.image_height;
        }

        const linkedDatas = extractLinkedData(html);
        // idk what to do with this yet, only including this in debug mode for now
        if(isDebug && linkedDatas.length > 0)
            info._lds = linkedDatas;
    } else {
        // The body is never read on this path (ffprobe fetches the URL itself),
        // so release the connection instead of leaving the stream dangling.
        await response.body?.cancel().catch(ex => console.error(ex));

        const isAudio = contentType[0].startsWith('audio/');
        const isImage = contentType[0].startsWith('image/');
        const isVideo = contentType[0].startsWith('video/');

        if(isAudio || isImage || isVideo) {
            const { probe, media } = await uiharuProbeMedia(version, url);
            if(isDebug && probe !== undefined)
                info._ffprobe = probe;
            if(media !== undefined)
                info.media = media;

            // `media` is missing when ffprobe failed and `media.tags` when the file carries no tags,
            // hence the optional chaining below
            if(isAudio) {
                info.audio_url = url;
                info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/audio?url=${encodeURIComponent(url)}`;
                info.image_type = 'image/png';

                const tags = info.media?.tags ?? {};

                let title = '';
                if(typeof tags.artist === 'string')
                    title += `${tags.artist} - `;
                if(typeof tags.title === 'string')
                    title += tags.title;
                if(typeof tags.date === 'string')
                    title += ` (${tags.date})`;
                title = title.trim();
                if(title !== '')
                    info.title = title;

                if(typeof tags.comment === 'string')
                    info.description = tags.comment.trim();
            } else if(isImage) {
                info.image_url = url;
                info.image_type = info.media_type;

                if(info.media?.width > 0)
                    info.width = info.image_width = info.media.width;
                if(info.media?.height > 0)
                    info.height = info.image_height = info.media.height;
            } else if(isVideo) {
                info.video_url = url;
                info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/video?url=${encodeURIComponent(url)}`;
                info.image_type = 'image/png';

                if(info.media?.width > 0)
                    info.image_width = info.width = info.video_width = info.media.width;
                if(info.media?.height > 0)
                    info.image_height = info.height = info.video_height = info.media.height;
            }

            if(version < 2) {
                info.image = info.image_url;

                if(isAudio)
                    info.is_audio = true;
                else if(isImage)
                    info.is_image = true;
                else if(isVideo)
                    info.is_video = true;
            }
        }
    }

    if(isAllowedOEmbedDomain(urlInfo.host)) {
        const oEmbedData = await extractOEmbedData(response, html, url, urlInfo);
        if(oEmbedData.version)
            info.oembed = oEmbedData;
    }

    // legacy embed hints, format version 1 only
    if(version < 2 && info.video_url) {
        if(info.video_url.startsWith('https://www.youtube.com/')) {
            const ytVidUrl = new URL(info.video_url);
            const ytVidUrlParams = new URLSearchParams(ytVidUrl.search);
            info.type = 'youtube:video';
            info.youtube_video_id = basename(ytVidUrl.pathname);
            if(ytVidUrlParams.has('list'))
                info.youtube_playlist = ytVidUrlParams.get('list');
        } else if(info.video_url.startsWith('https://embed.nicovideo.jp/')) {
            const nndVidUrl = new URL(info.video_url);
            info.type = 'niconico:video';
            info.nicovideo_video_id = basename(nndVidUrl.pathname);
        }
    }

    return info;
};
|
|
|
|
|
|
|
|
/**
 * HTTP entry point of the service.
 *
 * Routes:
 *  - `OPTIONS *`                      CORS preflight.
 *  - `/metadata`                      JSON metadata for the URL given as `?url=` (GET/HEAD) or as the
 *                                     request body (POST); `?fv=` selects format version 1 (default) or 2.
 *                                     Results are cached in memcached for 600 seconds.
 *  - `/metadata/batch`                Placeholder that always returns an empty result set.
 *  - `/metadata/thumb/audio|video`    Thumbnail extracted from the media at `?url=` using ffmpeg.
 *  - anything else (GET/HEAD)         Files below `public/`, delivered through `X-Accel-Redirect`.
 *
 * Requests carrying an `Origin` header whose host is not in `allowedOrigins` are rejected with 403.
 *
 * @param req Incoming request.
 * @returns The response for the request.
 */
const requestHandler = async (req: Request): Promise<Response> => {
    const url = new URL(req.url);
    const headers: Record<string, string> = { 'X-Powered-By': 'Uiharu' };

    const originRaw = req.headers.get('origin');
    if(originRaw !== null) {
        let originHost = '';
        try {
            originHost = new URL(originRaw).host;
        } catch {
            // not a URL (browsers send "Origin: null" for opaque origins): rejected just below
        }

        if(!allowedOrigins.includes(originHost))
            return new Response('403', { status: 403, headers });

        headers['Access-Control-Allow-Origin'] = originRaw;
        headers['Vary'] = 'Origin';
    }

    if(req.method === 'OPTIONS') {
        headers['Allow'] = 'OPTIONS, GET, HEAD, POST';
        headers['Access-Control-Allow-Methods'] = 'OPTIONS, GET, HEAD, POST';

        // idk if this is the appropriate status code but: balls
        return new Response('', { status: 204, headers });
    }

    if(url.pathname === '/metadata') {
        if(!['GET', 'HEAD', 'POST'].includes(req.method))
            return new Response('', { status: 405, headers });

        const started = performance.now();
        const urlParams = new URLSearchParams(url.search);

        headers['Content-Type'] = 'application/json;charset=utf-8';

        let urlParamRaw: string = '';
        if(req.method === 'POST')
            urlParamRaw = (await readableStreamToString(req.body)).trim();
        else
            urlParamRaw = urlParams.get('url')?.trim() ?? '';

        if(urlParamRaw === '')
            return new Response('{"error":"metadata:uri"}', { status: 400, headers });
        // protocol-relative input
        if(urlParamRaw.startsWith('//'))
            urlParamRaw = 'https:' + urlParamRaw;

        let urlParam: URL;
        try {
            urlParam = new URL(urlParamRaw);
        } catch(ex) {
            return new Response('{"error":"metadata:uri"}', { status: 400, headers });
        }

        // The URL is handed to fetch and possibly ffprobe; anything but HTTP(S), e.g. file:,
        // would let clients read local resources.
        if(!['http:', 'https:'].includes(urlParam.protocol))
            return new Response('{"error":"metadata:uri"}', { status: 400, headers });

        // normalised form, so that equivalent spellings share one cache entry
        urlParamRaw = urlParam.toString();

        // missing or unparseable fv (and 0) fall back to version 1
        const formatVersion = parseInt(urlParams.get('fv') ?? '', 10) || 1;
        if(formatVersion < 1 || formatVersion > 2)
            return new Response('{"error":"metadata:version"}', { status: 400, headers });

        const urlHash = encodeBase64Url(
            await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw))
        );
        const cacheKey = `uiharu:metadata:fv${formatVersion}:${urlHash}`;

        // An unreachable cache or a corrupt entry degrades to a fresh lookup instead of an error.
        let cached: Uint8Array | undefined;
        try {
            const cacheInfo = await cache.get(cacheKey);
            if(cacheInfo !== undefined)
                cached = brotliDecompressSync(cacheInfo.value);
        } catch(ex) {
            console.error(ex);
        }

        if(cached !== undefined)
            return new Response(cached, {
                status: 200,
                headers: {
                    ...headers,
                    'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
                    'X-Uiharu-State': 'cache',
                },
            });

        try {
            const json = JSON.stringify(
                await extractMetadata(formatVersion, urlParamRaw, urlParam)
            );

            // Fire and forget: the response does not wait for the cache write,
            // but a failed write must not surface as an unhandled rejection.
            void (async () => {
                try {
                    await cache.set(cacheKey, brotliCompressSync(json), {
                        compress: false,
                        lifetime: 600
                    });
                } catch(ex) {
                    console.error(ex);
                }
            })();

            return new Response(json, {
                status: 200,
                headers: {
                    ...headers,
                    'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
                    'X-Uiharu-State': 'fresh',
                },
            });
        } catch(ex) {
            console.error(ex);
            return new Response('{"error":"metadata:lookup"}', { status: 500, headers });
        }
    }

    if(url.pathname === '/metadata/batch') {
        if(!['GET', 'HEAD', 'POST'].includes(req.method))
            return new Response('', { status: 405, headers });

        return new Response('{"took":0,"results":[]}', {
            headers: {
                ...headers,
                'Content-Type': 'application/json',
            },
        });
    }

    const isAudio = url.pathname === '/metadata/thumb/audio';
    const isVideo = url.pathname === '/metadata/thumb/video';
    if(isAudio || isVideo) {
        if(!['HEAD', 'GET'].includes(req.method))
            return new Response('', { status: 405, headers });

        let urlParamRaw: string = (new URLSearchParams(url.search)).get('url')?.trim() ?? '';
        if(urlParamRaw === '')
            return new Response('missing url parameter', { status: 400, headers });

        let scheme: string = '';
        try {
            const urlParam = new URL(urlParamRaw);
            scheme = urlParam.protocol;
            urlParamRaw = urlParam.toString();
        } catch(ex) {
            return new Response('invalid url parameter', { status: 400, headers });
        }

        if(!['http:', 'https:'].includes(scheme))
            return new Response('unsupported url scheme', { status: 400, headers });

        // this seems like a terrible idea lol
        // Output is one frame on stdout: video frames are encoded as PNG, while for audio
        // the embedded picture stream is copied as-is.
        const args = ['-i', urlParamRaw];
        if(isAudio) args.push('-an');
        args.push(
            '-f', 'image2pipe',
            '-c:v', isVideo ? 'png' : 'copy',
            '-frames:v', '1',
            '-',
        );

        const { code, stdout, stderr } = await (new Deno.Command('ffmpeg', {
            stdin: 'null',
            stdout: 'piped',
            stderr: 'piped',
            args,
        })).output();

        if(code !== 0) {
            console.error(new TextDecoder().decode(stderr));
            return new Response('decode failed', { status: 500, headers });
        }

        // TODO: bother with cache someday maybe
        const thumb = stdout;

        return new Response(thumb, {
            headers: {
                ...headers,
                'Content-Type': 'image/png',
                'Cache-Control': 'public, max-age=31536000, immutable',
            },
        });
    }

    // serving files from /public dir
    if(['HEAD', 'GET'].includes(req.method)) {
        const localPathPrefix = import.meta.dirname + '/public/';
        const localPathSuffix = pathNormalize(url.pathname === '/' ? '/index.html' : url.pathname);
        const localPath = pathNormalize(localPathPrefix + localPathSuffix);

        // the prefix check keeps normalised paths from escaping the public directory
        if(localPath.startsWith(localPathPrefix) && existsSync(localPath)) {
            const mediaTypes: Record<string, string> = {
                'html': 'text/html;charset=utf-8',
                'css': 'text/css;charset=utf-8',
                'txt': 'text/plain;charset=utf-8',
                'png': 'image/png',
            };

            let mediaType: string = 'application/octet-stream';
            const dotIndex = localPathSuffix.lastIndexOf('.');
            if(dotIndex >= 0) {
                const ext = localPathSuffix.substring(dotIndex + 1);
                if(ext in mediaTypes)
                    mediaType = mediaTypes[ext];
            }

            // empty body: the X-Accel-Redirect header asks the fronting proxy to deliver the file
            return new Response('', {
                status: 200,
                headers: {
                    ...headers,
                    'Content-Type': mediaType,
                    'X-Accel-Redirect': `/_public${localPathSuffix}`,
                },
            });
        }

        // 404 page
        return new Response('<!doctype html><meta charset=utf-8><title>404 Not Found</title><h1>404 Not Found</h1>', {
            status: 404,
            headers: {
                ...headers,
                'Content-Type': 'text/html;charset=utf-8',
            },
        });
    }

    // 404 fallback
    return new Response('', {
        status: ['OPTIONS', 'HEAD', 'GET', 'POST'].includes(req.method) ? 404 : 405,
        headers,
    });
};
|
|
|
|
|
|
|
|
// Start the HTTP server on the configured port; every incoming request is passed to requestHandler.
Deno.serve({ port }, requestHandler);
|