665 lines
21 KiB
TypeScript
665 lines
21 KiB
TypeScript
import * as cheerio from 'npm:cheerio@^1.0.0';
|
|
import { MemcacheClient } from 'npm:memcache-client@^1.0.5';
|
|
import { existsSync } from "jsr:@std/fs";
|
|
import { basename, join as pathJoin } from "jsr:@std/path";
|
|
import { normalize as pathNormalize } from "jsr:@std/path/normalize";
|
|
import { encodeBase64Url } from "jsr:@std/encoding/base64url";
|
|
import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
|
|
import { parseMediaType } from "jsr:@std/media-types";
|
|
import { Color } from "https://deno.land/x/color@v0.3.0/mod.ts";
|
|
|
|
// TODO: these settings should come from configuration (environment variables
// or a config file) instead of being hardcoded.

/** TCP port the HTTP server listens on. */
const port: number = 3009;

/** `host:port` address of the memcached server used by the metadata cache. */
const memcacheServer: string = "127.0.0.1:11211";

/**
 * Hosts accepted in the `Origin` request header.
 * Matching is exact on the host part of the origin, so subdomains must be listed individually.
 */
const allowedOrigins: string[] = [
    'edgii.net',
    'chat.edgii.net',
    'sockchat.edgii.net',
    'ajaxchat.edgii.net',
];
|
|
/**
 * Domain suffixes for which oEmbed discovery is permitted.
 * The list is loosely based on the provider list in the WordPress source.
 *
 * Every entry starts with a dot so that a suffix match accepts the domain
 * itself and any subdomain, but not unrelated hosts that merely end in the
 * same characters.
 */
const allowOEmbed: string[] = [
    '.youtube.com',
    '.youtu.be',
    '.vimeo.com',
    '.dailymotion.com',
    '.dai.ly',
    '.flickr.com',
    '.flic.kr',
    '.smugmug.com',
    '.scribd.com',
    '.wordpress.tv',
    '.crowdsignal.net',
    '.polldaddy.com',
    '.poll.fm',
    '.survey.fm',
    '.twitter.com',
    // x.com has a dedicated branch in extractOEmbedData; without this entry
    // that branch can never be reached.
    '.x.com',
    '.soundcloud.com',
    '.spotify.com',
    '.imgur.com',
    '.issuu.com',
    '.mixcloud.com',
    '.ted.com',
    '.animoto.com',
    '.video214.com',
    '.tumblr.com',
    '.kickstarter.com',
    '.kck.st',
    '.cloudup.com',
    '.reverbnation.com',
    '.videopress.com',
    '.reddit.com',
    '.speakerdeck.com',
    '.screencast.com',
    '.amazon.com',
    '.amazon.com.mx',
    '.amazon.com.br',
    '.amazon.ca',
    '.amazon.co.uk',
    '.amazon.de',
    '.amazon.fr',
    '.amazon.it',
    '.amazon.es',
    '.amazon.in',
    '.amazon.nl',
    '.amazon.ru',
    '.amazon.co.jp',
    '.amazon.com.au',
    '.amazon.cn',
    '.a.co',
    '.amzn.to',
    '.amzn.eu',
    '.amzn.in',
    '.amzn.asia',
    '.z.cn',
    '.somecards.com',
    '.some.ly',
    '.tiktok.com',
    '.pinterest.com',
    '.pinterest.com.au',
    '.pinterest.com.mx',
    '.wolframcloud.com',
    '.pca.st',
    '.anghami.com',
    '.bsky.app',
    '.apple.com',
];
|
|
|
|
/** True when a `.debug` marker file exists in the directory containing this module. */
const isDebug: boolean = existsSync(pathJoin(import.meta.dirname, '.debug'));

/** Memcached client used to store serialized metadata lookups. */
const cache: MemcacheClient = new MemcacheClient({
    server: memcacheServer,
    compressor: {
        // Pass-through compressor: the request handler brotli-compresses the
        // payload itself and stores it with `compress: false`, so the client
        // is given hooks that return the buffer unchanged.
        compressSync: buffer => buffer,
        decompressSync: buffer => buffer,
    },
});
|
|
|
|
/**
 * Reads a byte stream to completion and decodes it as UTF-8 text.
 *
 * @param stream Stream to drain. `null` and `undefined` (e.g. a request or
 *               response without a body) are treated as an empty body.
 * @returns The decoded text, or an empty string when there is no stream.
 */
const readableStreamToString = async (stream?: ReadableStream<Uint8Array> | null): Promise<string> => {
    // `== null` covers both null and undefined; the parameter is optional, so
    // checking for null alone would crash on `getReader()` for undefined.
    if(stream == null)
        return '';

    const reader = stream.getReader();
    const decoder = new TextDecoder();
    let result = '';

    try {
        for(;;) {
            const { done, value } = await reader.read();
            if(done) break;
            // stream: true keeps multi-byte sequences that are split across
            // chunk boundaries buffered inside the decoder.
            result += decoder.decode(value, { stream: true });
        }
    } finally {
        // Release the lock even when reading fails part-way through.
        reader.releaseLock();
    }

    // Flush whatever the decoder is still holding on to.
    result += decoder.decode();

    return result;
};
|
|
|
|
/**
 * Checks whether a host is covered by the oEmbed allow-list.
 *
 * @param domain Host name, with or without a leading dot.
 * @returns true when the host equals, or is a subdomain of, an allow-listed domain.
 */
const isAllowedOEmbedDomain = (domain: string): boolean => {
    // Allow-list entries start with a dot; giving the candidate the same
    // prefix lets one suffix test cover "example.com" and "www.example.com"
    // while still rejecting "notexample.com".
    const dotted = (domain.startsWith('.') ? domain : '.' + domain).toLowerCase();

    return allowOEmbed.some(suffix => dotted.endsWith(suffix));
};
|
|
|
|
/**
 * Collects basic metadata from plain HTML tags (no Open Graph / Twitter cards).
 *
 * @param html Document root function, as returned by `cheerio.load`.
 * @returns An object holding whichever of `title`, `description`, `thumbnail`,
 *          `theme_color`, `image` and `canonical_url` were present and non-empty.
 */
const extractHtmlMetaData = (html) => {
    const values: Record<string, string> = {};

    // Trimmed attribute of the first element matching the selector, or '' when absent.
    const firstAttr = (selector: string, attribute: string): string =>
        html(selector).first()?.attr(attribute)?.trim() ?? '';

    const titleTag: string = html('title')?.first()?.text().trim() ?? '';
    if(titleTag.length > 0)
        values.title = titleTag;

    const description = firstAttr('meta[name="description"]', 'content');
    if(description.length > 0)
        values.description = description;

    const thumbnail = firstAttr('meta[name="thumbnail"]', 'content');
    if(thumbnail.length > 0)
        values.thumbnail = thumbnail;

    const themeColor = firstAttr('meta[name="theme-color"]', 'content');
    if(themeColor.length > 0)
        try {
            values.theme_color = Color.string(themeColor).hex();
        } catch(ex) {
            // The value is arbitrary page content; a colour that cannot be
            // parsed must not abort extraction of everything else.
            console.error(ex);
        }

    const imageSrc = firstAttr('link[rel="image_src"]', 'href');
    if(imageSrc.length > 0)
        values.image = imageSrc;

    const canonical = firstAttr('link[rel="canonical"]', 'href');
    if(canonical.length > 0)
        values.canonical_url = canonical;

    return values;
};
|
|
|
|
/**
 * Collects Open Graph (`og:*`) metadata from `<meta property="og:...">` tags.
 *
 * Simple properties become top-level keys (`title`, `url`, ...). Structured
 * properties are grouped into arrays of objects (`images`, `videos`,
 * `audios`); sub-properties such as `og:image:width` attach to the most
 * recently started object. `og:locale:alternate` is collected in `locales`
 * and `og:video:tag` in the `tags` array of the current video.
 *
 * @param html Document root function, as returned by `cheerio.load`.
 * @returns The collected values; invalid or empty values are left out.
 */
const extractOpenGraphData = (html) => {
    interface OgPropertyInfo {
        /** How the content is validated: plain string, URL, media type or integer. */
        type?: 'str' | 'url' | 'mime' | 'int';
        /** The property is shorthand for this other property. */
        alias?: string;
        /** Name of the structured property this sub-property belongs to. */
        of?: string;
        /** Values (or objects, for structured roots) are collected in an array under this key. */
        array?: string;
        /** Accepted URL protocols; defaults to https: and http:. */
        protos?: string[];
    }

    // The result is built dynamically from page content (nested objects and
    // arrays whose keys come from the table below), hence the loose typing.
    // deno-lint-ignore no-explicit-any
    const values: Record<string, any> = {};

    const properties: Record<string, OgPropertyInfo> = {
        'url': { type: 'url' },
        'type': { type: 'str' },
        'title': { type: 'str' },
        'locale': { type: 'str' },
        'locale:alternate': { type: 'str', array: 'locales' },
        'description': { type: 'str' },
        'determiner': { type: 'str' },
        'site_name': { type: 'str' },

        'image': { alias: 'image:url', array: 'images' },
        'image:url': { of: 'image', type: 'url' },
        'image:secure_url': { of: 'image', type: 'url', protos: ['https:'] },
        'image:type': { of: 'image', type: 'mime' },
        'image:width': { of: 'image', type: 'int' },
        'image:height': { of: 'image', type: 'int' },
        // Must be 'str': any type the validator below does not know is
        // rejected, which silently dropped every og:image:alt.
        'image:alt': { of: 'image', type: 'str' },

        'video': { alias: 'video:url', array: 'videos' },
        'video:url': { of: 'video', type: 'url' },
        'video:secure_url': { of: 'video', type: 'url', protos: ['https:'] },
        'video:type': { of: 'video', type: 'mime' },
        'video:width': { of: 'video', type: 'int' },
        'video:height': { of: 'video', type: 'int' },
        'video:tag': { of: 'video', type: 'str', array: 'tags' },

        'audio': { alias: 'audio:url', array: 'audios' },
        'audio:url': { of: 'audio', type: 'url' },
        'audio:secure_url': { of: 'audio', type: 'url', protos: ['https:'] },
        'audio:type': { of: 'audio', type: 'mime' },
    };

    const tags = html('meta[property^="og:"]');
    for(const tagInfo of tags) {
        const tag = html(tagInfo);

        // Strip the "og:" prefix.
        let name: string = (tag.attr('property')?.trim() ?? '').substring(3);
        // Own-property check so names like "constructor" do not match
        // members inherited from Object.prototype.
        if(!Object.prototype.hasOwnProperty.call(properties, name))
            continue;

        const rawValue: string = tag.attr('content')?.trim() ?? '';

        let propInfo = properties[name];
        // Object (or array) the value will eventually be written into.
        // deno-lint-ignore no-explicit-any
        let target: any = values;

        // "og:image" is shorthand for "og:image:url", etc.
        if(propInfo.alias) {
            name = propInfo.alias;
            propInfo = properties[name];
        }

        if(propInfo.of) {
            // "image:width" -> "width"
            name = name.substring(propInfo.of.length + 1);
            const objInfo = properties[propInfo.of];

            if(objInfo.array) {
                if(objInfo.array in target)
                    target = target[objInfo.array];
                else
                    target = target[objInfo.array] = [];

                // A sub-property that the current object already has marks the
                // start of the next object in the list.
                const lastItem = target[target.length - 1];
                if(lastItem === undefined || name in lastItem) {
                    const newItem = {};
                    target.push(newItem);
                    target = newItem;
                } else
                    target = lastItem;
            } else {
                if(!(name in target))
                    target[name] = {};

                target = target[name];
            }
        }

        if(propInfo.array) {
            if(propInfo.array in target)
                target = target[propInfo.array];
            else
                target = target[propInfo.array] = [];
        } else if(name in target)
            continue; // first occurrence of a scalar property wins

        let value: string | number | undefined;
        switch(propInfo.type) {
            case 'int':
                // NaN (and 0) are falsy and get discarded below.
                value = parseInt(rawValue, 10);
                break;

            case 'mime':
                // world's most naive validation
                value = rawValue.includes('/') ? rawValue : undefined;
                break;

            case 'url':
                try {
                    const protos = propInfo.protos ?? ['https:', 'http:'];
                    value = protos.includes(new URL(rawValue).protocol) ? rawValue : undefined;
                } catch(ex) {
                    console.error(ex);
                    value = undefined;
                }
                break;

            case 'str':
                value = rawValue;
                break;

            default:
                value = undefined;
        }

        if(value) {
            if(propInfo.array)
                target.push(value);
            else
                target[name] = value;
        }
    }

    return values;
};
|
|
|
|
// Reads the Twitter card <meta name="twitter:*"> tags from the document.
// Each tag that is present with non-blank content ends up in the result under
// its property name, with the first ':' swapped for '_' (site:id -> site_id).
const extractTwitterData = (html) => {
    const cardFields = [
        'card',
        'site',
        'site:id',
        'creator',
        'creator:id',
        'description',
        'title',
        'image',
        'image:alt',
        'player',
        'player:width',
        'player:height',
        'player:stream',
    ];

    const found = cardFields
        .map(field => {
            const content = html(`meta[name="twitter:${field}"]`)?.first()?.attr('content')?.trim() ?? '';
            return [field.replace(':', '_'), content];
        })
        .filter(([, content]) => content.length > 0);

    return Object.fromEntries(found);
};
|
|
|
|
// Gathers every JSON-LD document embedded in the page
// (<script type="application/ld+json">). Scripts whose content is not valid
// JSON are logged and skipped; the rest are returned in document order.
const extractLinkedData = (html) => {
    const scripts = html('script[type="application/ld+json"]');

    return Array.from(scripts).reduce((documents, script) => {
        try {
            const source = html(script).text().trim();
            documents.push(JSON.parse(source));
        } catch(ex) {
            console.error(ex);
        }

        return documents;
    }, []);
};
|
|
|
|
/**
 * Fetches the oEmbed document for a page.
 *
 * For x.com / twitter.com the publish.twitter.com endpoint is used directly;
 * for everything else the endpoint is discovered through the page's
 * `<link rel="alternate" type="application/json+oembed">` tag.
 *
 * @param html    Document root function, as returned by `cheerio.load`.
 * @param url     Normalized URL of the page.
 * @param urlInfo Parsed form of `url`.
 * @returns The oEmbed JSON object, or `{}` when no endpoint was found or the
 *          lookup failed for any reason. Never rejects because of a failed lookup.
 */
const extractOEmbedData = async (html, url: string, urlInfo: URL): Promise<Record<string, unknown>> => {
    // TODO: this should also support header discovery

    let oEmbedUrl = '';

    // idk how long i'll bother with this for
    if(urlInfo.host === 'x.com' || urlInfo.host === 'twitter.com')
        oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodeURIComponent(url)}`;
    else
        oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first()?.attr('href')?.trim() ?? '';

    if(oEmbedUrl === '')
        return {};

    try {
        // The href is page-controlled: resolve relative references against the
        // page and refuse anything that is not plain HTTP(S).
        const endpoint = new URL(oEmbedUrl, url);
        if(!['https:', 'http:'].includes(endpoint.protocol))
            return {};

        const response = await fetch(endpoint.href);
        if(!response.ok) {
            await response.body?.cancel();
            return {};
        }

        // Await inside the try block; returning the bare promise would let a
        // JSON parse failure escape the catch below and fail the whole lookup.
        const payload: unknown = await response.json();
        if(typeof payload !== 'object' || payload === null || Array.isArray(payload))
            return {};

        return payload as Record<string, unknown>;
    } catch(ex) {
        console.error(ex);
        return {};
    }
};
|
|
|
|
/**
 * Fetches a URL and builds the flat metadata object returned by `/metadata`.
 *
 * For HTML / XHTML responses the document is parsed and the result is
 * assembled from Open Graph data, Twitter card data and plain HTML tags (in
 * that order of preference), plus any JSON-LD documents (`lds`) and, for
 * allow-listed domains, the oEmbed document (`oembed`). For every other
 * media type only `url`, `title` (the file name) and `site_name` (the host)
 * are filled in and the response body is discarded unread.
 *
 * @param url     Normalized URL to look up.
 * @param urlInfo Parsed form of `url`.
 * @returns The metadata object. Rejects when the fetch itself fails.
 */
const extractMetadata = async (url: string, urlInfo: URL) => {
    const data = await fetch(url);
    const contentTypeRaw = data.headers.get('content-type') ?? '';

    // A header with malformed parameters makes parseMediaType throw; fall back
    // to the bare type so a sloppy server does not fail the whole lookup.
    let mediaType: string;
    try {
        mediaType = parseMediaType(contentTypeRaw)[0];
    } catch(ex) {
        console.error(ex);
        mediaType = contentTypeRaw.split(';')[0].trim().toLowerCase();
    }

    const info: Record<string, unknown> = {};
    // Sets info[prop] unless the value is null or undefined.
    const addInfoOrDont = (prop: string, value: unknown) => {
        if(value !== null && value !== undefined)
            info[prop] = value;
    };

    if(contentTypeRaw)
        info.media_type = mediaType;

    if(['text/html', 'application/xhtml+xml'].includes(mediaType)) {
        const html = cheerio.load(await readableStreamToString(data.body));

        const metaData = extractHtmlMetaData(html);
        const ogData = extractOpenGraphData(html);
        const twitterData = extractTwitterData(html);

        addInfoOrDont('url', ogData.url ?? metaData.canonical_url ?? url);
        addInfoOrDont('title', ogData.title ?? twitterData.title ?? metaData.title);
        addInfoOrDont('site_name', ogData.site_name);
        addInfoOrDont('description', ogData.description ?? twitterData.description ?? metaData.description);
        addInfoOrDont('color', metaData.theme_color);

        // Image: first Open Graph image, else Twitter card / plain HTML tags.
        if(ogData.images?.length > 0) {
            const image = ogData.images[0];
            info.image = info.image_url = image.secure_url ?? image.url;
            if(image.width > 0)
                info.image_width = image.width;
            if(image.height > 0)
                info.image_height = image.height;
            if(image.type)
                info.image_type = image.type;
            if(image.alt)
                info.image_alt = image.alt;
            if(image.width > 0)
                info.width = image.width;
            if(image.height > 0)
                info.height = image.height;
        } else {
            addInfoOrDont('image_url', twitterData.image ?? metaData.image ?? metaData.thumbnail);
            addInfoOrDont('image_alt', twitterData.image_alt);
            if(info.image_url)
                info.image = info.image_url;
        }

        if(ogData.audios?.length > 0) {
            const audio = ogData.audios[0];
            info.audio_url = audio.secure_url ?? audio.url;
            if(audio.type)
                info.audio_type = audio.type;
        }

        // Video: first Open Graph video, else the Twitter player card.
        // Video dimensions take precedence over image dimensions for the
        // generic width/height fields.
        if(ogData.videos?.length > 0) {
            const video = ogData.videos[0];
            info.video_url = video.secure_url ?? video.url;
            if(video.width > 0)
                info.video_width = video.width;
            if(video.height > 0)
                info.video_height = video.height;
            if(video.type)
                info.video_type = video.type;
            if(video.tags?.length > 0)
                info.video_tags = video.tags;
            if(video.width > 0)
                info.width = video.width;
            if(video.height > 0)
                info.height = video.height;
        } else {
            // NOTE: Twitter card values are passed through as the strings found
            // in the page, so these dimensions are strings, not numbers.
            addInfoOrDont('video_url', twitterData.player);
            addInfoOrDont('video_width', twitterData.player_width);
            addInfoOrDont('video_height', twitterData.player_height);
            if(Number(twitterData.player_width) > 0)
                info.width = twitterData.player_width;
            if(Number(twitterData.player_height) > 0)
                info.height = twitterData.player_height;
        }

        const linkedDatas = extractLinkedData(html);
        if(linkedDatas.length > 0)
            info.lds = linkedDatas;

        if(isAllowedOEmbedDomain(urlInfo.host)) {
            const oEmbedData = await extractOEmbedData(html, url, urlInfo);
            if(oEmbedData.version)
                info.oembed = oEmbedData;
        }
    } else {
        // The body is not needed here. Cancel it so the (possibly very large)
        // download stops and the connection is released.
        try {
            await data.body?.cancel();
        } catch(ex) {
            console.error(ex);
        }

        info.url = url;

        // The path may contain malformed percent-escapes, which make
        // decodeURIComponent throw; fall back to the raw file name.
        const fileName = basename(urlInfo.pathname);
        try {
            info.title = decodeURIComponent(fileName);
        } catch {
            info.title = fileName;
        }

        info.site_name = urlInfo.host;

        // TODO: media-type specific handling for image/*, video/* and audio/*
        // responses is not implemented yet.
    }

    return info;
};
|
|
|
|
/**
 * Handles `/metadata`: looks up metadata for the URL given in the `url` query
 * parameter (GET/HEAD) or in the request body (POST) and returns it as JSON.
 */
const handleMetadataRequest = async (req: Request, url: URL, headers: Record<string, string>): Promise<Response> => {
    if(!['GET', 'HEAD', 'POST'].includes(req.method))
        return new Response('', { status: 405, headers });

    const started = performance.now();

    headers['Content-Type'] = 'application/json;charset=utf-8';

    let urlParamRaw = req.method === 'POST'
        ? (await readableStreamToString(req.body)).trim()
        : (url.searchParams.get('url')?.trim() ?? '');

    if(urlParamRaw === '')
        return new Response('{"error":"metadata:uri"}', { status: 400, headers });
    // Protocol-relative URLs are assumed to be HTTPS.
    if(urlParamRaw.startsWith('//'))
        urlParamRaw = 'https:' + urlParamRaw;

    let urlParam: URL;
    try {
        urlParam = new URL(urlParamRaw);
    } catch {
        return new Response('{"error":"metadata:uri"}', { status: 400, headers });
    }

    // Only plain web URLs may be fetched on behalf of a client, matching the
    // check on the thumbnail endpoint (keeps file:, data:, ... out of fetch).
    if(!['http:', 'https:'].includes(urlParam.protocol))
        return new Response('{"error":"metadata:uri"}', { status: 400, headers });

    urlParamRaw = urlParam.toString();

    const urlHash = encodeBase64Url(
        await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw))
    );
    const cacheKey = `uiharu:metadata:${urlHash}`;

    // NOTE: reading from the cache is currently disabled; results are only written.
    // const cacheInfo = await cache.get(cacheKey);
    // if(cacheInfo !== undefined)
    //     return new Response(
    //         brotliDecompressSync(cacheInfo.value),
    //         {
    //             status: 200,
    //             headers: {
    //                 ...headers,
    //                 ...{
    //                     'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
    //                     'X-Uiharu-State': 'cache',
    //                 },
    //             },
    //         }
    //     );

    try {
        const json = JSON.stringify(
            await extractMetadata(urlParamRaw, urlParam)
        );

        // Fire-and-forget cache write. Failures (including an unreachable
        // memcached) are logged instead of surfacing as an unhandled rejection
        // or failing a lookup that already succeeded.
        void (async () => {
            try {
                await cache.set(cacheKey, brotliCompressSync(json), {
                    compress: false,
                    lifetime: 600
                });
            } catch(ex) {
                console.error(ex);
            }
        })();

        return new Response(json, {
            status: 200,
            headers: {
                ...headers,
                'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
                'X-Uiharu-State': 'fresh',
            },
        });
    } catch(ex) {
        console.error(ex);
        return new Response('{"error":"metadata:lookup"}', { status: 500, headers });
    }
};

/**
 * Handles `/metadata/thumb/audio` and `/metadata/thumb/video`: runs ffmpeg on
 * the URL from the `url` query parameter and returns a single extracted frame.
 */
const handleThumbnailRequest = async (req: Request, url: URL, headers: Record<string, string>, isAudio: boolean): Promise<Response> => {
    if(!['HEAD', 'GET'].includes(req.method))
        return new Response('', { status: 405, headers });

    let urlParamRaw = url.searchParams.get('url')?.trim() ?? '';
    if(urlParamRaw === '')
        return new Response('missing url parameter', { status: 400, headers });

    let scheme = '';
    try {
        const urlParam = new URL(urlParamRaw);
        scheme = urlParam.protocol;
        urlParamRaw = urlParam.toString();
    } catch {
        return new Response('invalid url parameter', { status: 400, headers });
    }

    if(!['http:', 'https:'].includes(scheme))
        return new Response('unsupported url scheme', { status: 400, headers });

    // this seems like a terrible idea lol
    // The URL is passed as its own argv entry (no shell involved) and is
    // guaranteed to start with http: or https:, so it cannot be read as an option.
    const args = ['-i', urlParamRaw];
    if(isAudio) args.push('-an');
    args.push('-f', 'image2pipe', '-c:v', isAudio ? 'copy' : 'png', '-frames:v', '1', '-');

    const { code, stdout, stderr } = await (new Deno.Command('ffmpeg', {
        stdin: 'null',
        stdout: 'piped',
        stderr: 'piped',
        args,
    })).output();

    if(code !== 0) {
        console.error(new TextDecoder().decode(stderr));
        return new Response('decode failed', { status: 500, headers });
    }

    // TODO: bother with cache someday maybe
    // NOTE(review): the audio variant stream-copies the embedded picture, so the
    // bytes may not actually be PNG even though they are labelled as such - confirm.
    return new Response(stdout, {
        headers: {
            ...headers,
            'Content-Type': 'image/png',
            'Cache-Control': 'public, max-age=31536000, immutable',
        },
    });
};

/**
 * Serves files from the `public` directory next to this module by replying
 * with an `X-Accel-Redirect` header, or a 404 page when the file does not exist.
 */
const handlePublicFileRequest = (url: URL, headers: Record<string, string>): Response => {
    const localPathPrefix = import.meta.dirname + '/public/';
    const localPathSuffix = pathNormalize(url.pathname === '/' ? '/index.html' : url.pathname);
    const localPath = pathNormalize(localPathPrefix + localPathSuffix);

    // The prefix check rejects paths that normalize to outside of /public.
    if(localPath.startsWith(localPathPrefix) && existsSync(localPath)) {
        const mediaTypes: Record<string, string> = {
            'html': 'text/html;charset=utf-8',
            'css': 'text/css;charset=utf-8',
            'txt': 'text/plain;charset=utf-8',
            'png': 'image/png',
        };

        let mediaType = 'application/octet-stream';
        const dotIndex = localPathSuffix.lastIndexOf('.');
        if(dotIndex >= 0) {
            const ext = localPathSuffix.substring(dotIndex + 1);
            // Own-property check: an extension such as "constructor" must not
            // resolve to a member inherited from Object.prototype.
            if(Object.prototype.hasOwnProperty.call(mediaTypes, ext))
                mediaType = mediaTypes[ext];
        }

        return new Response('', {
            status: 200,
            headers: {
                ...headers,
                'Content-Type': mediaType,
                'X-Accel-Redirect': `/_public${localPathSuffix}`,
            },
        });
    }

    // 404 page
    return new Response('<!doctype html><meta charset=utf-8><title>404 Not Found</title><h1>404 Not Found</h1>', {
        status: 404,
        headers: {
            ...headers,
            'Content-Type': 'text/html;charset=utf-8',
        },
    });
};

/**
 * HTTP entry point.
 *
 * Routes:
 * - any path, `OPTIONS`: CORS preflight (204)
 * - `/metadata`: metadata lookup
 * - `/metadata/batch`: placeholder that always returns an empty result set
 * - `/metadata/thumb/audio`, `/metadata/thumb/video`: thumbnail extraction
 * - anything else, `GET`/`HEAD`: static files from `public`
 *
 * Requests carrying an `Origin` header are rejected with 403 unless the
 * origin's host is in `allowedOrigins`.
 *
 * @param req Incoming request.
 * @returns The response to send.
 */
const requestHandler = async (req: Request): Promise<Response> => {
    const url = new URL(req.url);
    const headers: Record<string, string> = { 'X-Powered-By': 'Uiharu' };

    const originRaw = req.headers.get('origin');
    if(originRaw !== null) {
        // Origin is client-controlled and need not be a URL (browsers send the
        // literal "null" for opaque origins); treat anything unparseable as
        // not allowed instead of letting the handler throw.
        let originHost: string;
        try {
            originHost = new URL(originRaw).host;
        } catch {
            return new Response('403', { status: 403, headers });
        }

        if(!allowedOrigins.includes(originHost))
            return new Response('403', { status: 403, headers });

        headers['Access-Control-Allow-Origin'] = originRaw;
        headers['Vary'] = 'Origin';
    }

    if(req.method === 'OPTIONS') {
        headers['Allow'] = 'OPTIONS, GET, HEAD, POST';
        headers['Access-Control-Allow-Methods'] = 'OPTIONS, GET, HEAD, POST';

        // 204 is a "null body status": the Response constructor throws a
        // TypeError when given any body for it, even an empty string.
        return new Response(null, { status: 204, headers });
    }

    if(url.pathname === '/metadata')
        return handleMetadataRequest(req, url, headers);

    if(url.pathname === '/metadata/batch') {
        if(!['GET', 'HEAD', 'POST'].includes(req.method))
            return new Response('', { status: 405, headers });

        return new Response('{"took":0,"results":[]}', {
            headers: {
                ...headers,
                'Content-Type': 'application/json',
            },
        });
    }

    const isAudio = url.pathname === '/metadata/thumb/audio';
    const isVideo = url.pathname === '/metadata/thumb/video';
    if(isAudio || isVideo)
        return handleThumbnailRequest(req, url, headers, isAudio);

    // serving files from /public dir
    if(['HEAD', 'GET'].includes(req.method))
        return handlePublicFileRequest(url, headers);

    // 404 fallback
    return new Response('', {
        status: ['OPTIONS', 'HEAD', 'GET', 'POST'].includes(req.method) ? 404 : 405,
        headers,
    });
};
|
|
|
|
// Start the HTTP server on the configured port; every request goes through requestHandler.
Deno.serve({ port }, requestHandler);
|