import * as cheerio from 'npm:cheerio@^1.0.0';
import { MemcacheClient } from 'npm:memcache-client@^1.0.5';
import { existsSync } from "jsr:@std/fs";
import { basename, join as pathJoin } from "jsr:@std/path";
import { normalize as pathNormalize } from "jsr:@std/path/normalize";
import { encodeBase64Url } from "jsr:@std/encoding/base64url";
import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
import { parseMediaType } from "jsr:@std/media-types";
import { Color } from "https://deno.land/x/color@v0.3.0/mod.ts";

// TODO: these should not be hardcoded
const port: number = 3009;
const memcacheServer: string = "127.0.0.1:11211";

// Hosts whose `Origin` header is accepted for cross-origin requests.
const allowedOrigins: string[] = [
    'edgii.net',
    'chat.edgii.net',
    'sockchat.edgii.net',
    'ajaxchat.edgii.net',
];

// Domain suffixes for which oEmbed lookups are performed.
// Each entry starts with a dot so that it matches the bare domain and any subdomain.
const allowOEmbed: string[] = [
    // copied from wordpress source sorta
    '.youtube.com', '.youtu.be',
    '.vimeo.com',
    '.dailymotion.com', '.dai.ly',
    '.flickr.com', '.flic.kr',
    '.smugmug.com',
    '.scribd.com',
    '.wordpress.tv',
    '.crowdsignal.net', '.polldaddy.com', '.poll.fm', '.survey.fm',
    // x.com is special-cased by the oEmbed extractor, so it has to pass the allow-list as well
    '.twitter.com', '.x.com',
    '.soundcloud.com',
    '.spotify.com',
    '.imgur.com',
    '.issuu.com',
    '.mixcloud.com',
    '.ted.com',
    '.animoto.com', '.video214.com',
    '.tumblr.com',
    '.kickstarter.com', '.kck.st',
    '.cloudup.com',
    '.reverbnation.com',
    '.videopress.com',
    '.reddit.com',
    '.speakerdeck.com',
    '.screencast.com',
    '.amazon.com', '.amazon.com.mx', '.amazon.com.br', '.amazon.ca', '.amazon.co.uk',
    '.amazon.de', '.amazon.fr', '.amazon.it', '.amazon.es', '.amazon.in', '.amazon.nl',
    '.amazon.ru', '.amazon.co.jp', '.amazon.com.au', '.amazon.cn',
    '.a.co', '.amzn.to', '.amzn.eu', '.amzn.in', '.amzn.asia', '.z.cn',
    '.somecards.com', '.some.ly',
    '.tiktok.com',
    '.pinterest.com', '.pinterest.com.au', '.pinterest.com.mx',
    '.wolframcloud.com',
    '.pca.st',
    '.anghami.com',
    '.bsky.app',
    '.apple.com',
];

// Debug mode is switched on by the presence of a `.debug` file next to this module.
const isDebug: boolean = existsSync(pathJoin(import.meta.dirname, '.debug'));

const cache: MemcacheClient = new MemcacheClient({
    server: memcacheServer,
    compressor: {
        // Pass-through compressor: values are brotli-compressed by the caller
        // before being handed to cache.set(), so the client must not touch them.
        compressSync: buffer => buffer,
        decompressSync: buffer => buffer,
    },
});

/**
 * Reads a byte stream to completion and decodes it as UTF-8 text.
 *
 * @param stream Stream to drain; `null`/`undefined` (e.g. a body-less request
 *               or response) yields an empty string.
 * @returns The decoded text.
 */
const readableStreamToString = async (stream?: ReadableStream<Uint8Array> | null): Promise<string> => {
    // `== null` on purpose: the parameter is optional, so undefined must be handled as well as null
    if(stream == null)
        return '';

    const reader = stream.getReader();
    const decoder = new TextDecoder;
    let result = '';

    try {
        for(;;) {
            const { done, value } = await reader.read();
            if(done)
                break;

            // stream mode keeps multi-byte sequences that straddle chunk boundaries intact
            result += decoder.decode(value, { stream: true });
        }
    } finally {
        reader.releaseLock();
    }

    // flush whatever the decoder is still holding on to
    result += decoder.decode();

    return result;
};

/**
 * Tests whether a host name is covered by the oEmbed allow-list.
 *
 * @param domain Host name, with or without a leading dot.
 * @returns `true` when the host equals, or is a subdomain of, an allowed domain.
 */
const isAllowedOEmbedDomain = (domain: string): boolean => {
    // normalise to a leading dot so 'youtube.com' matches the '.youtube.com' suffix
    // while 'notyoutube.com' does not
    const dotted = domain.startsWith('.') ? domain : '.' + domain;
    return allowOEmbed.some(suffix => dotted.endsWith(suffix));
};

// Collects plain HTML metadata (<title>, <meta name=...>, <link rel=...>) from a loaded document.
// Only non-empty values are recorded.
const extractHtmlMetaData = (html) => {
    const values = {};

    const titleTag = html('title')?.first()?.text().trim() ?? '';
    if(titleTag.length > 0) values.title = titleTag;

    const metaDescriptionTag = html('meta[name="description"]').first()?.attr('content')?.trim() ?? '';
    if(metaDescriptionTag.length > 0) values.description = metaDescriptionTag;

    const metaThumbnailTag = html('meta[name="thumbnail"]').first()?.attr('content')?.trim() ?? '';
    if(metaThumbnailTag.length > 0) values.thumbnail = metaThumbnailTag;

    // NOTE(review): confirm Color.string() cannot throw on malformed input;
    // an exception here would abort the entire metadata lookup.
    const metaThemeColorTag = html('meta[name="theme-color"]').first()?.attr('content')?.trim() ?? '';
    if(metaThemeColorTag.length > 0) values.theme_color = Color.string(metaThemeColorTag).hex();

    const linkImageSrcTag = html('link[rel="image_src"]').first()?.attr('href')?.trim() ?? '';
    if(linkImageSrcTag.length > 0) values.image = linkImageSrcTag;

    const linkCanonicalTag = html('link[rel="canonical"]').first()?.attr('href')?.trim() ??
'';
    if(linkCanonicalTag.length > 0) values.canonical_url = linkCanonicalTag;

    return values;
};

/**
 * Collects Open Graph (`<meta property="og:...">`) data from a loaded document.
 *
 * Scalar properties keep their first valid occurrence. Properties marked with
 * `array` are collected into a list under that key. Structured properties
 * (`og:image:*`, `og:video:*`, `og:audio:*`) are grouped into objects inside
 * the `images` / `videos` / `audios` lists; a new object is started whenever a
 * sub-property repeats, so tag order in the document determines the grouping.
 *
 * @param html Loaded cheerio document.
 * @returns Object tree of the recognised, validated properties.
 */
const extractOpenGraphData = (html: cheerio.CheerioAPI) => {
    interface OgPropertyInfo {
        /** Validation applied to the content attribute; absent on pure aliases. */
        type?: 'str' | 'url' | 'mime' | 'int';
        /** Treat this property as another one (`og:image` is `og:image:url`). */
        alias?: string;
        /** Name of the structured parent property this one belongs to. */
        of?: string;
        /** Key of the list the value (or, for parents, the object) is collected under. */
        array?: string;
        /** Accepted URL schemes for `url` values; defaults to http and https. */
        protos?: string[];
    }

    // The result is a dynamically shaped tree of strings, numbers, lists and objects.
    // deno-lint-ignore no-explicit-any
    type OgNode = any;

    const values: Record<string, OgNode> = {};

    // this is hateful
    const properties: Record<string, OgPropertyInfo> = {
        'url': { type: 'url' },
        'type': { type: 'str' },
        'title': { type: 'str' },
        'locale': { type: 'str' },
        'locale:alternate': { type: 'str', array: 'locales' },
        'description': { type: 'str' },
        'determiner': { type: 'str' },
        'site_name': { type: 'str' },

        'image': { alias: 'image:url', array: 'images' },
        'image:url': { of: 'image', type: 'url' },
        'image:secure_url': { of: 'image', type: 'url', protos: ['https:'] },
        'image:type': { of: 'image', type: 'mime' },
        'image:width': { of: 'image', type: 'int' },
        'image:height': { of: 'image', type: 'int' },
        // was 'string', which the validation below does not know, so the value was always dropped
        'image:alt': { of: 'image', type: 'str' },

        'video': { alias: 'video:url', array: 'videos' },
        'video:url': { of: 'video', type: 'url' },
        'video:secure_url': { of: 'video', type: 'url', protos: ['https:'] },
        'video:type': { of: 'video', type: 'mime' },
        'video:width': { of: 'video', type: 'int' },
        'video:height': { of: 'video', type: 'int' },
        'video:tag': { of: 'video', type: 'str', array: 'tags' },

        'audio': { alias: 'audio:url', array: 'audios' },
        'audio:url': { of: 'audio', type: 'url' },
        'audio:secure_url': { of: 'audio', type: 'url', protos: ['https:'] },
        'audio:type': { of: 'audio', type: 'mime' },
    };

    // own-property check so names like "constructor" are not mistaken for known properties
    const isKnownProperty = (name: string): boolean =>
        Object.prototype.hasOwnProperty.call(properties, name);

    for(const tagInfo of html('meta[property^="og:"]')) {
        const tag = html(tagInfo);

        // strip the "og:" prefix
        let name = (tag.attr('property')?.trim() ?? '').substring(3);
        if(!isKnownProperty(name))
            continue;

        const raw = tag.attr('content')?.trim() ?? '';
        let propInfo = properties[name];
        let target: OgNode = values;

        if(propInfo.alias) {
            name = propInfo.alias;
            propInfo = properties[name];
        }

        if(propInfo.of) {
            // "image:width" -> "width", stored on an object belonging to "image"
            name = name.substring(propInfo.of.length + 1);

            const objInfo = properties[propInfo.of];
            if(objInfo.array) {
                if(objInfo.array in target)
                    target = target[objInfo.array];
                else
                    target = target[objInfo.array] = [];

                // a repeated sub-property means the document has moved on to the next object
                const lastItem = target[target.length - 1];
                if(lastItem === undefined || name in lastItem) {
                    const newItem = {};
                    target.push(newItem);
                    target = newItem;
                } else
                    target = lastItem;
            } else {
                if(!(name in target))
                    target[name] = {};
                target = target[name];
            }
        }

        if(propInfo.array) {
            if(propInfo.array in target)
                target = target[propInfo.array];
            else
                target = target[propInfo.array] = [];
        } else if(name in target)
            continue; // first occurrence of a scalar wins

        let value: string | number | undefined;
        switch(propInfo.type) {
            case 'int':
                value = parseInt(raw, 10);
                break;

            case 'mime':
                // world's most naive validation
                value = raw.includes('/') ? raw : undefined;
                break;

            case 'url':
                try {
                    const protos = propInfo.protos ?? ['https:', 'http:'];
                    value = protos.includes(new URL(raw).protocol) ? raw : undefined;
                } catch(ex) {
                    console.error(ex);
                    value = undefined;
                }
                break;

            case 'str':
                value = raw;
                break;

            default:
                value = undefined;
        }

        // drops empty strings, NaN, 0 and anything that failed validation
        if(!value)
            continue;

        if(propInfo.array)
            target.push(value);
        else
            target[name] = value;
    }

    return values;
};

// Collects Twitter card (<meta name="twitter:...">) data from a loaded document.
// Keys have their colon replaced by an underscore (e.g. "image:alt" -> "image_alt").
const extractTwitterData = (html) => {
    const values = {};

    const properties = [
        'card',
        'site', 'site:id',
        'creator', 'creator:id',
        'description',
        'title',
        'image', 'image:alt',
        'player', 'player:width', 'player:height', 'player:stream',
    ];

    for(const property of properties) {
        const tag = html(`meta[name="twitter:${property}"]`)?.first()?.attr('content')?.trim() ??
'';
        if(tag.length > 0) values[property.replace(':', '_')] = tag;
    }

    return values;
};

/**
 * Parses every `<script type="application/ld+json">` block of a loaded document.
 *
 * @param html Loaded cheerio document.
 * @returns The parsed JSON-LD payloads in document order; blocks that are not
 *          valid JSON are logged and skipped.
 */
const extractLinkedData = (html: cheerio.CheerioAPI): unknown[] => {
    const values: unknown[] = [];

    for(const tagInfo of html('script[type="application/ld+json"]')) {
        try {
            values.push(JSON.parse(html(tagInfo).text().trim()));
        } catch(ex) {
            console.error(ex);
        }
    }

    return values;
};

/**
 * Fetches the oEmbed document for a page.
 *
 * x.com / twitter.com pages go through the publish.twitter.com endpoint; every
 * other page uses the `<link rel="alternate" type="application/json+oembed">`
 * discovery tag from its HTML.
 *
 * @param html    Loaded cheerio document of the page.
 * @param url     Page URL as a string.
 * @param urlInfo Parsed form of `url`.
 * @returns The oEmbed JSON object, or an empty object when there is no
 *          endpoint or the lookup fails for any reason.
 */
const extractOEmbedData = async (html: cheerio.CheerioAPI, url: string, urlInfo: URL): Promise<Record<string, unknown>> => {
    // TODO: this should also support header discovery
    let oEmbedUrl = '';

    // idk how long i'll bother with this for
    if(urlInfo.host === 'x.com' || urlInfo.host === 'twitter.com')
        oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodeURIComponent(url)}`;
    else
        oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first()?.attr('href')?.trim() ?? '';

    if(oEmbedUrl === '')
        return {};

    try {
        // discovery links may be relative to the page; only ever follow http(s)
        const endpoint = new URL(oEmbedUrl, url);
        if(endpoint.protocol !== 'https:' && endpoint.protocol !== 'http:')
            return {};

        const response = await fetch(endpoint);
        if(!response.ok) {
            await response.body?.cancel();
            return {};
        }

        // awaited inside the try block so a malformed body is handled here instead of rejecting in the caller
        const data: unknown = await response.json();
        if(typeof data !== 'object' || data === null)
            return {};

        return data as Record<string, unknown>;
    } catch(ex) {
        console.error(ex);
        return {};
    }
};

/**
 * Fetches a URL and builds the metadata record served for it.
 *
 * The record always carries `url`, `title` (file name taken from the path) and
 * `site_name` (host), plus `media_type` when the server reports one. For HTML
 * documents these defaults are refined from Open Graph, Twitter card and plain
 * HTML metadata, in that order of preference; JSON-LD blocks are attached as
 * `lds` and, for allow-listed domains, the oEmbed document as `oembed`.
 *
 * @param url     URL to look up, as a string.
 * @param urlInfo Parsed form of `url`.
 * @returns The metadata record.
 * @throws Whatever `fetch` throws when the request itself fails.
 */
const extractMetadata = async (url: string, urlInfo: URL): Promise<Record<string, unknown>> => {
    interface OgMedia {
        url?: string;
        secure_url?: string;
        type?: string;
        width?: number;
        height?: number;
        alt?: string;
        tags?: string[];
    }

    interface OgData {
        url?: string;
        title?: string;
        site_name?: string;
        description?: string;
        images?: OgMedia[];
        videos?: OgMedia[];
        audios?: OgMedia[];
    }

    type StringFields = Record<string, string | undefined>;

    const data = await fetch(url);
    const contentTypeRaw = data.headers.get('content-type') ?? '';

    let mediaType = '';
    try {
        mediaType = parseMediaType(contentTypeRaw)[0];
    } catch {
        // unparsable parameters: fall back to the bare type in front of the first ';'
        mediaType = contentTypeRaw.split(';')[0].trim().toLowerCase();
    }

    const info: Record<string, unknown> = {};
    const addInfoOrDont = (prop: string, value: unknown): void => {
        if(value !== null && value !== undefined)
            info[prop] = value;
    };
    // same coercion as a loose `value > 0` comparison; Twitter dimensions arrive as strings
    const isPositive = (value: unknown): boolean => Number(value) > 0;
    const mediaUrl = (media: OgMedia): string | undefined => media.secure_url ?? media.url;

    // a malformed percent-escape in the path must not fail the whole lookup
    const fileName = basename(urlInfo.pathname);
    let title = fileName;
    try {
        title = decodeURIComponent(fileName);
    } catch {
        // keep the undecoded file name
    }

    info.url = url;
    info.title = title;
    info.site_name = urlInfo.host;
    if(mediaType)
        info.media_type = mediaType;

    if(['text/html', 'application/xhtml+xml'].includes(mediaType)) {
        const html = cheerio.load(await readableStreamToString(data.body));
        const metaData: StringFields = extractHtmlMetaData(html);
        const ogData: OgData = extractOpenGraphData(html);
        const twitterData: StringFields = extractTwitterData(html);

        addInfoOrDont('url', ogData.url ?? metaData.canonical_url);
        addInfoOrDont('title', ogData.title ?? twitterData.title ?? metaData.title);
        addInfoOrDont('site_name', ogData.site_name);
        addInfoOrDont('description', ogData.description ?? twitterData.description ?? metaData.description);
        addInfoOrDont('color', metaData.theme_color);

        // Use the first Open Graph entry that actually has a URL; an entry without
        // one (e.g. its og:image failed validation) must not block the fallbacks.
        const image = ogData.images?.find(mediaUrl);
        if(image) {
            info.image = info.image_url = mediaUrl(image);
            if(isPositive(image.width)) info.image_width = info.width = image.width;
            if(isPositive(image.height)) info.image_height = info.height = image.height;
            if(image.type) info.image_type = image.type;
            if(image.alt) info.image_alt = image.alt;
        } else {
            const imageUrl = twitterData.image ?? metaData.image ?? metaData.thumbnail;
            addInfoOrDont('image_url', imageUrl);
            addInfoOrDont('image_alt', twitterData.image_alt);
            if(imageUrl) info.image = imageUrl;
        }

        const audio = ogData.audios?.find(mediaUrl);
        if(audio) {
            info.audio_url = mediaUrl(audio);
            if(audio.type) info.audio_type = audio.type;
        }

        // video dimensions take precedence over image dimensions for width/height
        const video = ogData.videos?.find(mediaUrl);
        if(video) {
            info.video_url = mediaUrl(video);
            if(isPositive(video.width)) info.video_width = info.width = video.width;
            if(isPositive(video.height)) info.video_height = info.height = video.height;
            if(video.type) info.video_type = video.type;
            if(video.tags && video.tags.length > 0) info.video_tags = video.tags;
        } else {
            addInfoOrDont('video_url', twitterData.player);
            addInfoOrDont('video_width', twitterData.player_width);
            addInfoOrDont('video_height', twitterData.player_height);
            if(isPositive(twitterData.player_width)) info.width = twitterData.player_width;
            if(isPositive(twitterData.player_height)) info.height = twitterData.player_height;
        }

        const linkedDatas = extractLinkedData(html);
        if(linkedDatas.length > 0)
            info.lds = linkedDatas;

        if(isAllowedOEmbedDomain(urlInfo.host)) {
            const oEmbedData = await extractOEmbedData(html, url, urlInfo);
            if(oEmbedData?.version)
                info.oembed = oEmbedData;
        }
    } else {
        // TODO: image/*, video/* and audio/* responses get no extra metadata yet.
        // The body is not needed, so cancel it instead of leaving the download open.
        await data.body?.cancel();
    }

return info; }; const requestHandler = async (req: Request): Response => { const url = new URL(req.url); const headers = { 'X-Powered-By': 'Uiharu' }; if(req.headers.has('origin')) { const originRaw = req.headers.get('origin'); const origin = new URL(originRaw); if(!allowedOrigins.includes(origin.host)) return new Response('403', { status: 403, headers }); headers['Access-Control-Allow-Origin'] = originRaw; headers['Vary'] = 'Origin'; } if(req.method === 'OPTIONS') { headers['Allow'] = 'OPTIONS, GET, HEAD, POST'; headers['Access-Control-Allow-Methods'] = 'OPTIONS, GET, HEAD, POST'; // idk if this is the appropriate status code but: balls return new Response('', { status: 204, headers }); } if(url.pathname === '/metadata') { if(!['GET', 'HEAD', 'POST'].includes(req.method)) return new Response('', { status: 405, headers }); const started = performance.now(); headers['Content-Type'] = 'application/json;charset=utf-8'; let urlParamRaw: String = ''; if(req.method === 'POST') urlParamRaw = (await readableStreamToString(req.body)).trim(); else urlParamRaw = (new URLSearchParams(url.search)).get('url')?.trim() ?? 
''; if(urlParamRaw === '') return new Response('{"error":"metadata:uri"}', { status: 400, headers }); if(urlParamRaw.startsWith('//')) urlParamRaw = 'https:' + urlParamRaw; let urlParam: URL; try { urlParam = new URL(urlParamRaw); } catch(ex) { return new Response('{"error":"metadata:uri"}', { status: 400, headers }); } urlParamRaw = urlParam.toString(); const urlHash = encodeBase64Url( await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw)) ); const cacheKey = `uiharu:metadata:${urlHash}`; // const cacheInfo = await cache.get(cacheKey); // if(cacheInfo !== undefined) // return new Response( // brotliDecompressSync(cacheInfo.value), // { // status: 200, // headers: { // ...headers, // ...{ // 'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`, // 'X-Uiharu-State': 'cache', // }, // }, // } // ); try { const json = JSON.stringify( await extractMetadata(urlParamRaw, urlParam) ); cache.set(cacheKey, brotliCompressSync(json), { compress: false, lifetime: 600 }); return new Response(json, { status: 200, headers: { ...headers, ...{ 'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`, 'X-Uiharu-State': 'fresh', }, }, }); } catch(ex) { console.error(ex); return new Response('{"error":"metadata:lookup"}', { status: 500, headers }); } } if(url.pathname === '/metadata/batch') { if(!['GET', 'HEAD', 'POST'].includes(req.method)) return new Response('', { status: 405, headers }); return new Response('{"took":0,"results":[]}', { headers: { ...headers, ...{ 'Content-Type': 'application/json' }, }, }); } const isAudio = url.pathname === '/metadata/thumb/audio'; const isVideo = url.pathname === '/metadata/thumb/video'; if(isAudio || isVideo) { if(!['HEAD', 'GET'].includes(req.method)) return new Response('', { status: 405, headers }); let urlParamRaw: String = (new URLSearchParams(url.search)).get('url')?.trim() ?? 
''; if(urlParamRaw === '') return new Response('missing url parameter', { status: 400, headers }); let scheme: String = ''; try { const urlParam = new URL(urlParamRaw); if(typeof urlParam.protocol === 'string') scheme = urlParam.protocol; urlParamRaw = urlParam.toString(); } catch(ex) { return new Response('invalid url parameter', { status: 400, headers }); } if(!['http:', 'https:'].includes(scheme)) return new Response('unsupported url scheme', { status: 400, headers }); // this seems like a terrible idea lol const args = ['-i', urlParamRaw]; if(isAudio) args.push('-an'); args.push('-f'); args.push('image2pipe'); args.push('-c:v'); args.push(isVideo ? 'png' : 'copy'); args.push('-frames:v'); args.push('1'); args.push('-'); const { code, stdout, stderr } = await (new Deno.Command('ffmpeg', { stdin: 'null', stdout: 'piped', stderr: 'piped', args, })).output(); if(code !== 0) { console.error(new TextDecoder().decode(stderr)); return new Response('decode failed', { status: 500, headers }); } // TODO: bother with cache someday maybe const thumb = stdout; return new Response(thumb, { headers: { ...headers, ...{ 'Content-Type': 'image/png', 'Cache-Control': 'public, max-age=31536000, immutable', }, }, }); } // serving files from /public dir if(['HEAD', 'GET'].includes(req.method)) { const localPathPrefix = import.meta.dirname + '/public/'; const localPathSuffix = pathNormalize(url.pathname === '/' ? 
'/index.html' : url.pathname); const localPath = pathNormalize(localPathPrefix + localPathSuffix); if(localPath.startsWith(localPathPrefix) && existsSync(localPath)) { const mediaTypes = { 'html': 'text/html;charset=utf-8', 'css': 'text/css;charset=utf-8', 'txt': 'text/plain;charset=utf-8', 'png': 'image/png', }; let mediaType: String = 'application/octet-stream'; const dotIndex = localPathSuffix.lastIndexOf('.'); if(dotIndex >= 0) { const ext = localPathSuffix.substring(dotIndex + 1); if(ext in mediaTypes) mediaType = mediaTypes[ext]; } return new Response('', { status: 200, headers: { ...headers, ...{ 'Content-Type': mediaType, 'X-Accel-Redirect': `/_public${localPathSuffix}`, } }, }); } // 404 page return new Response('