import * as cheerio from 'npm:cheerio@^1.0.0';
import { MemcacheClient } from 'npm:memcache-client@^1.0.5';
import { existsSync } from "jsr:@std/fs";
import { basename, join as pathJoin } from "jsr:@std/path";
import { normalize as pathNormalize } from "jsr:@std/path/normalize";
import { encodeBase64Url } from "jsr:@std/encoding/base64url";
import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
import { parseMediaType } from "jsr:@std/media-types";
import { Color } from "https://deno.land/x/color@v0.3.0/mod.ts";

/**
 * Loosely shaped JSON-ish object. The metadata documents assembled in this
 * file are built up key by key from remote input, so they are intentionally
 * not modelled more precisely.
 */
// deno-lint-ignore no-explicit-any
type Dict = Record<string, any>;

/** Description of a single `og:*` property understood by {@link extractOpenGraphData}. */
interface OgProperty {
    /** Value kind: 'str', 'int', 'url' or 'mime'. Anything else is discarded. */
    type?: string;
    /** Name of the structured property this shorthand stands for. */
    alias?: string;
    /** Key of the array that collects repeated occurrences. */
    array?: string;
    /** Name of the parent structured property. */
    of?: string;
    /** URL schemes accepted for 'url' values (defaults to http and https). */
    protos?: string[];
}

// todo: these should not be hardcoded
const hostName: string = 'uiharu.edgii.net';
const port: number = 3009;
const memcacheServer: string = '127.0.0.1:11211';

/** Hosts allowed to call this service cross-origin (compared with the Origin header's host). */
const allowedOrigins: string[] = [
    'edgii.net',
    'chat.edgii.net',
    'sockchat.edgii.net',
    'ajaxchat.edgii.net',
];

/** Domain suffixes for which an oEmbed lookup is attempted. */
const allowOEmbed: string[] = [
    // copied from wordpress source sorta
    '.youtube.com', '.youtu.be', '.vimeo.com', '.dailymotion.com', '.dai.ly',
    '.flickr.com', '.flic.kr', '.smugmug.com', '.scribd.com', '.wordpress.tv',
    '.crowdsignal.net', '.polldaddy.com', '.poll.fm', '.survey.fm', '.twitter.com',
    // extractOEmbedData special-cases x.com, so it has to pass the allow list too
    '.x.com',
    '.soundcloud.com', '.spotify.com', '.imgur.com', '.issuu.com', '.mixcloud.com',
    '.ted.com', '.animoto.com', '.video214.com', '.tumblr.com', '.kickstarter.com',
    '.kck.st', '.cloudup.com', '.reverbnation.com', '.videopress.com', '.reddit.com',
    '.speakerdeck.com', '.screencast.com', '.amazon.com', '.amazon.com.mx',
    '.amazon.com.br', '.amazon.ca', '.amazon.co.uk', '.amazon.de', '.amazon.fr',
    '.amazon.it', '.amazon.es', '.amazon.in', '.amazon.nl', '.amazon.ru',
    '.amazon.co.jp', '.amazon.com.au', '.amazon.cn', '.a.co', '.amzn.to', '.amzn.eu',
    '.amzn.in', '.amzn.asia', '.z.cn', '.somecards.com', '.some.ly', '.tiktok.com',
    '.pinterest.com', '.pinterest.com.au', '.pinterest.com.mx', '.wolframcloud.com',
    '.instagram.com', '.facebook.com', '.pca.st', '.anghami.com', '.bsky.app',
    '.apple.com',

    '.flashii.net', '.fii.moe', '.tako.zone', '.patchii.net', '.railgun.sh', '.flash.moe',
    '.edgii.net',
];

/** URL schemes this service is willing to fetch or hand to ffmpeg/ffprobe. */
const allowedSchemes: string[] = ['http:', 'https:'];

/** ffprobe tag names copied into `media.tags` (stored lower-cased). */
const mediaTagFields: string[] = ['Title', 'Artist', 'Album', 'Date', 'Comment', 'Genre'];

const appVersion: string = '20241029';

/** Directory containing this module; falls back to the working directory when unavailable. */
const appRoot: string = import.meta.dirname ?? Deno.cwd();

/** Debug mode is enabled by the presence of a `.debug` file next to this module. */
const isDebug: boolean = existsSync(pathJoin(appRoot, '.debug'));

const cache: MemcacheClient = new MemcacheClient({
    server: memcacheServer,
    compressor: {
        // identity functions: payloads are brotli-compressed by the caller before storing
        compressSync: buffer => buffer,
        decompressSync: buffer => buffer,
    },
});

/**
 * `fetch` wrapper that fills in default Accept, Accept-Language and
 * User-Agent headers unless the caller supplied them.
 *
 * The caller's `init` object is not modified.
 *
 * @param url Resource to fetch.
 * @param init Optional fetch options; `headers` may be any `HeadersInit`.
 * @returns The fetch response.
 */
const uiharuFetch = async (url: string | URL | Request, init?: RequestInit): Promise<Response> => {
    const headers = new Headers(init?.headers);

    if(!headers.has('Accept'))
        headers.set('Accept', 'text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8');
    if(!headers.has('Accept-Language'))
        headers.set('Accept-Language', 'en-GB, en;q=0.9, ja-jp;q=0.6, *;q=0.5');
    if(!headers.has('User-Agent'))
        headers.set('User-Agent', `Mozilla/5.0 (compatible; Uiharu/${appVersion}; +http://fii.moe/uiharu)`);

    return await fetch(url, { ...init, headers });
};

/**
 * Reads a byte stream to its end and decodes it with `TextDecoder`'s default encoding.
 *
 * @param stream Stream to drain; `null`/`undefined` (a body-less request or response) yields ''.
 * @returns The decoded text.
 */
const readableStreamToString = async (stream?: ReadableStream<Uint8Array> | null): Promise<string> => {
    if(stream == null)
        return '';

    const reader = stream.getReader();
    const decoder = new TextDecoder;
    let result = '';

    for(;;) {
        const { done, value } = await reader.read();
        if(done) break;
        // stream: true keeps multi-byte sequences split across chunks intact
        result += decoder.decode(value, { stream: true });
    }

    result += decoder.decode();
    return result;
};

/**
 * Tests whether `user` is `known` or a subdomain of it.
 * Both sides are dot-prefixed first so that 'notx.com' does not match 'x.com'.
 */
const isDomainSuffix = (known: string, user: string): boolean => {
    if(!known.startsWith('.')) known = '.' + known;
    if(!user.startsWith('.')) user = '.' + user;
    return user.endsWith(known);
};

/** Tests whether `domain` matches one of the suffixes in `allowOEmbed`. */
const isAllowedOEmbedDomain = (domain: string): boolean => {
    const dotted = domain.startsWith('.') ? domain : '.' + domain;
    return allowOEmbed.some(suffix => dotted.endsWith(suffix));
};

/**
 * Collects plain HTML metadata: `<title>`, a handful of `<meta name>` tags and
 * the `image_src` / `canonical` `<link>` tags.
 *
 * @param html Loaded cheerio document.
 * @returns Object containing only the keys that had non-empty values.
 */
const extractHtmlMetaData = (html: cheerio.CheerioAPI): Dict => {
    const values: Dict = {};
    const attrOf = (selector: string, attr: string): string =>
        html(selector).first().attr(attr)?.trim() ?? '';

    const titleTag = html('title').first().text().trim();
    if(titleTag.length > 0)
        values.title = titleTag;

    const metaDescriptionTag = attrOf('meta[name="description"]', 'content');
    if(metaDescriptionTag.length > 0)
        values.description = metaDescriptionTag;

    const metaThumbnailTag = attrOf('meta[name="thumbnail"]', 'content');
    if(metaThumbnailTag.length > 0)
        values.thumbnail = metaThumbnailTag;

    const metaThemeColorTag = attrOf('meta[name="theme-color"]', 'content');
    if(metaThemeColorTag.length > 0)
        try {
            values.theme_color = Color.string(metaThemeColorTag).hex();
        } catch(ex) {
            // NOTE(review): guards against the colour parser rejecting page-supplied
            // garbage; a bad theme-color should not fail the whole lookup
            console.error(ex);
        }

    const linkImageSrcTag = attrOf('link[rel="image_src"]', 'href');
    if(linkImageSrcTag.length > 0)
        values.image = linkImageSrcTag;

    const linkCanonicalTag = attrOf('link[rel="canonical"]', 'href');
    if(linkCanonicalTag.length > 0)
        values.canonical_url = linkCanonicalTag;

    return values;
};

/**
 * Collects Open Graph (`<meta property="og:*">`) data.
 *
 * Scalar properties keep their first occurrence. Structured properties
 * (image/video/audio) are gathered into `images`/`videos`/`audios` arrays; a
 * new array item is started whenever a sub-property repeats within the
 * current item.
 *
 * @param html Loaded cheerio document.
 * @returns Object holding the recognised, validated values.
 */
const extractOpenGraphData = (html: cheerio.CheerioAPI): Dict => {
    const values: Dict = {};

    const properties: Record<string, OgProperty> = {
        'url': { type: 'url' },
        'type': { type: 'str' },
        'title': { type: 'str' },
        'locale': { type: 'str' },
        'locale:alternate': { type: 'str', array: 'locales' },
        'description': { type: 'str' },
        'determiner': { type: 'str' },
        'site_name': { type: 'str' },
        'image': { alias: 'image:url', array: 'images' },
        'image:url': { of: 'image', type: 'url' },
        'image:secure_url': { of: 'image', type: 'url', protos: ['https:'] },
        'image:type': { of: 'image', type: 'mime' },
        'image:width': { of: 'image', type: 'int' },
        'image:height': { of: 'image', type: 'int' },
        // must be 'str': any other type name is treated as unknown and discarded below
        'image:alt': { of: 'image', type: 'str' },
        'video': { alias: 'video:url', array: 'videos' },
        'video:url': { of: 'video', type: 'url' },
        'video:secure_url': { of: 'video', type: 'url', protos: ['https:'] },
        'video:type': { of: 'video', type: 'mime' },
        'video:width': { of: 'video', type: 'int' },
        'video:height': { of: 'video', type: 'int' },
        'video:tag': { of: 'video', type: 'str', array: 'tags' },
        'audio': { alias: 'audio:url', array: 'audios' },
        'audio:url': { of: 'audio', type: 'url' },
        'audio:secure_url': { of: 'audio', type: 'url', protos: ['https:'] },
        'audio:type': { of: 'audio', type: 'mime' },
    };

    const tags = html('meta[property^="og:"]');
    for(const tagInfo of tags) {
        const tag = html(tagInfo);

        // strip the 'og:' prefix
        let name = (tag.attr('property')?.trim() ?? '').substring(3);
        // own-property check so names like 'constructor' are not treated as known
        if(!Object.prototype.hasOwnProperty.call(properties, name))
            continue;

        const raw = tag.attr('content')?.trim() ?? '';
        let propInfo = properties[name];
        // deno-lint-ignore no-explicit-any
        let target: any = values;

        if(propInfo.alias) {
            name = propInfo.alias;
            propInfo = properties[name];
        }

        if(propInfo.of) {
            // 'image:width' -> 'width'
            name = name.substring(propInfo.of.length + 1);
            const objInfo = properties[propInfo.of];

            if(objInfo.array) {
                if(objInfo.array in target)
                    target = target[objInfo.array];
                else
                    target = target[objInfo.array] = [];

                // a repeated sub-property signals the start of the next item
                const lastItem = target[target.length - 1];
                if(lastItem === undefined || name in lastItem) {
                    const newItem = {};
                    target.push(newItem);
                    target = newItem;
                } else
                    target = lastItem;
            } else {
                // not reached with the table above: every parent declares `array`
                if(!(name in target))
                    target[name] = {};
                target = target[name];
            }
        }

        if(propInfo.array) {
            if(propInfo.array in target)
                target = target[propInfo.array];
            else
                target = target[propInfo.array] = [];
        } else if(name in target)
            continue; // first occurrence wins

        let value: string | number | undefined;
        switch(propInfo.type) {
            case 'int':
                value = parseInt(raw, 10);
                break;

            case 'mime':
                // world's most naive validation
                value = raw.includes('/') ? raw : undefined;
                break;

            case 'url':
                try {
                    const protos = propInfo.protos ?? allowedSchemes;
                    value = protos.includes(new URL(raw).protocol) ? raw : undefined;
                } catch(ex) {
                    console.error(ex);
                    value = undefined;
                }
                break;

            case 'str':
                value = raw;
                break;

            default:
                value = undefined;
                break;
        }

        // drops '', NaN, 0 and undefined alike
        if(value) {
            if(propInfo.array)
                target.push(value);
            else
                target[name] = value;
        }
    }

    return values;
};

/**
 * Collects Twitter card (`<meta name="twitter:*">`) data.
 *
 * @param html Loaded cheerio document.
 * @returns Object keyed by the property name with ':' replaced by '_'.
 */
const extractTwitterData = (html: cheerio.CheerioAPI): Dict => {
    const values: Dict = {};

    const properties = [
        'card', 'site', 'site:id', 'creator', 'creator:id', 'description', 'title',
        'image', 'image:alt', 'player', 'player:width', 'player:height', 'player:stream',
    ];

    for(const property of properties) {
        const tag = html(`meta[name="twitter:${property}"]`).first().attr('content')?.trim() ?? '';
        if(tag.length > 0)
            values[property.replace(':', '_')] = tag;
    }

    return values;
};

/**
 * Parses every `<script type="application/ld+json">` block in the document.
 * Blocks that are not valid JSON are logged and skipped.
 *
 * @param html Loaded cheerio document.
 * @returns The parsed JSON values, in document order.
 */
const extractLinkedData = (html: cheerio.CheerioAPI): unknown[] => {
    const values: unknown[] = [];

    const tags = html('script[type="application/ld+json"]');
    for(const tagInfo of tags)
        try {
            values.push(JSON.parse(html(tagInfo).text().trim()));
        } catch(ex) {
            console.error(ex);
        }

    return values;
};

/**
 * Naive parser for the HTTP `Link` header.
 *
 * Entries are split on ',' and parameters on ';', so targets or quoted values
 * containing those characters are not supported. Malformed entries and
 * parameters are skipped rather than raising.
 *
 * @param header Raw header value (may be empty).
 * @returns One object per link holding its parameters plus `href`.
 */
const parseLinkHeader = (header: string): Dict[] => {
    const links: Dict[] = [];

    for(const line of header.split(',')) {
        const parts = line.trim().split(';').map(part => part.trim());

        const target = parts.shift();
        if(typeof target !== 'string' || !target.startsWith('<') || !target.endsWith('>'))
            continue;

        let href = target.slice(1, -1);
        try {
            href = decodeURI(href);
        } catch(_ex) {
            // malformed percent-escape: keep the target as received
        }

        const link: Dict = {};
        for(const part of parts) {
            // split on the first '=' only so values may themselves contain '='
            const eq = part.indexOf('=');
            if(eq < 1)
                continue; // parameter without a name or without a value

            const attrName = part.slice(0, eq).trim();
            let value = part.slice(eq + 1).trim();
            if(value.startsWith('"') && value.endsWith('"'))
                value = value.slice(1, -1);

            link[attrName] = value;
        }

        // applied last so that a parameter named 'href' cannot override the target
        link.href = href;
        links.push(link);
    }

    return links;
};

/**
 * Locates and fetches the oEmbed document for a page.
 *
 * The endpoint is taken from a built-in table for a few services, otherwise
 * from the page's `<link rel="alternate" type="application/json+oembed">`
 * tag, otherwise from the response's `Link` header.
 *
 * @param response Response of the original page request (only headers are read).
 * @param html Loaded cheerio document, or `undefined` for non-HTML resources.
 * @param url Normalised page URL.
 * @param urlInfo Parsed form of `url`.
 * @returns The parsed oEmbed JSON, or `{}` when none is found or the lookup fails.
 */
const extractOEmbedData = async (
    response: Response,
    html: cheerio.CheerioAPI | undefined,
    url: string,
    urlInfo: URL,
): Promise<Dict> => {
    let oEmbedUrl: string = '';

    // TODO: maintain a proper list, these services do not advertise their endpoints
    if(isDomainSuffix('x.com', urlInfo.host) || isDomainSuffix('twitter.com', urlInfo.host))
        oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodeURIComponent(url)}`;
    else if(isDomainSuffix('soundcloud.com', urlInfo.host))
        oEmbedUrl = `https://soundcloud.com/oembed?format=json&url=${encodeURIComponent(url)}`;
    else if(isDomainSuffix('tiktok.com', urlInfo.host))
        oEmbedUrl = `https://www.tiktok.com/oembed?url=${encodeURIComponent(url)}`;
    else if(isDomainSuffix('mixcloud.com', urlInfo.host))
        oEmbedUrl = `https://app.mixcloud.com/oembed/?url=${encodeURIComponent(url)}`;
    else if(html !== undefined)
        oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first().attr('href')?.trim() ?? '';

    if(oEmbedUrl === '') {
        const links = parseLinkHeader(response.headers.get('link') ?? '');
        for(const link of links)
            if(link.rel === 'alternate' && link.type === 'application/json+oembed') {
                oEmbedUrl = link.href;
                break;
            }
    }

    if(oEmbedUrl === '')
        return {};

    try {
        // the advertised endpoint is page-controlled: resolve relative references
        // against the page and refuse anything that is not plain http(s)
        const endpoint = new URL(oEmbedUrl, url);
        if(!allowedSchemes.includes(endpoint.protocol))
            return {};

        // awaited here so a JSON parse failure is caught below instead of escaping
        const data: unknown = await (await uiharuFetch(endpoint)).json();
        return data !== null && typeof data === 'object' ? data as Dict : {};
    } catch(ex) {
        console.error(ex);
        return {};
    }
};

/**
 * Copies the known tag fields from an ffprobe `tags` object into `media.tags`,
 * accepting Title-case, lower-case and upper-case spellings of each name.
 *
 * @param source Value of a `tags` property from ffprobe output; non-objects are ignored.
 * @param media Media info object to receive the tags.
 */
const copyMediaTags = (source: unknown, media: Dict): void => {
    if(source === null || typeof source !== 'object')
        return;

    const tags = source as Dict;
    for(const field of mediaTagFields) {
        const value = tags[field] ?? tags[field.toLowerCase()] ?? tags[field.toUpperCase()];
        if(typeof value === 'string') {
            if(typeof media.tags !== 'object')
                media.tags = {};
            media.tags[field.toLowerCase()] = value;
        }
    }
};

/**
 * Fetches `url` and builds the metadata document for it.
 *
 * HTML pages are described from their Open Graph, Twitter card and plain HTML
 * metadata. Audio, image and video resources are inspected with `ffprobe`.
 * oEmbed data is attached for allow-listed domains.
 *
 * @param version Output format version; versions below 2 carry extra legacy keys.
 * @param url Normalised URL to inspect.
 * @param urlInfo Parsed form of `url`.
 * @returns The metadata document.
 * @throws When the initial fetch fails, or when ffprobe cannot be run or
 *         produces unparseable output.
 */
const extractMetadata = async (version: number, url: string, urlInfo: URL): Promise<Dict> => {
    const response = await uiharuFetch(url);
    const contentTypeRaw = response.headers.get('content-type') ?? '';

    let mediaType = '';
    try {
        mediaType = parseMediaType(contentTypeRaw)[0];
    } catch(ex) {
        // parseMediaType rejects malformed parameters; the bare type is all that is needed
        console.error(ex);
        mediaType = contentTypeRaw.split(';')[0].trim().toLowerCase();
    }

    const info: Dict = {};
    const addInfoOrDont = (prop: string, value: unknown): void => {
        if(value !== null && value !== undefined)
            info[prop] = value;
    };

    info.url = url;

    const fileName = basename(urlInfo.pathname);
    try {
        info.title = decodeURIComponent(fileName);
    } catch(_ex) {
        // malformed percent-escape in the path: show it undecoded
        info.title = fileName;
    }

    info.site_name = urlInfo.host;
    if(mediaType)
        info.media_type = mediaType;

    let html: cheerio.CheerioAPI | undefined = undefined;

    if(['text/html', 'application/xhtml+xml'].includes(mediaType)) {
        html = cheerio.load(await readableStreamToString(response.body));

        const metaData = extractHtmlMetaData(html);
        const ogData = extractOpenGraphData(html);
        const twitterData = extractTwitterData(html);

        addInfoOrDont('url', ogData.url ?? metaData.canonical_url);
        addInfoOrDont('title', ogData.title ?? twitterData.title ?? metaData.title);
        addInfoOrDont('site_name', ogData.site_name);
        addInfoOrDont('description', ogData.description ?? twitterData.description ?? metaData.description);
        addInfoOrDont('color', metaData.theme_color);

        if(ogData.images?.length > 0) {
            const image = ogData.images[0];
            info.image_url = image.secure_url ?? image.url;
            if(image.width > 0) info.image_width = image.width;
            if(image.height > 0) info.image_height = image.height;
            if(image.type) info.image_type = image.type;
            if(image.alt) info.image_alt = image.alt;
        } else {
            addInfoOrDont('image_url', twitterData.image ?? metaData.image ?? metaData.thumbnail);
            addInfoOrDont('image_alt', twitterData.image_alt);
        }

        if(ogData.audios?.length > 0) {
            const audio = ogData.audios[0];
            info.audio_url = audio.secure_url ?? audio.url;
            if(audio.type) info.audio_type = audio.type;
        }

        if(ogData.videos?.length > 0) {
            const video = ogData.videos[0];
            info.video_url = video.secure_url ?? video.url;
            if(video.width > 0) info.video_width = video.width;
            if(video.height > 0) info.video_height = video.height;
            if(video.type) info.video_type = video.type;
            if(video.tags?.length > 0) info.video_tags = video.tags;
        } else {
            addInfoOrDont('video_url', twitterData.player);
            addInfoOrDont('video_width', twitterData.player_width);
            addInfoOrDont('video_height', twitterData.player_height);
        }

        if(version < 2) {
            info.image = info.image_url;

            if(info.video_width > 0)
                info.width = info.video_width;
            else if(info.image_width > 0)
                info.width = info.image_width;

            if(info.video_height > 0)
                info.height = info.video_height;
            else if(info.image_height > 0)
                info.height = info.image_height;
        }

        const linkedDatas = extractLinkedData(html);
        // idk what to do with this yet, only including this in debug mode for now
        if(isDebug && linkedDatas.length > 0)
            info._lds = linkedDatas;
    } else {
        // the body is never read on this path (ffprobe is handed the URL itself),
        // so release it instead of leaving the transfer open
        await response.body?.cancel().catch((ex: unknown) => console.error(ex));

        const isAudio = mediaType.startsWith('audio/');
        const isImage = mediaType.startsWith('image/');
        const isVideo = mediaType.startsWith('video/');

        if(isAudio || isImage || isVideo) {
            // this still seems like a terrible idea lol
            const { code, stdout, stderr } = await (new Deno.Command('ffprobe', {
                stdin: 'null',
                stdout: 'piped',
                stderr: 'piped',
                args: [
                    '-show_streams',
                    '-show_format',
                    '-print_format', 'json',
                    '-v', 'quiet',
                    '-i', url
                ],
            })).output();

            if(code !== 0) {
                console.error(new TextDecoder().decode(stderr));
            } else {
                const probe = JSON.parse(new TextDecoder().decode(stdout).trim());
                if(isDebug)
                    info._ffprobe = probe;

                if(probe?.format != null && typeof probe.format === 'object') {
                    const media: Dict = {};
                    info.media = media;

                    media.confidence = Math.min(1, Math.max(0, probe.format.probe_score / 100.0));

                    const pfDuration = parseFloat(probe.format.duration);
                    if(!isNaN(pfDuration))
                        media.duration = pfDuration;

                    const pfSize = parseInt(probe.format.size, 10);
                    if(!isNaN(pfSize))
                        media.size = pfSize;

                    const pfBitRate = parseInt(probe.format.bit_rate, 10);
                    if(!isNaN(pfBitRate)) {
                        if(version < 2)
                            media.bitRate = pfBitRate;
                        else
                            media.bitrate = pfBitRate;
                    }

                    if(Array.isArray(probe.streams))
                        for(const stream of probe.streams)
                            if(stream.codec_type === 'video') {
                                media.width = stream.coded_width ?? stream.width ?? 0;
                                media.height = stream.coded_height ?? stream.height ?? 0;

                                if(typeof stream.display_aspect_ratio === 'string') {
                                    if(version < 2)
                                        media.aspectRatio = stream.display_aspect_ratio;
                                    else
                                        media.aspect_ratio = stream.display_aspect_ratio;
                                }
                            } else if(stream.codec_type === 'audio')
                                copyMediaTags(stream.tags, media);

                    // container-level tags are applied last and override stream-level ones
                    copyMediaTags(probe.format.tags, media);
                }
            }

            // both are absent when ffprobe failed or reported nothing usable
            const probed: Dict = info.media ?? {};
            const tags: Dict = probed.tags ?? {};

            if(isAudio) {
                info.audio_url = url;
                info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/audio?url=${encodeURIComponent(url)}`;
                info.image_type = 'image/png';

                let title = '';
                if(typeof tags.artist === 'string')
                    title += `${tags.artist} - `;
                if(typeof tags.title === 'string')
                    title += tags.title;
                if(typeof tags.date === 'string')
                    title += ` (${tags.date})`;

                title = title.trim();
                if(title !== '')
                    info.title = title;

                if(typeof tags.comment === 'string')
                    info.description = tags.comment.trim();
            } else if(isImage) {
                info.image_url = url;
                info.image_type = info.media_type;

                if(probed.width > 0)
                    info.width = info.image_width = probed.width;
                if(probed.height > 0)
                    info.height = info.image_height = probed.height;
            } else if(isVideo) {
                info.video_url = url;
                info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/video?url=${encodeURIComponent(url)}`;
                info.image_type = 'image/png';

                if(probed.width > 0)
                    info.image_width = info.width = info.video_width = probed.width;
                if(probed.height > 0)
                    info.image_height = info.height = info.video_height = probed.height;
            }

            if(version < 2) {
                info.image = info.image_url;

                if(isAudio)
                    info.is_audio = true;
                else if(isImage)
                    info.is_image = true;
                else if(isVideo)
                    info.is_video = true;
            }
        }
    }

    if(isAllowedOEmbedDomain(urlInfo.host)) {
        const oEmbedData = await extractOEmbedData(response, html, url, urlInfo);
        if(oEmbedData?.version)
            info.oembed = oEmbedData;
    }

    // legacy (v1) clients get embed hints for a couple of known players
    if(version < 2 && info.video_url) {
        if(info.video_url.startsWith('https://www.youtube.com/')) {
            const ytVidUrl = new URL(info.video_url);
            const ytVidUrlParams = new URLSearchParams(ytVidUrl.search);

            info.type = 'youtube:video';
            info.youtube_video_id = basename(ytVidUrl.pathname);

            if(ytVidUrlParams.has('list'))
                info.youtube_playlist = ytVidUrlParams.get('list');
        } else if(info.video_url.startsWith('https://embed.nicovideo.jp/')) {
            const nndVidUrl = new URL(info.video_url);

            info.type = 'niconico:video';
            info.nicovideo_video_id = basename(nndVidUrl.pathname);
        }
    }

    return info;
};

/**
 * Stores a metadata document in memcache, brotli-compressed, for 600 seconds.
 * Failures are logged and otherwise ignored: the cache is best-effort.
 */
const storeMetadata = async (cacheKey: string, json: string): Promise<void> => {
    try {
        await cache.set(cacheKey, brotliCompressSync(json), { compress: false, lifetime: 600 });
    } catch(ex) {
        console.error(ex);
    }
};

/**
 * HTTP entry point.
 *
 * Routes:
 * - `/metadata` (GET/HEAD with `?url=`, or POST with the URL as body; `?fv=` selects format 1 or 2)
 * - `/metadata/batch` (stub that returns an empty result set)
 * - `/metadata/thumb/audio` and `/metadata/thumb/video` (PNG thumbnail via ffmpeg)
 * - anything else: files under `public/`, delegated through `X-Accel-Redirect`
 *
 * Requests carrying an `Origin` header are rejected with 403 unless the
 * origin's host is listed in `allowedOrigins`.
 *
 * @param req Incoming request.
 * @returns The response to send.
 */
const requestHandler = async (req: Request): Promise<Response> => {
    const url = new URL(req.url);
    const headers: Record<string, string> = { 'X-Powered-By': 'Uiharu' };

    const originRaw = req.headers.get('origin');
    if(originRaw !== null) {
        let originHost = '';
        try {
            originHost = new URL(originRaw).host;
        } catch(_ex) {
            // opaque ('null') or malformed origin: falls through to the 403 below
        }

        if(!allowedOrigins.includes(originHost))
            return new Response('403', { status: 403, headers });

        headers['Access-Control-Allow-Origin'] = originRaw;
        headers['Vary'] = 'Origin';
    }

    if(req.method === 'OPTIONS') {
        headers['Allow'] = 'OPTIONS, GET, HEAD, POST';
        headers['Access-Control-Allow-Methods'] = 'OPTIONS, GET, HEAD, POST';
        return new Response('', { status: 204, headers });
    }

    if(url.pathname === '/metadata') {
        if(!['GET', 'HEAD', 'POST'].includes(req.method))
            return new Response('', { status: 405, headers });

        const started = performance.now();
        const urlParams = new URLSearchParams(url.search);

        headers['Content-Type'] = 'application/json;charset=utf-8';

        let urlParamRaw: string = '';
        if(req.method === 'POST')
            urlParamRaw = (await readableStreamToString(req.body)).trim();
        else
            urlParamRaw = urlParams.get('url')?.trim() ?? '';

        if(urlParamRaw === '')
            return new Response('{"error":"metadata:uri"}', { status: 400, headers });

        // protocol-relative input defaults to https
        if(urlParamRaw.startsWith('//'))
            urlParamRaw = 'https:' + urlParamRaw;

        let urlParam: URL;
        try {
            urlParam = new URL(urlParamRaw);
        } catch(_ex) {
            return new Response('{"error":"metadata:uri"}', { status: 400, headers });
        }

        // the URL is handed to fetch() and ffprobe; keep both away from file: and friends
        if(!allowedSchemes.includes(urlParam.protocol))
            return new Response('{"error":"metadata:uri"}', { status: 400, headers });

        urlParamRaw = urlParam.toString();

        // missing, non-numeric and zero all select version 1
        const formatVersion = parseInt(urlParams.get('fv') ?? '', 10) || 1;
        if(formatVersion < 1 || formatVersion > 2)
            return new Response('{"error":"metadata:version"}', { status: 400, headers });

        const urlHash = encodeBase64Url(
            await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw))
        );
        const cacheKey = `uiharu:${appVersion}:md:fv${formatVersion}:${urlHash}`;

        // a cache outage or corrupt entry degrades to a fresh lookup
        let cached: Uint8Array | undefined;
        try {
            const cacheInfo = await cache.get(cacheKey);
            if(cacheInfo != null)
                cached = brotliDecompressSync(cacheInfo.value);
        } catch(ex) {
            console.error(ex);
        }

        if(cached !== undefined)
            return new Response(cached, {
                status: 200,
                headers: {
                    ...headers,
                    'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
                    'X-Uiharu-State': 'cache',
                },
            });

        try {
            const json = JSON.stringify(
                await extractMetadata(formatVersion, urlParamRaw, urlParam)
            );

            // fire and forget; storeMetadata handles its own errors
            void storeMetadata(cacheKey, json);

            return new Response(json, {
                status: 200,
                headers: {
                    ...headers,
                    'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
                    'X-Uiharu-State': 'fresh',
                },
            });
        } catch(ex) {
            console.error(ex);
            return new Response('{"error":"metadata:lookup"}', { status: 500, headers });
        }
    }

    if(url.pathname === '/metadata/batch') {
        if(!['GET', 'HEAD', 'POST'].includes(req.method))
            return new Response('', { status: 405, headers });

        return new Response('{"took":0,"results":[]}', {
            headers: { ...headers, 'Content-Type': 'application/json' },
        });
    }

    const isAudio = url.pathname === '/metadata/thumb/audio';
    const isVideo = url.pathname === '/metadata/thumb/video';
    if(isAudio || isVideo) {
        if(!['HEAD', 'GET'].includes(req.method))
            return new Response('', { status: 405, headers });

        let urlParamRaw: string = (new URLSearchParams(url.search)).get('url')?.trim() ?? '';
        if(urlParamRaw === '')
            return new Response('missing url parameter', { status: 400, headers });

        let scheme: string = '';
        try {
            const urlParam = new URL(urlParamRaw);
            scheme = urlParam.protocol;
            urlParamRaw = urlParam.toString();
        } catch(_ex) {
            return new Response('invalid url parameter', { status: 400, headers });
        }

        if(!allowedSchemes.includes(scheme))
            return new Response('unsupported url scheme', { status: 400, headers });

        // this seems like a terrible idea lol
        const args = ['-i', urlParamRaw];
        if(isAudio)
            args.push('-an');
        args.push('-f', 'image2pipe');
        args.push('-c:v', isVideo ? 'png' : 'copy');
        args.push('-frames:v', '1');
        args.push('-');

        const { code, stdout, stderr } = await (new Deno.Command('ffmpeg', {
            stdin: 'null',
            stdout: 'piped',
            stderr: 'piped',
            args,
        })).output();

        if(code !== 0) {
            console.error(new TextDecoder().decode(stderr));
            return new Response('decode failed', { status: 500, headers });
        }

        // TODO: bother with cache someday maybe
        return new Response(stdout, {
            headers: {
                ...headers,
                'Content-Type': 'image/png',
                'Cache-Control': 'public, max-age=31536000, immutable',
            },
        });
    }

    // serving files from /public dir
    if(['HEAD', 'GET'].includes(req.method)) {
        const localPathPrefix = appRoot + '/public/';
        const localPathSuffix = pathNormalize(url.pathname === '/' ? '/index.html' : url.pathname);
        const localPath = pathNormalize(localPathPrefix + localPathSuffix);

        // the prefix check keeps the resolved path inside public/; directories are not served
        if(localPath.startsWith(localPathPrefix) && existsSync(localPath, { isFile: true })) {
            const mediaTypes = new Map<string, string>([
                ['html', 'text/html;charset=utf-8'],
                ['css', 'text/css;charset=utf-8'],
                ['txt', 'text/plain;charset=utf-8'],
                ['png', 'image/png'],
            ]);

            let mediaType: string = 'application/octet-stream';
            const dotIndex = localPathSuffix.lastIndexOf('.');
            if(dotIndex >= 0)
                mediaType = mediaTypes.get(localPathSuffix.substring(dotIndex + 1)) ?? mediaType;

            // empty body: the file itself is delivered by whatever honours X-Accel-Redirect
            return new Response('', {
                status: 200,
                headers: {
                    ...headers,
                    'Content-Type': mediaType,
                    'X-Accel-Redirect': `/_public${localPathSuffix}`,
                },
            });
        }

        // 404 page
        // NOTE(review): the previous literal spanned several raw lines (a syntax
        // error) and carried no tags; this minimal markup is a reconstruction,
        // confirm against the intended page
        return new Response('<!doctype html>\n<title>404 Not Found</title>\n<h1>404 Not Found</h1>\n', {
            status: 404,
            headers: { ...headers, 'Content-Type': 'text/html;charset=utf-8' },
        });
    }

    // 404 fallback
    return new Response('', {
        status: ['OPTIONS', 'HEAD', 'GET', 'POST'].includes(req.method) ? 404 : 405,
        headers,
    });
};

Deno.serve({ port }, requestHandler);