/**
 * File extension (without the dot, case-sensitive) to the Content-Type sent for it.
 * Anything not listed here is served as `application/octet-stream`.
 */
const mediaTypes: Readonly<Record<string, string>> = {
    'html': 'text/html;charset=utf-8',
    'css': 'text/css;charset=utf-8',
    'txt': 'text/plain;charset=utf-8',
    'png': 'image/png',
};

/**
 * Picks the Content-Type for a public file from the text after the last dot in `path`.
 *
 * @param path Request path, e.g. `/index.html`.
 * @returns The media type registered in `mediaTypes`, or `application/octet-stream`
 *          when the path has no dot or the extension is not registered.
 */
function extractMediaType(path: string): string {
    const fallback = 'application/octet-stream';

    const dotIndex = path.lastIndexOf('.');
    if(dotIndex < 0)
        return fallback;

    const ext = path.substring(dotIndex + 1);

    // Own-property check: the `in` operator also matches inherited keys such as
    // "constructor" or "toString", which would hand back a function instead of a media type.
    if(Object.prototype.hasOwnProperty.call(mediaTypes, ext))
        return mediaTypes[ext] ?? fallback;

    return fallback;
}
+ path); + return full.startsWith(PUBLIC_DIR) && existsSync(full); +} + +export function handlePublicPath( + headers, + path: string +): Response { + path = normalize(path === '/' ? '/index.html' : path); + + if(publicPathExists(path)) + return new Response('', { + status: 200, + headers: { + ...headers, + ...{ + 'Content-Type': extractMediaType(path), + 'X-Accel-Redirect': `/_public${path}`, + } + }, + }); + + // 404 page + return new Response('404 Not Found

404 Not Found

/**
 * Handles a metadata lookup request.
 *
 * The target URL comes from the request body (POST) or the `url` query parameter
 * (GET/HEAD); the response format version comes from the `fv` query parameter
 * (1 or 2, defaulting to 1). Results are stored brotli-compressed in `cache`
 * under a key derived from the app version, format version and the SHA-256 of
 * the normalised target URL.
 *
 * @param url      URL of the incoming request (only its query string is read).
 * @param headers  Base response headers; copied, never modified.
 * @param req      Incoming request (method and, for POST, body are read).
 * @param cache    Memcache client used as a best-effort cache.
 * @param hostName Public host name, forwarded to `extractMetadata`.
 * @returns 405 for unsupported methods, 400 with `{"error":"metadata:uri"}` or
 *          `{"error":"metadata:version"}` for bad input, 500 with
 *          `{"error":"metadata:lookup"}` when extraction throws, otherwise 200 with
 *          the metadata JSON and an `X-Uiharu-State` header of `cache` or `fresh`.
 */
export async function handleMetadataLookup(
    url: URL,
    headers: Record<string, string>,
    req: Request,
    cache: MemcacheClient,
    hostName: string
): Promise<Response> {
    if(!['GET', 'HEAD', 'POST'].includes(req.method))
        return new Response('', { status: 405, headers });

    const started = performance.now();
    const urlParams = new URLSearchParams(url.search);

    // Work on a copy so the caller's header object is not silently changed.
    const jsonHeaders: Record<string, string> = {
        ...headers,
        'Content-Type': 'application/json;charset=utf-8',
    };

    const errorResponse = (error: string, status: number): Response =>
        new Response(JSON.stringify({ error }), { status, headers: jsonHeaders });

    const successResponse = (body: string | Uint8Array, state: string): Response =>
        new Response(body, {
            status: 200,
            headers: {
                ...jsonHeaders,
                'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
                'X-Uiharu-State': state,
            },
        });

    let urlParamRaw: string = req.method === 'POST'
        ? (await readableStreamToString(req.body)).trim()
        : (urlParams.get('url')?.trim() ?? '');

    if(urlParamRaw === '')
        return errorResponse('metadata:uri', 400);

    // protocol-relative input is treated as https
    if(urlParamRaw.startsWith('//'))
        urlParamRaw = 'https:' + urlParamRaw;

    let urlParam: URL;
    try {
        urlParam = new URL(urlParamRaw);
    } catch {
        return errorResponse('metadata:uri', 400);
    }

    // The URL is later handed to fetch() and ffprobe; only allow web schemes
    // (same rule the thumbnail handler applies) so file:, data: etc. are rejected.
    if(!['http:', 'https:'].includes(urlParam.protocol))
        return errorResponse('metadata:uri', 400);

    urlParamRaw = urlParam.toString();

    const formatVersion = parseInt(urlParams.get('fv') ?? '', 10) || 1;
    if(formatVersion < 1 || formatVersion > 2)
        return errorResponse('metadata:version', 400);

    const urlHash = encodeBase64Url(
        await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw))
    );
    const cacheKey = `uiharu:${APP_VERSION}:md:fv${formatVersion}:${urlHash}`;

    // The cache is an optimisation: a failed read or a corrupt entry falls through
    // to a fresh lookup instead of failing the request.
    let cached: Uint8Array | undefined;
    try {
        const cacheInfo = await cache.get(cacheKey);
        if(cacheInfo !== undefined)
            cached = brotliDecompressSync(cacheInfo.value);
    } catch(ex) {
        console.error(ex);
    }

    if(cached !== undefined)
        return successResponse(cached, 'cache');

    let json: string;
    try {
        json = JSON.stringify(
            await extractMetadata(formatVersion, hostName, urlParamRaw, urlParam)
        );
    } catch(ex) {
        console.error(ex);
        return errorResponse('metadata:lookup', 500);
    }

    // Fire-and-forget write; log failures instead of leaving a rejection unhandled.
    // NOTE(review): assumes cache.set returns a promise when no callback is passed — confirm.
    try {
        Promise.resolve(cache.set(cacheKey, brotliCompressSync(json), {
            compress: false,
            lifetime: 600
        })).catch((ex: unknown) => console.error(ex));
    } catch(ex) {
        console.error(ex);
    }

    return successResponse(json, 'fresh');
};
a/src/handlers/thumb.ts b/src/handlers/thumb.ts new file mode 100644 index 0000000..a113683 --- /dev/null +++ b/src/handlers/thumb.ts @@ -0,0 +1,64 @@ +export async function handleThumbnailRetrieve( + url: URL, + headers, + req: Request, + isAudio: bool, + isVideo: bool +): Response { + if(!['HEAD', 'GET'].includes(req.method)) + return new Response('', { status: 405, headers }); + + let urlParamRaw: String = (new URLSearchParams(url.search)).get('url')?.trim() ?? ''; + if(urlParamRaw === '') + return new Response('missing url parameter', { status: 400, headers }); + + let scheme: String = ''; + try { + const urlParam = new URL(urlParamRaw); + if(typeof urlParam.protocol === 'string') + scheme = urlParam.protocol; + + urlParamRaw = urlParam.toString(); + } catch(ex) { + return new Response('invalid url parameter', { status: 400, headers }); + } + + if(!['http:', 'https:'].includes(scheme)) + return new Response('unsupported url scheme', { status: 400, headers }); + + // this seems like a terrible idea lol + const args = ['-i', urlParamRaw]; + if(isAudio) args.push('-an'); + args.push('-f'); + args.push('image2pipe'); + args.push('-c:v'); + args.push(isVideo ? 
// The result is a loosely shaped JSON document whose keys depend on the format version.
// deno-lint-ignore no-explicit-any
type MetadataInfo = Record<string, any>;

// in Title case cus JS doesnt have an accessible lcfirst equivalent :p
const PROBE_TAG_FIELDS = ['Title', 'Artist', 'Album', 'Date', 'Comment', 'Genre'];

/** True for any non-null `object` value (arrays included, as with a plain `typeof` check). */
function isRecord(value: unknown): value is MetadataInfo {
    return typeof value === 'object' && value !== null;
}

/** Last path segment, percent-decoded when possible; the raw segment when it is malformed. */
function decodePathBasename(pathname: string): string {
    const name = basename(pathname);
    try {
        return decodeURIComponent(name);
    } catch {
        // e.g. "/100%.mp3": a stray "%" makes decodeURIComponent throw URIError
        return name;
    }
}

/** Media type without parameters; falls back to a plain split when the header cannot be parsed. */
function parseBaseMediaType(raw: string): string {
    try {
        return parseMediaType(raw)[0];
    } catch {
        // NOTE(review): assumes parseMediaType throws on malformed parameters — confirm against @std/media-types.
        return (raw.split(';')[0] ?? '').trim().toLowerCase();
    }
}

/** Copies the known tag fields (looked up as Title, title, TITLE) from `tags` into `media.tags`. */
function copyProbeTags(media: MetadataInfo, tags: unknown): void {
    if(!isRecord(tags))
        return;

    for(const field of PROBE_TAG_FIELDS) {
        const value = tags[field] ?? tags[field.toLowerCase()] ?? tags[field.toUpperCase()];
        if(typeof value !== 'string')
            continue;

        if(typeof media.tags !== 'object')
            media.tags = {};

        media.tags[field.toLowerCase()] = value;
    }
}

/** Builds the `media` object from ffprobe's `format` and `streams` output. */
function summarizeProbe(probe: MetadataInfo, version: number): MetadataInfo {
    const format = probe.format;
    const media: MetadataInfo = {};

    media.confidence = Math.min(1, Math.max(0, format.probe_score / 100.0));

    const pfDuration = parseFloat(format.duration);
    if(!isNaN(pfDuration))
        media.duration = pfDuration;

    const pfSize = parseInt(format.size, 10);
    if(!isNaN(pfSize))
        media.size = pfSize;

    const pfBitRate = parseInt(format.bit_rate, 10);
    if(!isNaN(pfBitRate)) {
        // format version 1 uses camelCase keys, version 2 snake/lower case
        if(version < 2)
            media.bitRate = pfBitRate;
        else
            media.bitrate = pfBitRate;
    }

    if(Array.isArray(probe.streams))
        for(const stream of probe.streams) {
            if(stream?.codec_type === 'video') {
                media.width = stream.coded_width ?? stream.width ?? 0;
                media.height = stream.coded_height ?? stream.height ?? 0;

                if(typeof stream.display_aspect_ratio === 'string') {
                    if(version < 2)
                        media.aspectRatio = stream.display_aspect_ratio;
                    else
                        media.aspect_ratio = stream.display_aspect_ratio;
                }
            } else if(stream?.codec_type === 'audio')
                copyProbeTags(media, stream.tags);
        }

    // container-level tags are applied last, so they win over stream-level ones
    copyProbeTags(media, format.tags);

    return media;
}

/** Runs ffprobe on `url`; resolves to the parsed JSON output, or `undefined` when ffprobe exits non-zero. */
async function probeMedia(url: string): Promise<unknown> {
    // this still seems like a terrible idea lol
    const { code, stdout, stderr } = await (new Deno.Command('ffprobe', {
        stdin: 'null',
        stdout: 'piped',
        stderr: 'piped',
        args: [
            '-show_streams',
            '-show_format',
            '-print_format', 'json',
            '-v', 'quiet',
            '-i', url
        ],
    })).output();

    if(code !== 0) {
        console.error(new TextDecoder().decode(stderr));
        return undefined;
    }

    return JSON.parse(new TextDecoder().decode(stdout).trim());
}

/**
 * Fetches `url` and builds the metadata document for it.
 *
 * HTML responses are described from Open Graph, Twitter card and plain meta tags.
 * Audio, image and video responses are described from ffprobe output, with
 * thumbnail URLs pointing back at `hostName`. oEmbed data is attached for
 * allow-listed hosts.
 *
 * @param version  Output format version; values below 2 add the legacy fields
 *                 (`image`, `width`, `height`, `is_audio`, `type`, ...).
 * @param hostName Public host name used when building thumbnail URLs.
 * @param url      Normalised target URL.
 * @param urlInfo  Parsed form of `url`.
 * @returns The metadata object; it always has `url`, `title` and `site_name`.
 * @throws Whatever the fetch, the ffprobe invocation or JSON parsing of its output throws.
 */
export async function extractMetadata(
    version: number,
    hostName: string,
    url: string,
    urlInfo: URL
): Promise<MetadataInfo> {
    const response = await fetchWithHeaders(url);
    const mediaType = parseBaseMediaType(response.headers.get('content-type') ?? '');

    const info: MetadataInfo = {};
    const addInfoOrDont = (prop: string, value: unknown) => {
        if(value !== null && value !== undefined)
            info[prop] = value;
    };

    info.url = url;
    info.title = decodePathBasename(urlInfo.pathname);
    info.site_name = urlInfo.host;

    if(mediaType)
        info.media_type = mediaType;

    let html = undefined;

    if(['text/html', 'application/xhtml+xml'].includes(mediaType)) {
        html = cheerio.load(await readableStreamToString(response.body));

        const metaData = extractHtmlMetaData(html);
        const ogData = extractOpenGraphData(html);
        const twitterData = extractTwitterData(html);

        addInfoOrDont('url', ogData.url ?? metaData.canonical_url);
        addInfoOrDont('title', ogData.title ?? twitterData.title ?? metaData.title);
        addInfoOrDont('site_name', ogData.site_name);
        addInfoOrDont('description', ogData.description ?? twitterData.description ?? metaData.description);
        addInfoOrDont('color', metaData.theme_color);

        if(ogData.images?.length > 0) {
            const image = ogData.images[0];
            info.image_url = image.secure_url ?? image.url;
            if(image.width > 0)
                info.image_width = image.width;
            if(image.height > 0)
                info.image_height = image.height;
            if(image.type)
                info.image_type = image.type;
            if(image.alt)
                info.image_alt = image.alt;
        } else {
            addInfoOrDont('image_url', twitterData.image ?? metaData.image ?? metaData.thumbnail);
            addInfoOrDont('image_alt', twitterData.image_alt);
        }

        if(ogData.audios?.length > 0) {
            const audio = ogData.audios[0];
            info.audio_url = audio.secure_url ?? audio.url;
            if(audio.type)
                info.audio_type = audio.type;
        }

        if(ogData.videos?.length > 0) {
            const video = ogData.videos[0];
            info.video_url = video.secure_url ?? video.url;
            if(video.width > 0)
                info.video_width = video.width;
            if(video.height > 0)
                info.video_height = video.height;
            if(video.type)
                info.video_type = video.type;
            if(video.tags?.length > 0)
                info.video_tags = video.tags;
        } else {
            addInfoOrDont('video_url', twitterData.player);
            addInfoOrDont('video_width', twitterData.player_width);
            addInfoOrDont('video_height', twitterData.player_height);
        }

        if(version < 2) {
            info.image = info.image_url;
            if(info.video_width > 0)
                info.width = info.video_width;
            else if(info.image_width > 0)
                info.width = info.image_width;
            if(info.video_height > 0)
                info.height = info.video_height;
            else if(info.image_height > 0)
                info.height = info.image_height;
        }

        const linkedDatas = extractLinkedData(html);
        // idk what to do with this yet, only including this in debug mode for now
        if(IS_DEBUG && linkedDatas.length > 0)
            info._lds = linkedDatas;
    } else {
        // Nothing below reads this body (ffprobe fetches the URL itself), so release it.
        await response.body?.cancel().catch(() => undefined);

        const isAudio = mediaType.startsWith('audio/');
        const isImage = mediaType.startsWith('image/');
        const isVideo = mediaType.startsWith('video/');

        if(isAudio || isImage || isVideo) {
            const probe = await probeMedia(url);
            if(probe !== undefined) {
                if(IS_DEBUG)
                    info._ffprobe = probe;

                if(isRecord(probe) && isRecord(probe.format))
                    info.media = summarizeProbe(probe, version);
            }

            // ffprobe may have failed and files may carry no tags at all; fall back
            // to empty objects instead of dereferencing undefined.
            const media: MetadataInfo = info.media ?? {};
            const tags: MetadataInfo = media.tags ?? {};

            if(isAudio) {
                info.audio_url = url;
                info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/audio?url=${encodeURIComponent(url)}`;
                info.image_type = 'image/png';

                let title = '';
                if(typeof tags.artist === 'string')
                    title += `${tags.artist} - `;
                if(typeof tags.title === 'string')
                    title += tags.title;
                if(typeof tags.date === 'string')
                    title += ` (${tags.date})`;
                title = title.trim();
                if(title !== '')
                    info.title = title;

                if(typeof tags.comment === 'string')
                    info.description = tags.comment.trim();
            } else if(isImage) {
                info.image_url = url;
                info.image_type = info.media_type;

                if(media.width > 0)
                    info.width = info.image_width = media.width;
                if(media.height > 0)
                    info.height = info.image_height = media.height;
            } else if(isVideo) {
                info.video_url = url;
                info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/video?url=${encodeURIComponent(url)}`;
                info.image_type = 'image/png';

                if(media.width > 0)
                    info.image_width = info.width = info.video_width = media.width;
                if(media.height > 0)
                    info.image_height = info.height = info.video_height = media.height;
            }

            if(version < 2) {
                info.image = info.image_url;

                if(isAudio)
                    info.is_audio = true;
                else if(isImage)
                    info.is_image = true;
                else if(isVideo)
                    info.is_video = true;
            }
        }
    }

    if(isAllowedOEmbedDomain(urlInfo.host)) {
        const oEmbedData = await extractOEmbedData(response, html, url, urlInfo);
        if(oEmbedData?.version)
            info.oembed = oEmbedData;
    }

    // legacy (version 1) fields for well-known embeddable players
    if(version < 2 && info.video_url) {
        if(info.video_url.startsWith('https://www.youtube.com/')) {
            const ytVidUrl = new URL(info.video_url);
            const ytVidUrlParams = new URLSearchParams(ytVidUrl.search);
            info.type = 'youtube:video';
            info.youtube_video_id = basename(ytVidUrl.pathname);
            if(ytVidUrlParams.has('list'))
                info.youtube_playlist = ytVidUrlParams.get('list');
        } else if(info.video_url.startsWith('https://embed.nicovideo.jp/')) {
            const nndVidUrl = new URL(info.video_url);
            info.type = 'niconico:video';
            info.nicovideo_video_id = basename(nndVidUrl.pathname);
        }
    }

    return info;
};
/**
 * Collects every JSON-LD document embedded in the page.
 *
 * Each `<script type="application/ld+json">` element is parsed on its own;
 * an element that fails to parse is logged and skipped.
 *
 * @param html Document query function (called with a selector and with elements).
 * @returns The parsed documents, in document order.
 */
export function extractLinkedData(html) {
    const documents = [];

    const collect = (node) => {
        try {
            const raw = html(node).text().trim();
            documents.push(JSON.parse(raw));
        } catch(ex) {
            console.error(ex);
        }
    };

    for(const node of html('script[type="application/ld+json"]'))
        collect(node);

    return documents;
};
/**
 * Splits `text` on `separator`, ignoring separators that sit inside a `<...>`
 * target or inside a double-quoted string (backslash escapes are honoured).
 */
function splitLinkHeaderLevel(text: string, separator: ',' | ';'): string[] {
    const pieces: string[] = [];
    let current = '';
    let inAngle = false;
    let inQuotes = false;
    let escaped = false;

    for(const ch of text) {
        if(inQuotes) {
            if(escaped)
                escaped = false;
            else if(ch === '\\')
                escaped = true;
            else if(ch === '"')
                inQuotes = false;
        } else if(inAngle) {
            if(ch === '>')
                inAngle = false;
        } else if(ch === '"')
            inQuotes = true;
        else if(ch === '<')
            inAngle = true;
        else if(ch === separator) {
            pieces.push(current);
            current = '';
            continue;
        }

        current += ch;
    }

    pieces.push(current);
    return pieces;
}

/**
 * Parses the value of an HTTP `Link` header.
 *
 * @param header Raw header value; may be empty.
 * @returns One object per well-formed link: parameter names (lower-cased) mapped to
 *          their unquoted values, plus `href` holding the target. Parameters without
 *          a value map to an empty string. Entries that do not start with a
 *          `<...>` target are skipped.
 */
function parseLinkHeader(header: string): Array<Record<string, string>> {
    const links: Array<Record<string, string>> = [];

    for(const entry of splitLinkHeaderLevel(header, ',')) {
        const parts = splitLinkHeaderLevel(entry, ';').map(part => part.trim());

        const target = parts.shift();
        if(target === undefined || !target.startsWith('<') || !target.endsWith('>'))
            continue;

        const link: Record<string, string> = {};

        for(const part of parts) {
            if(part === '')
                continue; // stray or trailing ";"

            // split on the first "=" only so values containing "=" stay intact
            const eq = part.indexOf('=');
            const name = (eq < 0 ? part : part.slice(0, eq)).trim().toLowerCase();
            if(name === '')
                continue;

            let value = eq < 0 ? '' : part.slice(eq + 1).trim();
            if(value.length >= 2 && value.startsWith('"') && value.endsWith('"'))
                value = value.slice(1, -1);

            link[name] = value;
        }

        const rawHref = target.slice(1, -1);
        let href = rawHref;
        try {
            href = decodeURI(rawHref);
        } catch {
            // malformed percent-encoding: keep the target as received
        }

        // applying this last to avoid tomfoolery :3
        link.href = href;
        links.push(link);
    }

    return links;
};
/**
 * Locates and downloads the oEmbed document for a page.
 *
 * The endpoint is chosen in this order: a hard-coded endpoint for hosts that do
 * not advertise one (Twitter/X, SoundCloud, TikTok, Mixcloud), the page's
 * `<link rel="alternate" type="application/json+oembed">` element, then the
 * `Link` response header.
 *
 * @param response Response of the page fetch; only its headers are read.
 * @param html     Document query function for the page, or `undefined` for non-HTML responses.
 * @param url      Page URL; also used to resolve a relative endpoint URL.
 * @param urlInfo  Parsed form of `url`.
 * @returns The oEmbed JSON object, or `{}` when no endpoint is found, the endpoint
 *          is not http(s), the request fails, or the payload is not a JSON object.
 */
export async function extractOEmbedData(
    response: Response,
    html,
    url: string,
    urlInfo: URL
): Promise<Record<string, unknown>> {
    const host = urlInfo.host;
    const encodedUrl = encodeURIComponent(url);
    let oEmbedUrl: string = '';

    // TODO: maintain a fucking list because its too difficult for services to just provide tags
    if(isDomainSuffix('x.com', host) || isDomainSuffix('twitter.com', host))
        oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodedUrl}`;
    else if(isDomainSuffix('soundcloud.com', host))
        oEmbedUrl = `https://soundcloud.com/oembed?format=json&url=${encodedUrl}`;
    else if(isDomainSuffix('tiktok.com', host))
        oEmbedUrl = `https://www.tiktok.com/oembed?url=${encodedUrl}`;
    else if(isDomainSuffix('mixcloud.com', host))
        oEmbedUrl = `https://app.mixcloud.com/oembed/?url=${encodedUrl}`;
    else if(html !== undefined)
        oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first()?.attr('href')?.trim() ?? '';

    if(oEmbedUrl === '') {
        const links = parseLinkHeader(response.headers.get('link') ?? '');
        for(const link of links)
            if(link.rel === 'alternate' && link.type === 'application/json+oembed') {
                oEmbedUrl = link.href ?? '';
                break;
            }
    }

    if(oEmbedUrl === '')
        return {};

    try {
        // Discovery URLs come from the remote page: resolve relative ones against
        // the page and refuse anything that is not plain http(s).
        const target = new URL(oEmbedUrl, url);
        if(!['http:', 'https:'].includes(target.protocol))
            return {};

        const oEmbedResponse = await fetchWithHeaders(target.toString());
        if(!oEmbedResponse.ok) {
            await oEmbedResponse.body?.cancel();
            return {};
        }

        // await here so a body that is not valid JSON is handled by the catch below
        const data: unknown = await oEmbedResponse.json();
        return typeof data === 'object' && data !== null
            ? data as Record<string, unknown>
            : {};
    } catch(ex) {
        console.error(ex);
        return {};
    }
};
'').substring(3); + if(!(name in properties)) + continue; + + let value = tag.attr('content')?.trim() ?? ''; + + let propInfo = properties[name]; + let target = values; + + if(propInfo.alias) { + name = propInfo.alias; + propInfo = properties[name]; + } + + if(propInfo.of) { + name = name.substring(propInfo.of.length + 1); + const objInfo = properties[propInfo.of]; + + if(objInfo.array) { + if(objInfo.array in target) + target = target[objInfo.array]; + else + target = target[objInfo.array] = []; + + const lastItem = target[target.length - 1]; + if(lastItem === undefined || name in lastItem) { + const newItem = {}; + target.push(newItem); + target = newItem; + } else + target = lastItem; + } else { + if(!(name in target)) + target[name] = {}; + + target = target[name]; + } + } + + if(propInfo.array) { + if(propInfo.array in target) + target = target[propInfo.array]; + else + target = target[propInfo.array] = []; + } else if(name in target) + continue; + + if(propInfo.type === 'int') + value = parseInt(value); + else { + if(propInfo.type === 'mime') { + // world's most naive validation + if(value.indexOf('/') < 0) + value = undefined; + } else if(propInfo.type === 'url') { + try { + const protos = propInfo.protos ?? 
/**
 * Reads a byte stream to its end and decodes it as UTF-8.
 *
 * @param stream Stream to drain; `null` and `undefined` (for example a request
 *               without a body) yield an empty string.
 * @returns The decoded text. Multi-byte sequences split across chunks are decoded correctly.
 */
export async function readableStreamToString(stream?: ReadableStream<Uint8Array> | null): Promise<string> {
    // `== null` style guard written out: the parameter is optional, so undefined must be handled too
    if(stream === null || stream === undefined)
        return '';

    const reader = stream.getReader();
    const decoder = new TextDecoder();
    let result = '';

    try {
        for(;;) {
            const { done, value } = await reader.read();
            if(done) break;
            // stream: true keeps an incomplete trailing sequence buffered for the next chunk
            result += decoder.decode(value, { stream: true });
        }
    } finally {
        // give the stream back even when a read fails
        reader.releaseLock();
    }

    // flush whatever the decoder still holds
    result += decoder.decode();

    return result;
};
-import { existsSync } from "jsr:@std/fs"; -import { basename, join as pathJoin } from "jsr:@std/path"; -import { normalize as pathNormalize } from "jsr:@std/path/normalize"; -import { encodeBase64Url } from "jsr:@std/encoding/base64url"; -import { brotliCompressSync, brotliDecompressSync } from "node:zlib"; -import { parseMediaType } from "jsr:@std/media-types"; -import { Color } from "https://deno.land/x/color@v0.3.0/mod.ts"; // todo: these should not be hardcoded lol const hostName: String = 'uiharu.edgii.net'; @@ -18,84 +13,7 @@ const allowedOrigins: String[] = [ 'sockchat.edgii.net', 'ajaxchat.edgii.net', ]; -const allowOEmbed: String[] = [ // copied from wordpress source sorta - '.youtube.com', - '.youtu.be', - '.vimeo.com', - '.dailymotion.com', - '.dai.ly', - '.flickr.com', - '.flic.kr', - '.smugmug.com', - '.scribd.com', - '.wordpress.tv', - '.crowdsignal.net', - '.polldaddy.com', - '.poll.fm', - '.survey.fm', - '.twitter.com', - '.soundcloud.com', - '.spotify.com', - '.imgur.com', - '.issuu.com', - '.mixcloud.com', - '.ted.com', - '.animoto.com', - '.video214.com', - '.tumblr.com', - '.kickstarter.com', - '.kck.st', - '.cloudup.com', - '.reverbnation.com', - '.videopress.com', - '.reddit.com', - '.speakerdeck.com', - '.screencast.com', - '.amazon.com', - '.amazon.com.mx', - '.amazon.com.br', - '.amazon.ca', - '.amazon.co.uk', - '.amazon.de', - '.amazon.fr', - '.amazon.it', - '.amazon.es', - '.amazon.in', - '.amazon.nl', - '.amazon.ru', - '.amazon.co.jp', - '.amazon.com.au', - '.amazon.cn', - '.a.co', - '.amzn.to', - '.amzn.eu', - '.amzn.in', - '.amzn.asia', - '.z.cn', - '.somecards.com', - '.some.ly', - '.tiktok.com', - '.pinterest.com', - '.pinterest.com.au', - '.pinterest.com.mx', - '.wolframcloud.com', - '.instagram.com', - '.facebook.com', - '.pca.st', - '.anghami.com', - '.bsky.app', - '.apple.com', - '.flashii.net', - '.fii.moe', - '.tako.zone', - '.patchii.net', - '.railgun.sh', - '.flash.moe', - '.edgii.net', -]; -const appVersion: String = 
'20241029'; -const isDebug: Boolean = existsSync(pathJoin(import.meta.dirname, '.debug')); const cache: MemcacheClient = new MemcacheClient({ server: memcacheServer, compressor: { @@ -105,574 +23,7 @@ const cache: MemcacheClient = new MemcacheClient({ }, }); -const uiharuFetch = async (url, init) => { - if(!init) - init = {}; - if(!init.headers) - init.headers = {}; - if(!init.headers['Accept']) - init.headers['Accept'] = 'text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8'; - if(!init.headers['Accept-Language']) - init.headers['Accept-Language'] = 'en-GB, en;q=0.9, ja-jp;q=0.6, *;q=0.5'; - if(!init.headers['User-Agent']) - init.headers['User-Agent'] = `Mozilla/5.0 (compatible; Uiharu/${appVersion}; +http://fii.moe/uiharu)`; - - return await fetch(url, init); -}; - -const readableStreamToString = async (stream?: ReadableStream): string => { - if(stream === null) - return ''; - - const reader = stream.getReader(); - const decoder = new TextDecoder; - let result = ''; - - for(;;) { - const { done, value } = await reader.read(); - if(done) break; - result += decoder.decode(value, { stream: true }); - } - - result += decoder.decode(); - - return result; -}; - -const isDomainSuffix = (known: string, user: string) => { - if(!known.startsWith('.')) - known = '.' + known; - if(!user.startsWith('.')) - user = '.' + user; - - return user.endsWith(known); -}; - -const isAllowedOEmbedDomain = (domain: string): Boolean => { - if(!domain.startsWith('.')) - domain = '.' + domain; - - for(const suffix of allowOEmbed) - if(domain.endsWith(suffix)) - return true; - - return false; -}; - -const extractHtmlMetaData = (html) => { - const values = {}; - - const titleTag = html('title')?.first()?.text().trim() ?? ''; - if(titleTag.length > 0) - values.title = titleTag; - - const metaDescriptionTag = html('meta[name="description"]').first()?.attr('content')?.trim() ?? 
''; - if(metaDescriptionTag.length > 0) - values.description = metaDescriptionTag; - - const metaThumbnailTag = html('meta[name="thumbnail"]').first()?.attr('content')?.trim() ?? ''; - if(metaThumbnailTag.length > 0) - values.thumbnail = metaThumbnailTag; - - const metaThemeColorTag = html('meta[name="theme-color"]').first()?.attr('content')?.trim() ?? ''; - if(metaThemeColorTag.length > 0) - values.theme_color = Color.string(metaThemeColorTag).hex(); - - const linkImageSrcTag = html('link[rel="image_src"]').first()?.attr('href')?.trim() ?? ''; - if(linkImageSrcTag.length > 0) - values.image = linkImageSrcTag; - - const linkCanonicalTag = html('link[rel="canonical"]').first()?.attr('href')?.trim() ?? ''; - if(linkCanonicalTag.length > 0) - values.canonical_url = linkCanonicalTag; - - return values; -}; - -const extractOpenGraphData = (html) => { - const values = {}; - - // this is hateful - const properties = { - 'url': { type: 'url' }, - 'type': { type: 'str' }, - 'title': { type: 'str' }, - 'locale': { type: 'str' }, - 'locale:alternate': { type: 'str', array: 'locales' }, - 'description': { type: 'str' }, - 'determiner': { type: 'str' }, - 'site_name': { type: 'str' }, - - 'image': { alias: 'image:url', array: 'images' }, - 'image:url': { of: 'image', type: 'url' }, - 'image:secure_url': { of: 'image', type: 'url', protos: ['https:'] }, - 'image:type': { of: 'image', type: 'mime' }, - 'image:width': { of: 'image', type: 'int' }, - 'image:height': { of: 'image', type: 'int' }, - 'image:alt': { of: 'image', type: 'string' }, - - 'video': { alias: 'video:url', array: 'videos' }, - 'video:url': { of: 'video', type: 'url' }, - 'video:secure_url': { of: 'video', type: 'url', protos: ['https:'] }, - 'video:type': { of: 'video', type: 'mime' }, - 'video:width': { of: 'video', type: 'int' }, - 'video:height': { of: 'video', type: 'int' }, - 'video:tag': { of: 'video', type: 'str', array: 'tags' }, - - 'audio': { alias: 'audio:url', array: 'audios' }, - 'audio:url': { of: 
'audio', type: 'url' }, - 'audio:secure_url': { of: 'audio', type: 'url', protos: ['https:'] }, - 'audio:type': { of: 'audio', type: 'mime' }, - }; - - const tags = html('meta[property^="og:"]'); - for(const tagInfo of tags) { - const tag = html(tagInfo); - - let name = (tag.attr('property')?.trim() ?? '').substring(3); - if(!(name in properties)) - continue; - - let value = tag.attr('content')?.trim() ?? ''; - - let propInfo = properties[name]; - let target = values; - - if(propInfo.alias) { - name = propInfo.alias; - propInfo = properties[name]; - } - - if(propInfo.of) { - name = name.substring(propInfo.of.length + 1); - const objInfo = properties[propInfo.of]; - - if(objInfo.array) { - if(objInfo.array in target) - target = target[objInfo.array]; - else - target = target[objInfo.array] = []; - - const lastItem = target[target.length - 1]; - if(lastItem === undefined || name in lastItem) { - const newItem = {}; - target.push(newItem); - target = newItem; - } else - target = lastItem; - } else { - if(!(name in target)) - target[name] = {}; - - target = target[name]; - } - } - - if(propInfo.array) { - if(propInfo.array in target) - target = target[propInfo.array]; - else - target = target[propInfo.array] = []; - } else if(name in target) - continue; - - if(propInfo.type === 'int') - value = parseInt(value); - else { - if(propInfo.type === 'mime') { - // world's most naive validation - if(value.indexOf('/') < 0) - value = undefined; - } else if(propInfo.type === 'url') { - try { - const protos = propInfo.protos ?? 
['https:', 'http:']; - if(!protos.includes(new URL(value).protocol)) - value = undefined; - } catch(ex) { - console.error(ex); - value = undefined; - } - } else if(propInfo.type !== 'str') - value = undefined; - } - - if(value) { - if(propInfo.array) - target.push(value); - else - target[name] = value; - } - } - - return values; -}; - -const extractTwitterData = (html) => { - const values = {}; - const properties = [ - 'card', - 'site', - 'site:id', - 'creator', - 'creator:id', - 'description', - 'title', - 'image', - 'image:alt', - 'player', - 'player:width', - 'player:height', - 'player:stream', - ]; - - for(const property of properties) { - const tag = html(`meta[name="twitter:${property}"]`)?.first()?.attr('content')?.trim() ?? ''; - if(tag.length > 0) - values[property.replace(':', '_')] = tag; - } - - return values; -}; - -const extractLinkedData = (html) => { - const values = []; - - const tags = html('script[type="application/ld+json"]'); - for(const tagInfo of tags) - try { - values.push(JSON.parse(html(tagInfo).text().trim())); - } catch(ex) { - console.error(ex); - } - - return values; -}; - -const parseLinkHeader = (header: string) => { - const links = []; - - const lines = header.split(','); - for(const key in lines) { - const parts = lines[key].trim().split(';').map(part => part.trim()); - - let href = parts.shift(); - if(typeof href !== 'string' || !href.startsWith('<') || !href.endsWith('>')) - continue; - - href = decodeURI(href.slice(1, -1)); - const link = {}; - links.push(link); - - for(const part of parts) { - const attr = part.split('=', 2); - let value = attr[1]; - if(value.startsWith('"') && value.endsWith('"')) - value = value.slice(1, -1); - - link[attr[0]] = value; - } - - // applying this last to avoid tomfoolery :3 - link.href = href; - } - - return links; -}; - -const extractOEmbedData = async (response: Response, html, url: string, urlInfo: URL) => { - let oEmbedUrl: string = ''; - - // TODO: maintain a fucking list because its too 
difficult for services to just provide tags - if(isDomainSuffix('x.com', urlInfo.host) || isDomainSuffix('twitter.com', urlInfo.host)) - oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodeURIComponent(url)}`; - else if(isDomainSuffix('soundcloud.com', urlInfo.host)) - oEmbedUrl = `https://soundcloud.com/oembed?format=json&url=${encodeURIComponent(url)}`; - else if(isDomainSuffix('tiktok.com', urlInfo.host)) - oEmbedUrl = `https://www.tiktok.com/oembed?url=${encodeURIComponent(url)}`; - else if(isDomainSuffix('mixcloud.com', urlInfo.host)) - oEmbedUrl = `https://app.mixcloud.com/oembed/?url=${encodeURIComponent(url)}`; - else if(html !== undefined) - oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first()?.attr('href')?.trim() ?? ''; - - if(oEmbedUrl === '') { - const links = parseLinkHeader(response.headers.get('link') ?? ''); - for(const link of links) - if(link.rel === 'alternate' && link.type === 'application/json+oembed') { - oEmbedUrl = link.href; - break; - } - } - - if(oEmbedUrl === '') - return {}; - - try { - return (await uiharuFetch(oEmbedUrl)).json(); - } catch(ex) { - console.error(ex); - return {}; - } -}; - -const extractMetadata = async (version: number, url: string, urlInfo: URL) => { - const response = await uiharuFetch(url); - const contentTypeRaw = response.headers.get('content-type') ?? 
''; - const contentType = parseMediaType(contentTypeRaw); - - const info = {}; - const addInfoOrDont = (prop, value) => { - if(value !== null && value !== undefined) - info[prop] = value; - }; - - info.url = url; - info.title = decodeURIComponent(basename(urlInfo.pathname)); - info.site_name = urlInfo.host; - - if(contentType[0]) - info.media_type = contentType[0]; - - let html = undefined; - - if(['text/html', 'application/xhtml+xml'].includes(contentType[0])) { - html = cheerio.load(await readableStreamToString(response.body)); - - const metaData = extractHtmlMetaData(html); - const ogData = extractOpenGraphData(html); - const twitterData = extractTwitterData(html); - - addInfoOrDont('url', ogData.url ?? metaData.canonical_url); - addInfoOrDont('title', ogData.title ?? twitterData.title ?? metaData.title); - addInfoOrDont('site_name', ogData.site_name); - addInfoOrDont('description', ogData.description ?? twitterData.description ?? metaData.description); - addInfoOrDont('color', metaData.theme_color); - - if(ogData.images?.length > 0) { - const image = ogData.images[0]; - info.image_url = image.secure_url ?? image.url; - if(image.width > 0) - info.image_width = image.width; - if(image.height > 0) - info.image_height = image.height; - if(image.type) - info.image_type = image.type; - if(image.alt) - info.image_alt = image.alt; - } else { - addInfoOrDont('image_url', twitterData.image ?? metaData.image ?? metaData.thumbnail); - addInfoOrDont('image_alt', twitterData.image_alt); - } - - if(ogData.audios?.length > 0) { - const audio = ogData.audios[0]; - info.audio_url = audio.secure_url ?? audio.url; - if(audio.type) - info.audio_type = audio.type; - } - - if(ogData.videos?.length > 0) { - const video = ogData.videos[0]; - info.video_url = video.secure_url ?? 
video.url; - if(video.width > 0) - info.video_width = video.width; - if(video.height > 0) - info.video_height = video.height; - if(video.type) - info.video_type = video.type; - if(video.tags?.length > 0) - info.video_tags = video.tags; - } else { - addInfoOrDont('video_url', twitterData.player); - addInfoOrDont('video_width', twitterData.player_width); - addInfoOrDont('video_height', twitterData.player_height); - } - - if(version < 2) { - info.image = info.image_url; - if(info.video_width > 0) - info.width = info.video_width; - else if(info.image_width > 0) - info.width = info.image_width; - if(info.video_height > 0) - info.height = info.video_height; - else if(info.image_height > 0) - info.height = info.image_height; - } - - const linkedDatas = extractLinkedData(html); - // idk what to do with this yet, only including this in debug mode for now - if(isDebug && linkedDatas.length > 0) - info._lds = linkedDatas; - } else { - const isAudio = contentType[0].startsWith('audio/'); - const isImage = contentType[0].startsWith('image/'); - const isVideo = contentType[0].startsWith('video/'); - - if(isAudio || isImage || isVideo) { - // this still seems like a terrible idea lol - const { code, stdout, stderr } = await (new Deno.Command('ffprobe', { - stdin: 'null', - stdout: 'piped', - stderr: 'piped', - args: [ - '-show_streams', - '-show_format', - '-print_format', 'json', - '-v', 'quiet', - '-i', url - ], - })).output(); - - if(code !== 0) { - console.error(new TextDecoder().decode(stderr)); - } else { - const probe = JSON.parse(new TextDecoder().decode(stdout).trim()); - if(isDebug) - info._ffprobe = probe; - - if(typeof probe?.format === 'object') { - const media = {}; - info.media = media; - media.confidence = Math.min(1, Math.max(0, probe.format.probe_score / 100.0)); - - const pfDuration = parseFloat(probe.format.duration); - if(!isNaN(pfDuration)) - media.duration = pfDuration; - - const pfSize = parseInt(probe.format.size); - if(!isNaN(pfSize)) - media.size = 
pfSize; - - const pfBitRate = parseInt(probe.format.bit_rate); - if(!isNaN(pfBitRate)) { - if(version < 2) - media.bitRate = pfBitRate; - else - media.bitrate = pfBitRate; - } - - // in Title case cus JS doesnt have an accessible lcfirst equivalent :p - const pftFields = ['Title', 'Artist', 'Album', 'Date', 'Comment', 'Genre']; - - if(Array.isArray(probe.streams)) - for(const stream of probe.streams) - if(stream.codec_type === 'video') { - media.width = stream.coded_width ?? stream.width ?? 0; - media.height = stream.coded_height ?? stream.height ?? 0; - - if(typeof stream.display_aspect_ratio === 'string') { - if(version < 2) - media.aspectRatio = stream.display_aspect_ratio; - else - media.aspect_ratio = stream.display_aspect_ratio; - } - } else if(stream.codec_type === 'audio') { - if(typeof stream.tags === 'object') - for(const pftFieldName of pftFields) { - const pftFieldValue = stream.tags[pftFieldName] - ?? probe.format.tags[pftFieldName.toLowerCase()] - ?? probe.format.tags[pftFieldName.toUpperCase()]; - - if(typeof pftFieldValue === 'string') { - if(typeof media.tags !== 'object') - media.tags = {}; - - media.tags[pftFieldName.toLowerCase()] = pftFieldValue; - } - } - } - - if(typeof probe.format.tags === 'object') - for(const pftFieldName of pftFields) { - const pftFieldValue = probe.format.tags[pftFieldName] - ?? probe.format.tags[pftFieldName.toLowerCase()] - ?? probe.format.tags[pftFieldName.toUpperCase()]; - - if(typeof pftFieldValue === 'string') { - if(typeof media.tags !== 'object') - media.tags = {}; - - media.tags[pftFieldName.toLowerCase()] = pftFieldValue; - } - } - } - } - - if(isAudio) { - info.audio_url = url; - info.image_url = `${version < 2 ? 
'' : 'https:'}//${hostName}/metadata/thumb/audio?url=${encodeURIComponent(url)}`; - info.image_type = 'image/png'; - - let title = ''; - if(typeof info.media.tags.artist === 'string') - title += `${info.media.tags.artist} - `; - if(typeof info.media.tags.title === 'string') - title += info.media.tags.title; - if(typeof info.media.tags.date === 'string') - title += ` (${info.media.tags.date})`; - title = title.trim(); - if(title !== '') - info.title = title; - - if(typeof info.media.tags.comment === 'string') - info.description = info.media.tags.comment.trim(); - } else if(isImage) { - info.image_url = url; - info.image_type = info.media_type; - - if(info.media.width > 0) - info.width = info.image_width = info.media.width; - if(info.media.height > 0) - info.height = info.image_height = info.media.height; - } else if(isVideo) { - info.video_url = url; - info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/video?url=${encodeURIComponent(url)}`; - info.image_type = 'image/png'; - - if(info.media.width > 0) - info.image_width = info.width = info.video_width = info.media.width; - if(info.media.height > 0) - info.image_height = info.height = info.video_height = info.media.height; - } - - if(version < 2) { - info.image = info.image_url; - - if(isAudio) - info.is_audio = true; - else if(isImage) - info.is_image = true; - else if(isVideo) - info.is_video = true; - } - } - } - - if(isAllowedOEmbedDomain(urlInfo.host)) { - const oEmbedData = await extractOEmbedData(response, html, url, urlInfo); - if(oEmbedData.version) - info.oembed = oEmbedData; - } - - if(version < 2 && info.video_url) { - if(info.video_url.startsWith('https://www.youtube.com/')) { - const ytVidUrl = new URL(info.video_url); - const ytVidUrlParams = new URLSearchParams(ytVidUrl.search); - info.type = 'youtube:video'; - info.youtube_video_id = basename(ytVidUrl.pathname); - if(ytVidUrlParams.has('list')) - info.youtube_playlist = ytVidUrlParams.get('list'); - } else 
if(info.video_url.startsWith('https://embed.nicovideo.jp/')) { - const nndVidUrl = new URL(info.video_url); - info.type = 'niconico:video'; - info.nicovideo_video_id = basename(nndVidUrl.pathname); - } - } - - return info; -}; - -const requestHandler = async (req: Request): Response => { +Deno.serve({ port }, async (req: Request): Response => { const url = new URL(req.url); const headers = { 'X-Powered-By': 'Uiharu' }; @@ -695,207 +46,24 @@ const requestHandler = async (req: Request): Response => { return new Response('', { status: 204, headers }); } - if(url.pathname === '/metadata') { - if(!['GET', 'HEAD', 'POST'].includes(req.method)) - return new Response('', { status: 405, headers }); + if(url.pathname === '/metadata') + return handleMetadataLookup(url, headers, req, cache, hostName); - const started = performance.now(); - const urlParams = new URLSearchParams(url.search); - - headers['Content-Type'] = 'application/json;charset=utf-8'; - - let urlParamRaw: String = ''; - if(req.method === 'POST') - urlParamRaw = (await readableStreamToString(req.body)).trim(); - else - urlParamRaw = urlParams.get('url')?.trim() ?? 
''; - - if(urlParamRaw === '') - return new Response('{"error":"metadata:uri"}', { status: 400, headers }); - if(urlParamRaw.startsWith('//')) - urlParamRaw = 'https:' + urlParamRaw; - - let urlParam: URL; - try { - urlParam = new URL(urlParamRaw); - } catch(ex) { - return new Response('{"error":"metadata:uri"}', { status: 400, headers }); - } - - urlParamRaw = urlParam.toString(); - - const formatVersion = parseInt(urlParams.get('fv')) || 1; - - if(formatVersion < 1 || formatVersion > 2) - return new Response('{"error":"metadata:version"}', { status: 400, headers }); - - const urlHash = encodeBase64Url( - await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw)) - ); - const cacheKey = `uiharu:${appVersion}:md:fv${formatVersion}:${urlHash}`; - const cacheInfo = await cache.get(cacheKey); - if(cacheInfo !== undefined) - return new Response( - brotliDecompressSync(cacheInfo.value), - { - status: 200, - headers: { - ...headers, - ...{ - 'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`, - 'X-Uiharu-State': 'cache', - }, - }, - } - ); - - try { - const json = JSON.stringify( - await extractMetadata(formatVersion, urlParamRaw, urlParam) - ); - - cache.set(cacheKey, brotliCompressSync(json), { - compress: false, - lifetime: 600 - }); - - return new Response(json, { - status: 200, - headers: { - ...headers, - ...{ - 'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`, - 'X-Uiharu-State': 'fresh', - }, - }, - }); - } catch(ex) { - console.error(ex); - return new Response('{"error":"metadata:lookup"}', { status: 500, headers }); - } - } - - if(url.pathname === '/metadata/batch') { - if(!['GET', 'HEAD', 'POST'].includes(req.method)) - return new Response('', { status: 405, headers }); - - return new Response('{"took":0,"results":[]}', { - headers: { - ...headers, - ...{ 'Content-Type': 'application/json' }, - }, - }); - } + if(url.pathname === '/metadata/batch') + return handleMetadataBatchLookup(headers, 
req); const isAudio = url.pathname === '/metadata/thumb/audio'; const isVideo = url.pathname === '/metadata/thumb/video'; - if(isAudio || isVideo) { - if(!['HEAD', 'GET'].includes(req.method)) - return new Response('', { status: 405, headers }); - - let urlParamRaw: String = (new URLSearchParams(url.search)).get('url')?.trim() ?? ''; - if(urlParamRaw === '') - return new Response('missing url parameter', { status: 400, headers }); - - let scheme: String = ''; - try { - const urlParam = new URL(urlParamRaw); - if(typeof urlParam.protocol === 'string') - scheme = urlParam.protocol; - - urlParamRaw = urlParam.toString(); - } catch(ex) { - return new Response('invalid url parameter', { status: 400, headers }); - } - - if(!['http:', 'https:'].includes(scheme)) - return new Response('unsupported url scheme', { status: 400, headers }); - - // this seems like a terrible idea lol - const args = ['-i', urlParamRaw]; - if(isAudio) args.push('-an'); - args.push('-f'); - args.push('image2pipe'); - args.push('-c:v'); - args.push(isVideo ? 'png' : 'copy'); - args.push('-frames:v'); - args.push('1'); - args.push('-'); - - const { code, stdout, stderr } = await (new Deno.Command('ffmpeg', { - stdin: 'null', - stdout: 'piped', - stderr: 'piped', - args, - })).output(); - - if(code !== 0) { - console.error(new TextDecoder().decode(stderr)); - return new Response('decode failed', { status: 500, headers }); - } - - // TODO: bother with cache someday maybe - const thumb = stdout; - - return new Response(thumb, { - headers: { - ...headers, - ...{ - 'Content-Type': 'image/png', - 'Cache-Control': 'public, max-age=31536000, immutable', - }, - }, - }); - } + if(isAudio || isVideo) + return handleThumbnailRetrieve(url, headers, req, isAudio, isVideo); // serving files from /public dir - if(['HEAD', 'GET'].includes(req.method)) { - const localPathPrefix = import.meta.dirname + '/public/'; - const localPathSuffix = pathNormalize(url.pathname === '/' ? 
'/index.html' : url.pathname); - const localPath = pathNormalize(localPathPrefix + localPathSuffix); - if(localPath.startsWith(localPathPrefix) && existsSync(localPath)) { - const mediaTypes = { - 'html': 'text/html;charset=utf-8', - 'css': 'text/css;charset=utf-8', - 'txt': 'text/plain;charset=utf-8', - 'png': 'image/png', - }; - - let mediaType: String = 'application/octet-stream'; - const dotIndex = localPathSuffix.lastIndexOf('.'); - if(dotIndex >= 0) { - const ext = localPathSuffix.substring(dotIndex + 1); - if(ext in mediaTypes) - mediaType = mediaTypes[ext]; - } - - return new Response('', { - status: 200, - headers: { - ...headers, - ...{ - 'Content-Type': mediaType, - 'X-Accel-Redirect': `/_public${localPathSuffix}`, - } - }, - }); - } - - // 404 page - return new Response('404 Not Found

404 Not Found

', { - status: 404, - headers: { - ...headers, - ...{ 'Content-Type': 'text/html;charset=utf-8' }, - }, - }); - } + if(['HEAD', 'GET'].includes(req.method)) + return handlePublicPath(headers, url.pathname); // 404 fallback return new Response('', { status: ['OPTIONS', 'HEAD', 'GET', 'POST'].includes(req.method) ? 404 : 405, headers, }); -}; - -Deno.serve({ port }, requestHandler); +});