diff --git a/src/consts.ts b/src/consts.ts
new file mode 100644
index 0000000..2746f29
--- /dev/null
+++ b/src/consts.ts
@@ -0,0 +1,6 @@
+import { existsSync } from "jsr:@std/fs";
+import { join } from "jsr:@std/path";
+
+/** Release identifier of the application. */
+export const APP_VERSION: string = '20241029';
+/** True when a `.debug` marker file exists one directory above this module. */
+export const IS_DEBUG: boolean = existsSync(join(import.meta.dirname, '/../.debug'));
+/** Path of the `public` directory located one directory above this module. */
+export const PUBLIC_DIR: string = join(import.meta.dirname, '/../public');
diff --git a/src/fetch.ts b/src/fetch.ts
new file mode 100644
index 0000000..a68afc6
--- /dev/null
+++ b/src/fetch.ts
@@ -0,0 +1,16 @@
+import { APP_VERSION } from './consts.ts';
+
+/**
+ * Wrapper around `fetch` that fills in default `Accept`, `Accept-Language`
+ * and `User-Agent` request headers unless the caller already supplied a
+ * non-empty value for them.
+ *
+ * The caller's `init` object and its `headers` are copied instead of being
+ * modified, and `headers` may be a plain object, an array of pairs or a
+ * `Headers` instance. Header names are matched case-insensitively.
+ *
+ * @param url  Resource to request, passed to `fetch` unchanged.
+ * @param init Optional `fetch` options.
+ * @returns The response produced by `fetch`.
+ */
+export async function fetchWithHeaders(url: string | URL | Request, init?: RequestInit) {
+    const headers = new Headers(init?.headers);
+
+    const defaults = {
+        'Accept': 'text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8',
+        'Accept-Language': 'en-GB, en;q=0.9, ja-jp;q=0.6, *;q=0.5',
+        'User-Agent': `Mozilla/5.0 (compatible; Uiharu/${APP_VERSION}; +http://fii.moe/uiharu)`,
+    };
+
+    for(const [name, value] of Object.entries(defaults))
+        if(!headers.get(name))
+            headers.set(name, value);
+
+    return await fetch(url, { ...init, headers });
+};
diff --git a/src/handlers/local.ts b/src/handlers/local.ts
new file mode 100644
index 0000000..e90271f
--- /dev/null
+++ b/src/handlers/local.ts
@@ -0,0 +1,55 @@
+import { PUBLIC_DIR } from '../consts.ts';
+import { normalize } from "jsr:@std/path/normalize";
+import { existsSync } from "jsr:@std/fs";
+
+// File extension (without the dot) to Content-Type value.
+// A Map is used so that only the entries listed here can ever match;
+// a plain object would also "contain" inherited keys such as `constructor`.
+const mediaTypes = new Map<string, string>([
+    ['html', 'text/html;charset=utf-8'],
+    ['css', 'text/css;charset=utf-8'],
+    ['txt', 'text/plain;charset=utf-8'],
+    ['png', 'image/png'],
+]);
+
+/**
+ * Determines the Content-Type for a path from the text after its last dot.
+ *
+ * @param path Path or file name to inspect.
+ * @returns The mapped media type, or `application/octet-stream` when the
+ *          path has no dot or the extension is not in the table.
+ */
+function extractMediaType(path: string): string {
+    const dotIndex = path.lastIndexOf('.');
+    if(dotIndex < 0)
+        return 'application/octet-stream';
+
+    return mediaTypes.get(path.substring(dotIndex + 1)) ?? 'application/octet-stream';
+}
+
+/**
+ * Checks whether `path`, appended to PUBLIC_DIR and normalised, still lies
+ * inside PUBLIC_DIR and exists on disk.
+ *
+ * @param path Request path; the caller passes it with a leading slash.
+ * @returns True when the resolved location is PUBLIC_DIR itself or something
+ *          below it, and it exists.
+ */
+function publicPathExists(path: string): boolean {
+    const full = normalize(PUBLIC_DIR + path);
+    if(!full.startsWith(PUBLIC_DIR))
+        return false;
+
+    // a bare prefix match would also accept siblings like "<PUBLIC_DIR>-backup/...",
+    // so anything longer than PUBLIC_DIR must continue with a path separator
+    if(full.length > PUBLIC_DIR.length) {
+        const next = full.charAt(PUBLIC_DIR.length);
+        if(next !== '/' && next !== '\\')
+            return false;
+    }
+
+    return existsSync(full);
+}
+
+/**
+ * Answers a request for a static file.
+ *
+ * When the path exists below PUBLIC_DIR an empty 200 response is returned
+ * carrying an `X-Accel-Redirect` header pointing at `/_public<path>` and a
+ * Content-Type derived from the file extension; otherwise a small HTML 404
+ * page is returned.
+ *
+ * @param headers Base response headers (plain object) to include in the reply.
+ * @param path    Request path; `/` is treated as `/index.html`.
+ */
+export function handlePublicPath(
+    headers,
+    path: string
+): Response {
+    path = normalize(path === '/' ? '/index.html' : path);
+
+    if(publicPathExists(path))
+        return new Response('', {
+            status: 200,
+            headers: {
+                ...headers,
+                'Content-Type': extractMediaType(path),
+                'X-Accel-Redirect': `/_public${path}`,
+            },
+        });
+
+    // 404 page
+    // NOTE(review): the markup of this literal was lost in the reviewed copy and
+    // has been reconstructed around the surviving text; confirm against the original.
+    return new Response('<!doctype html><html><head><meta charset="utf-8"><title>404 Not Found</title></head><body><h1>404 Not Found</h1></body></html>', {
+        status: 404,
+        headers: {
+            ...headers,
+            'Content-Type': 'text/html;charset=utf-8',
+        },
+    });
+}
diff --git a/src/handlers/lookup.ts b/src/handlers/lookup.ts
new file mode 100644
index 0000000..8ae71bb
--- /dev/null
+++ b/src/handlers/lookup.ts
@@ -0,0 +1,107 @@
+import { APP_VERSION } from '../consts.ts';
+import { readableStreamToString } from '../rs2str.ts';
+import { extractMetadata } from '../metadata.ts';
+import { encodeBase64Url } from "jsr:@std/encoding/base64url";
+import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
+import { MemcacheClient } from 'npm:memcache-client@^1.0.5';
+
+/**
+ * Handles a metadata lookup for a single URL.
+ *
+ * The target URL comes from the request body (POST) or the `url` query
+ * parameter (GET/HEAD); the `fv` query parameter selects format version 1 or 2.
+ * Results are stored brotli-compressed in memcache for 600 seconds under a key
+ * made of the app version, the format version and a SHA-256 of the URL.
+ *
+ * @param url      URL of the incoming request (used for its query string).
+ * @param headers  Base response headers (plain object); not modified.
+ * @param req      Incoming request.
+ * @param cache    Memcache client used for result caching.
+ * @param hostName Host name handed to the metadata extractor.
+ * @returns 405 for unsupported methods, 400 with a JSON error for bad input,
+ *          500 with a JSON error when extraction fails, otherwise 200 with the
+ *          JSON metadata and an `X-Uiharu-State` of `cache` or `fresh`.
+ */
+export async function handleMetadataLookup(
+    url: URL,
+    headers,
+    req: Request,
+    cache: MemcacheClient,
+    hostName: string
+): Promise<Response> {
+    if(!['GET', 'HEAD', 'POST'].includes(req.method))
+        return new Response('', { status: 405, headers });
+
+    const started = performance.now();
+    const urlParams = new URLSearchParams(url.search);
+
+    // work on a copy so the caller's header object is left untouched
+    headers = { ...headers, 'Content-Type': 'application/json;charset=utf-8' };
+
+    let urlParamRaw: string = '';
+    if(req.method === 'POST')
+        urlParamRaw = (await readableStreamToString(req.body)).trim();
+    else
+        urlParamRaw = urlParams.get('url')?.trim() ?? '';
+
+    if(urlParamRaw === '')
+        return new Response('{"error":"metadata:uri"}', { status: 400, headers });
+    // protocol-relative input is treated as https
+    if(urlParamRaw.startsWith('//'))
+        urlParamRaw = 'https:' + urlParamRaw;
+
+    // NOTE(review): unlike the thumbnail handler, the scheme is not restricted
+    // to http/https here; consider whether other schemes should be rejected.
+    let urlParam: URL;
+    try {
+        urlParam = new URL(urlParamRaw);
+    } catch(ex) {
+        return new Response('{"error":"metadata:uri"}', { status: 400, headers });
+    }
+
+    // use the normalised form for the cache key and the extractor
+    urlParamRaw = urlParam.toString();
+
+    // missing, non-numeric and zero values all fall back to version 1
+    const formatVersion = parseInt(urlParams.get('fv') ?? '', 10) || 1;
+
+    if(formatVersion < 1 || formatVersion > 2)
+        return new Response('{"error":"metadata:version"}', { status: 400, headers });
+
+    const urlHash = encodeBase64Url(
+        await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw))
+    );
+    const cacheKey = `uiharu:${APP_VERSION}:md:fv${formatVersion}:${urlHash}`;
+
+    // a failing cache read or an undecodable entry is treated as a cache miss
+    let cached;
+    try {
+        const cacheInfo = await cache.get(cacheKey);
+        if(cacheInfo !== undefined && cacheInfo !== null)
+            cached = brotliDecompressSync(cacheInfo.value);
+    } catch(ex) {
+        console.error(ex);
+    }
+
+    if(cached !== undefined)
+        return new Response(cached, {
+            status: 200,
+            headers: {
+                ...headers,
+                'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
+                'X-Uiharu-State': 'cache',
+            },
+        });
+
+    let json: string;
+    try {
+        json = JSON.stringify(
+            await extractMetadata(formatVersion, hostName, urlParamRaw, urlParam)
+        );
+    } catch(ex) {
+        console.error(ex);
+        return new Response('{"error":"metadata:lookup"}', { status: 500, headers });
+    }
+
+    // the cache write is fire-and-forget; its failure is logged instead of
+    // surfacing as an unhandled rejection or failing the request
+    try {
+        Promise.resolve(cache.set(cacheKey, brotliCompressSync(json), {
+            compress: false,
+            lifetime: 600
+        })).catch(ex => console.error(ex));
+    } catch(ex) {
+        console.error(ex);
+    }
+
+    return new Response(json, {
+        status: 200,
+        headers: {
+            ...headers,
+            'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
+            'X-Uiharu-State': 'fresh',
+        },
+    });
+};
+
+/**
+ * Batch lookup endpoint. Currently answers every GET/HEAD/POST request with
+ * an empty result set and rejects all other methods with 405.
+ */
+export function handleMetadataBatchLookup(
+    headers,
+    req: Request
+): Response {
+    const methodAllowed = req.method === 'GET'
+        || req.method === 'HEAD'
+        || req.method === 'POST';
+
+    if(methodAllowed) {
+        const responseHeaders = Object.assign({}, headers, { 'Content-Type': 'application/json' });
+        return new Response('{"took":0,"results":[]}', { headers: responseHeaders });
+    }
+
+    return new Response('', { status: 405, headers });
+}
diff --git a/src/handlers/thumb.ts b/src/handlers/thumb.ts
new file mode 100644
index 0000000..a113683
--- /dev/null
+++ b/src/handlers/thumb.ts
@@ -0,0 +1,64 @@
+/**
+ * Produces a thumbnail for a remote media URL by running `ffmpeg` on it and
+ * returning the first frame written to ffmpeg's standard output.
+ *
+ * @param url     URL of the incoming request; the target is its `url` query parameter.
+ * @param headers Base response headers (plain object).
+ * @param req     Incoming request; only HEAD and GET are accepted.
+ * @param isAudio Adds `-an` to the ffmpeg arguments.
+ * @param isVideo Encodes the frame as PNG; otherwise the video stream is copied as-is.
+ * @returns 405/400 for invalid requests, 500 when ffmpeg cannot be run or
+ *          exits non-zero, otherwise the image bytes.
+ */
+export async function handleThumbnailRetrieve(
+    url: URL,
+    headers,
+    req: Request,
+    isAudio: boolean,
+    isVideo: boolean
+): Promise<Response> {
+    if(!['HEAD', 'GET'].includes(req.method))
+        return new Response('', { status: 405, headers });
+
+    let urlParamRaw: string = (new URLSearchParams(url.search)).get('url')?.trim() ?? '';
+    if(urlParamRaw === '')
+        return new Response('missing url parameter', { status: 400, headers });
+
+    let scheme: string = '';
+    try {
+        const urlParam = new URL(urlParamRaw);
+        scheme = urlParam.protocol;
+
+        // pass the normalised form on to ffmpeg
+        urlParamRaw = urlParam.toString();
+    } catch(ex) {
+        return new Response('invalid url parameter', { status: 400, headers });
+    }
+
+    if(!['http:', 'https:'].includes(scheme))
+        return new Response('unsupported url scheme', { status: 400, headers });
+
+    // this seems like a terrible idea lol
+    // NOTE(review): with '-c:v copy' the output keeps the source image codec,
+    // yet the response below is always labelled image/png; confirm this is intended.
+    const args = [
+        '-i', urlParamRaw,
+        ...(isAudio ? ['-an'] : []),
+        '-f', 'image2pipe',
+        '-c:v', isVideo ? 'png' : 'copy',
+        '-frames:v', '1',
+        '-',
+    ];
+
+    let result;
+    try {
+        result = await (new Deno.Command('ffmpeg', {
+            stdin: 'null',
+            stdout: 'piped',
+            stderr: 'piped',
+            args,
+        })).output();
+    } catch(ex) {
+        // e.g. the ffmpeg binary could not be started
+        console.error(ex);
+        return new Response('decode failed', { status: 500, headers });
+    }
+
+    const { code, stdout, stderr } = result;
+    if(code !== 0) {
+        console.error(new TextDecoder().decode(stderr));
+        return new Response('decode failed', { status: 500, headers });
+    }
+
+    // TODO: bother with cache someday maybe
+    const thumb = stdout;
+
+    return new Response(thumb, {
+        headers: {
+            ...headers,
+            'Content-Type': 'image/png',
+            'Cache-Control': 'public, max-age=31536000, immutable',
+        },
+    });
+}
diff --git a/src/metadata.ts b/src/metadata.ts
new file mode 100644
index 0000000..e66fa99
--- /dev/null
+++ b/src/metadata.ts
@@ -0,0 +1,274 @@
+import { IS_DEBUG } from './consts.ts';
+import { fetchWithHeaders } from './fetch.ts';
+import { readableStreamToString } from './rs2str.ts';
+import { extractOEmbedData, isAllowedOEmbedDomain } from './metadata/oembed.ts';
+import { extractOpenGraphData } from './metadata/og.ts';
+import { extractHtmlMetaData } from './metadata/html.ts';
+import { extractTwitterData } from './metadata/twitter.ts';
+import { extractLinkedData } from './metadata/ld.ts';
+import * as cheerio from 'npm:cheerio@^1.0.0';
+import { basename } from "jsr:@std/path";
+import { parseMediaType } from "jsr:@std/media-types";
+
+// Looks `name` up in an ffprobe tag dictionary under its Title-case,
+// lower-case and UPPER-CASE spellings; yields undefined when `tags` is not an object.
+function findProbeTag(tags, name: string) {
+    if(typeof tags !== 'object' || tags === null)
+        return undefined;
+
+    return tags[name] ?? tags[name.toLowerCase()] ?? tags[name.toUpperCase()];
+}
+
+/**
+ * Fetches `url` and builds a metadata object describing it.
+ *
+ * HTML documents are inspected for Open Graph, Twitter card, plain meta and
+ * JSON-LD data; audio, image and video responses are probed with `ffprobe`.
+ * For allow-listed hosts oEmbed data is attached as `oembed`. Format versions
+ * below 2 additionally receive legacy fields (`image`, `width`, `height`,
+ * `is_audio`/`is_image`/`is_video`, `type`, ...).
+ *
+ * @param version  Output format version.
+ * @param hostName Host name used to build thumbnail URLs.
+ * @param url      URL to inspect, as a string.
+ * @param urlInfo  The same URL, parsed.
+ * @returns The collected metadata object.
+ */
+export async function extractMetadata(
+    version: number,
+    hostName: string,
+    url: string,
+    urlInfo: URL
+) {
+    const response = await fetchWithHeaders(url);
+    const contentTypeRaw = response.headers.get('content-type') ?? '';
+
+    // a malformed Content-Type header from the remote server should not abort
+    // the lookup; fall back to the part before the first ';'
+    let mediaType = '';
+    try {
+        mediaType = parseMediaType(contentTypeRaw)[0] ?? '';
+    } catch(ex) {
+        mediaType = contentTypeRaw.split(';')[0].trim().toLowerCase();
+    }
+
+    const info = {};
+    const addInfoOrDont = (prop, value) => {
+        if(value !== null && value !== undefined)
+            info[prop] = value;
+    };
+
+    info.url = url;
+    try {
+        info.title = decodeURIComponent(basename(urlInfo.pathname));
+    } catch(ex) {
+        // malformed percent-encoding in the path; keep the encoded name
+        info.title = basename(urlInfo.pathname);
+    }
+    info.site_name = urlInfo.host;
+
+    if(mediaType)
+        info.media_type = mediaType;
+
+    let html = undefined;
+
+    if(['text/html', 'application/xhtml+xml'].includes(mediaType)) {
+        html = cheerio.load(await readableStreamToString(response.body));
+
+        const metaData = extractHtmlMetaData(html);
+        const ogData = extractOpenGraphData(html);
+        const twitterData = extractTwitterData(html);
+
+        addInfoOrDont('url', ogData.url ?? metaData.canonical_url);
+        addInfoOrDont('title', ogData.title ?? twitterData.title ?? metaData.title);
+        addInfoOrDont('site_name', ogData.site_name);
+        addInfoOrDont('description', ogData.description ?? twitterData.description ?? metaData.description);
+        addInfoOrDont('color', metaData.theme_color);
+
+        if(ogData.images?.length > 0) {
+            const image = ogData.images[0];
+            info.image_url = image.secure_url ?? image.url;
+            if(image.width > 0)
+                info.image_width = image.width;
+            if(image.height > 0)
+                info.image_height = image.height;
+            if(image.type)
+                info.image_type = image.type;
+            if(image.alt)
+                info.image_alt = image.alt;
+        } else {
+            addInfoOrDont('image_url', twitterData.image ?? metaData.image ?? metaData.thumbnail);
+            addInfoOrDont('image_alt', twitterData.image_alt);
+        }
+
+        if(ogData.audios?.length > 0) {
+            const audio = ogData.audios[0];
+            info.audio_url = audio.secure_url ?? audio.url;
+            if(audio.type)
+                info.audio_type = audio.type;
+        }
+
+        if(ogData.videos?.length > 0) {
+            const video = ogData.videos[0];
+            info.video_url = video.secure_url ?? video.url;
+            if(video.width > 0)
+                info.video_width = video.width;
+            if(video.height > 0)
+                info.video_height = video.height;
+            if(video.type)
+                info.video_type = video.type;
+            if(video.tags?.length > 0)
+                info.video_tags = video.tags;
+        } else {
+            addInfoOrDont('video_url', twitterData.player);
+            addInfoOrDont('video_width', twitterData.player_width);
+            addInfoOrDont('video_height', twitterData.player_height);
+        }
+
+        if(version < 2) {
+            info.image = info.image_url;
+            if(info.video_width > 0)
+                info.width = info.video_width;
+            else if(info.image_width > 0)
+                info.width = info.image_width;
+            if(info.video_height > 0)
+                info.height = info.video_height;
+            else if(info.image_height > 0)
+                info.height = info.image_height;
+        }
+
+        const linkedDatas = extractLinkedData(html);
+        // idk what to do with this yet, only including this in debug mode for now
+        if(IS_DEBUG && linkedDatas.length > 0)
+            info._lds = linkedDatas;
+    } else {
+        // the body is never read on this path (ffprobe fetches the URL itself),
+        // so release it instead of leaving the download pending
+        await response.body?.cancel().catch(() => {});
+
+        const isAudio = mediaType.startsWith('audio/');
+        const isImage = mediaType.startsWith('image/');
+        const isVideo = mediaType.startsWith('video/');
+
+        if(isAudio || isImage || isVideo) {
+            // this still seems like a terrible idea lol
+            const { code, stdout, stderr } = await (new Deno.Command('ffprobe', {
+                stdin: 'null',
+                stdout: 'piped',
+                stderr: 'piped',
+                args: [
+                    '-show_streams',
+                    '-show_format',
+                    '-print_format', 'json',
+                    '-v', 'quiet',
+                    '-i', url
+                ],
+            })).output();
+
+            if(code !== 0) {
+                console.error(new TextDecoder().decode(stderr));
+            } else {
+                const probe = JSON.parse(new TextDecoder().decode(stdout).trim());
+                if(IS_DEBUG)
+                    info._ffprobe = probe;
+
+                if(typeof probe?.format === 'object' && probe.format !== null) {
+                    const media = {};
+                    info.media = media;
+                    media.confidence = Math.min(1, Math.max(0, probe.format.probe_score / 100.0));
+
+                    const pfDuration = parseFloat(probe.format.duration);
+                    if(!isNaN(pfDuration))
+                        media.duration = pfDuration;
+
+                    const pfSize = parseInt(probe.format.size);
+                    if(!isNaN(pfSize))
+                        media.size = pfSize;
+
+                    const pfBitRate = parseInt(probe.format.bit_rate);
+                    if(!isNaN(pfBitRate)) {
+                        if(version < 2)
+                            media.bitRate = pfBitRate;
+                        else
+                            media.bitrate = pfBitRate;
+                    }
+
+                    // in Title case cus JS doesnt have an accessible lcfirst equivalent :p
+                    const pftFields = ['Title', 'Artist', 'Album', 'Date', 'Comment', 'Genre'];
+
+                    // stores a tag value under its lower-case name, creating media.tags on demand
+                    const storeTag = (fieldName: string, fieldValue) => {
+                        if(typeof fieldValue !== 'string')
+                            return;
+                        if(typeof media.tags !== 'object')
+                            media.tags = {};
+
+                        media.tags[fieldName.toLowerCase()] = fieldValue;
+                    };
+
+                    if(Array.isArray(probe.streams))
+                        for(const stream of probe.streams)
+                            if(stream.codec_type === 'video') {
+                                media.width = stream.coded_width ?? stream.width ?? 0;
+                                media.height = stream.coded_height ?? stream.height ?? 0;
+
+                                if(typeof stream.display_aspect_ratio === 'string') {
+                                    if(version < 2)
+                                        media.aspectRatio = stream.display_aspect_ratio;
+                                    else
+                                        media.aspect_ratio = stream.display_aspect_ratio;
+                                }
+                            } else if(stream.codec_type === 'audio') {
+                                // stream tags first, container tags as fallback; the
+                                // container tags may be absent altogether
+                                if(typeof stream.tags === 'object' && stream.tags !== null)
+                                    for(const pftFieldName of pftFields)
+                                        storeTag(
+                                            pftFieldName,
+                                            findProbeTag(stream.tags, pftFieldName)
+                                                ?? findProbeTag(probe.format.tags, pftFieldName)
+                                        );
+                            }
+
+                    // container-level tags take precedence over stream tags
+                    for(const pftFieldName of pftFields)
+                        storeTag(pftFieldName, findProbeTag(probe.format.tags, pftFieldName));
+                }
+            }
+
+            // ffprobe may have failed or found no tags at all
+            const mediaTags = info.media?.tags ?? {};
+
+            if(isAudio) {
+                info.audio_url = url;
+                info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/audio?url=${encodeURIComponent(url)}`;
+                info.image_type = 'image/png';
+
+                let title = '';
+                if(typeof mediaTags.artist === 'string')
+                    title += `${mediaTags.artist} - `;
+                if(typeof mediaTags.title === 'string')
+                    title += mediaTags.title;
+                if(typeof mediaTags.date === 'string')
+                    title += ` (${mediaTags.date})`;
+                title = title.trim();
+                if(title !== '')
+                    info.title = title;
+
+                if(typeof mediaTags.comment === 'string')
+                    info.description = mediaTags.comment.trim();
+            } else if(isImage) {
+                info.image_url = url;
+                info.image_type = info.media_type;
+
+                if(info.media?.width > 0)
+                    info.width = info.image_width = info.media.width;
+                if(info.media?.height > 0)
+                    info.height = info.image_height = info.media.height;
+            } else if(isVideo) {
+                info.video_url = url;
+                info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/video?url=${encodeURIComponent(url)}`;
+                info.image_type = 'image/png';
+
+                if(info.media?.width > 0)
+                    info.image_width = info.width = info.video_width = info.media.width;
+                if(info.media?.height > 0)
+                    info.image_height = info.height = info.video_height = info.media.height;
+            }
+
+            if(version < 2) {
+                info.image = info.image_url;
+
+                if(isAudio)
+                    info.is_audio = true;
+                else if(isImage)
+                    info.is_image = true;
+                else if(isVideo)
+                    info.is_video = true;
+            }
+        }
+    }
+
+    if(isAllowedOEmbedDomain(urlInfo.host)) {
+        const oEmbedData = await extractOEmbedData(response, html, url, urlInfo);
+        if(oEmbedData.version)
+            info.oembed = oEmbedData;
+    }
+
+    // legacy (format version 1) fields for known video embeds
+    if(version < 2 && info.video_url) {
+        if(info.video_url.startsWith('https://www.youtube.com/')) {
+            const ytVidUrl = new URL(info.video_url);
+            const ytVidUrlParams = new URLSearchParams(ytVidUrl.search);
+            info.type = 'youtube:video';
+            info.youtube_video_id = basename(ytVidUrl.pathname);
+            if(ytVidUrlParams.has('list'))
+                info.youtube_playlist = ytVidUrlParams.get('list');
+        } else if(info.video_url.startsWith('https://embed.nicovideo.jp/')) {
+            const nndVidUrl = new URL(info.video_url);
+            info.type = 'niconico:video';
+            info.nicovideo_video_id = basename(nndVidUrl.pathname);
+        }
+    }
+
+    return info;
+};
diff --git a/src/metadata/html.ts b/src/metadata/html.ts
new file mode 100644
index 0000000..068f7aa
--- /dev/null
+++ b/src/metadata/html.ts
@@ -0,0 +1,31 @@
+import { Color } from "https://deno.land/x/color@v0.3.0/mod.ts";
+
+/**
+ * Collects basic metadata from a loaded HTML document: the `<title>` text,
+ * the `description`, `thumbnail` and `theme-color` meta tags and the
+ * `image_src` and `canonical` links.
+ *
+ * @param html Document query function (as returned by `cheerio.load`).
+ * @returns Object holding only the fields that were present and non-empty:
+ *          `title`, `description`, `thumbnail`, `theme_color`, `image`,
+ *          `canonical_url`.
+ */
+export function extractHtmlMetaData(html) {
+    const values = {};
+
+    const titleTag = html('title')?.first()?.text().trim() ?? '';
+    if(titleTag.length > 0)
+        values.title = titleTag;
+
+    const metaDescriptionTag = html('meta[name="description"]').first()?.attr('content')?.trim() ?? '';
+    if(metaDescriptionTag.length > 0)
+        values.description = metaDescriptionTag;
+
+    const metaThumbnailTag = html('meta[name="thumbnail"]').first()?.attr('content')?.trim() ?? '';
+    if(metaThumbnailTag.length > 0)
+        values.thumbnail = metaThumbnailTag;
+
+    const metaThemeColorTag = html('meta[name="theme-color"]').first()?.attr('content')?.trim() ?? '';
+    if(metaThemeColorTag.length > 0)
+        try {
+            values.theme_color = Color.string(metaThemeColorTag).hex();
+        } catch(ex) {
+            // the value comes from an arbitrary page; a colour that cannot be
+            // parsed is skipped rather than failing the whole extraction
+            console.error(ex);
+        }
+
+    const linkImageSrcTag = html('link[rel="image_src"]').first()?.attr('href')?.trim() ?? '';
+    if(linkImageSrcTag.length > 0)
+        values.image = linkImageSrcTag;
+
+    const linkCanonicalTag = html('link[rel="canonical"]').first()?.attr('href')?.trim() ?? '';
+    if(linkCanonicalTag.length > 0)
+        values.canonical_url = linkCanonicalTag;
+
+    return values;
+};
diff --git a/src/metadata/ld.ts b/src/metadata/ld.ts
new file mode 100644
index 0000000..974ae0c
--- /dev/null
+++ b/src/metadata/ld.ts
@@ -0,0 +1,13 @@
+/**
+ * Gathers every JSON-LD payload embedded in the document.
+ * Script blocks whose content is not valid JSON are logged and left out.
+ */
+export function extractLinkedData(html) {
+    const parsed = [];
+
+    for(const scriptNode of html('script[type="application/ld+json"]')) {
+        try {
+            const payload = html(scriptNode).text().trim();
+            parsed.push(JSON.parse(payload));
+        } catch(err) {
+            console.error(err);
+        }
+    }
+
+    return parsed;
+};
diff --git a/src/metadata/oembed.ts b/src/metadata/oembed.ts
new file mode 100644
index 0000000..c7fa699
--- /dev/null
+++ b/src/metadata/oembed.ts
@@ -0,0 +1,166 @@
+import { fetchWithHeaders } from '../fetch.ts';
+
+// copied from wordpress source sorta
+// i was going to make this a config setting but some services dont provide an alternate url
+// so it will become more involved in the future
+const allowOEmbed: string[] = [
+    '.youtube.com',
+    '.youtu.be',
+    '.vimeo.com',
+    '.dailymotion.com',
+    '.dai.ly',
+    '.flickr.com',
+    '.flic.kr',
+    '.smugmug.com',
+    '.scribd.com',
+    '.wordpress.tv',
+    '.crowdsignal.net',
+    '.polldaddy.com',
+    '.poll.fm',
+    '.survey.fm',
+    '.twitter.com',
+    // extractOEmbedData has an explicit x.com case, which is only reachable
+    // when the host passes this allow-list
+    '.x.com',
+    '.soundcloud.com',
+    '.spotify.com',
+    '.imgur.com',
+    '.issuu.com',
+    '.mixcloud.com',
+    '.ted.com',
+    '.animoto.com',
+    '.video214.com',
+    '.tumblr.com',
+    '.kickstarter.com',
+    '.kck.st',
+    '.cloudup.com',
+    '.reverbnation.com',
+    '.videopress.com',
+    '.reddit.com',
+    '.speakerdeck.com',
+    '.screencast.com',
+    '.amazon.com',
+    '.amazon.com.mx',
+    '.amazon.com.br',
+    '.amazon.ca',
+    '.amazon.co.uk',
+    '.amazon.de',
+    '.amazon.fr',
+    '.amazon.it',
+    '.amazon.es',
+    '.amazon.in',
+    '.amazon.nl',
+    '.amazon.ru',
+    '.amazon.co.jp',
+    '.amazon.com.au',
+    '.amazon.cn',
+    '.a.co',
+    '.amzn.to',
+    '.amzn.eu',
+    '.amzn.in',
+    '.amzn.asia',
+    '.z.cn',
+    '.somecards.com',
+    '.some.ly',
+    '.tiktok.com',
+    '.pinterest.com',
+    '.pinterest.com.au',
+    '.pinterest.com.mx',
+    '.wolframcloud.com',
+    '.instagram.com',
+    '.facebook.com',
+    '.pca.st',
+    '.anghami.com',
+    '.bsky.app',
+    '.apple.com',
+    '.flashii.net',
+    '.fii.moe',
+    '.tako.zone',
+    '.patchii.net',
+    '.railgun.sh',
+    '.flash.moe',
+    '.edgii.net',
+];
+
+// Tells whether the host `user` is the domain `known` or one of its subdomains.
+// Both names are compared with a leading dot so that "notx.com" does not match "x.com".
+function isDomainSuffix(known: string, user: string) {
+    const withLeadingDot = (name: string) => name.startsWith('.') ? name : '.' + name;
+
+    return withLeadingDot(user).endsWith(withLeadingDot(known));
+};
+
+/**
+ * Parses the value of an HTTP `Link` header into a list of link objects.
+ *
+ * Each returned object carries the parameters of one link (`rel`, `type`, ...)
+ * plus `href`. Entries that do not start with a `<...>` target are skipped, as
+ * are parameters without a `=`. Entries are split on commas, so targets that
+ * themselves contain a comma are not supported.
+ *
+ * @param header Raw header value; an empty string yields an empty list.
+ */
+function parseLinkHeader(header: string) {
+    const links: Record<string, string>[] = [];
+
+    for(const line of header.split(',')) {
+        const parts = line.trim().split(';').map(part => part.trim());
+
+        let href = parts.shift();
+        if(typeof href !== 'string' || !href.startsWith('<') || !href.endsWith('>'))
+            continue;
+
+        href = href.slice(1, -1);
+        try {
+            href = decodeURI(href);
+        } catch(ex) {
+            // malformed percent-encoding; keep the target as it was sent
+        }
+
+        const link: Record<string, string> = {};
+        links.push(link);
+
+        for(const part of parts) {
+            // valueless parameters and empty segments (trailing ';') are ignored
+            const equalsAt = part.indexOf('=');
+            if(equalsAt < 1)
+                continue;
+
+            // only the first '=' separates name and value, the value may contain more
+            const name = part.slice(0, equalsAt).trim();
+            let value = part.slice(equalsAt + 1).trim();
+            if(value.length >= 2 && value.startsWith('"') && value.endsWith('"'))
+                value = value.slice(1, -1);
+
+            link[name] = value;
+        }
+
+        // applying this last to avoid tomfoolery :3
+        link.href = href;
+    }
+
+    return links;
+};
+
+/**
+ * Tells whether oEmbed discovery is permitted for a host, i.e. whether the
+ * host equals or is a subdomain of one of the entries in `allowOEmbed`.
+ *
+ * @param domain Host name, with or without a leading dot.
+ */
+export function isAllowedOEmbedDomain(domain: string): boolean {
+    // entries in the list start with a dot, so give the host one as well
+    if(!domain.startsWith('.'))
+        domain = '.' + domain;
+
+    return allowOEmbed.some(suffix => domain.endsWith(suffix));
+};
+
+/**
+ * Locates and downloads the oEmbed document for a page.
+ *
+ * The endpoint is taken, in order, from a built-in table of services, from
+ * the page's `<link rel="alternate" type="application/json+oembed">` tag, or
+ * from the response's `Link` header.
+ *
+ * @param response Response of the page request (only its headers are read).
+ * @param html     Loaded document query function, or undefined for non-HTML.
+ * @param url      URL of the page, as a string.
+ * @param urlInfo  The same URL, parsed.
+ * @returns The decoded oEmbed object, or `{}` when no endpoint was found or
+ *          it could not be fetched and decoded.
+ */
+export async function extractOEmbedData(response: Response, html, url: string, urlInfo: URL) {
+    let oEmbedUrl: string = '';
+
+    // TODO: maintain a proper list, many services do not expose discovery tags
+    if(isDomainSuffix('x.com', urlInfo.host) || isDomainSuffix('twitter.com', urlInfo.host))
+        oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodeURIComponent(url)}`;
+    else if(isDomainSuffix('soundcloud.com', urlInfo.host))
+        oEmbedUrl = `https://soundcloud.com/oembed?format=json&url=${encodeURIComponent(url)}`;
+    else if(isDomainSuffix('tiktok.com', urlInfo.host))
+        oEmbedUrl = `https://www.tiktok.com/oembed?url=${encodeURIComponent(url)}`;
+    else if(isDomainSuffix('mixcloud.com', urlInfo.host))
+        oEmbedUrl = `https://app.mixcloud.com/oembed/?url=${encodeURIComponent(url)}`;
+    else if(html !== undefined)
+        oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first()?.attr('href')?.trim() ?? '';
+
+    if(oEmbedUrl === '') {
+        const links = parseLinkHeader(response.headers.get('link') ?? '');
+        for(const link of links)
+            if(link.rel === 'alternate' && link.type === 'application/json+oembed') {
+                oEmbedUrl = link.href;
+                break;
+            }
+    }
+
+    if(oEmbedUrl === '')
+        return {};
+
+    try {
+        // discovered endpoints may be relative to the page and come from
+        // untrusted markup, so resolve them and only follow http(s)
+        const target = new URL(oEmbedUrl, url);
+        if(!['http:', 'https:'].includes(target.protocol))
+            return {};
+
+        // awaited here so that a body that is not valid JSON is caught below
+        const data = await (await fetchWithHeaders(target.toString())).json();
+
+        // callers read properties off the result, so never hand back null or a primitive
+        return typeof data === 'object' && data !== null ? data : {};
+    } catch(ex) {
+        console.error(ex);
+        return {};
+    }
+};
diff --git a/src/metadata/og.ts b/src/metadata/og.ts
new file mode 100644
index 0000000..ac0f0b1
--- /dev/null
+++ b/src/metadata/og.ts
@@ -0,0 +1,117 @@
+/**
+ * Extracts Open Graph (`og:*`) meta tags from a loaded HTML document.
+ *
+ * Scalar properties become fields of the result; `og:image`, `og:video` and
+ * `og:audio` (with their sub-properties) are grouped into the `images`,
+ * `videos` and `audios` arrays, a new item being started whenever a
+ * sub-property repeats. Values failing the per-property validation (URL
+ * scheme, integer, media type) are dropped.
+ *
+ * @param html Document query function (as returned by `cheerio.load`).
+ * @returns Object holding the recognised properties.
+ */
+export function extractOpenGraphData(html) {
+    const values = {};
+
+    // this is hateful
+    // type:   how the value is validated ('str', 'url', 'int', 'mime')
+    // of:     the structured property this sub-property belongs to
+    // alias:  property that this one is shorthand for
+    // array:  name of the array the value (or object) is collected in
+    // protos: URL schemes accepted for 'url' values
+    const properties = {
+        'url': { type: 'url' },
+        'type': { type: 'str' },
+        'title': { type: 'str' },
+        'locale': { type: 'str' },
+        'locale:alternate': { type: 'str', array: 'locales' },
+        'description': { type: 'str' },
+        'determiner': { type: 'str' },
+        'site_name': { type: 'str' },
+
+        'image': { alias: 'image:url', array: 'images' },
+        'image:url': { of: 'image', type: 'url' },
+        'image:secure_url': { of: 'image', type: 'url', protos: ['https:'] },
+        'image:type': { of: 'image', type: 'mime' },
+        'image:width': { of: 'image', type: 'int' },
+        'image:height': { of: 'image', type: 'int' },
+        // must be 'str': any type name the validation below does not know
+        // causes the value to be discarded
+        'image:alt': { of: 'image', type: 'str' },
+
+        'video': { alias: 'video:url', array: 'videos' },
+        'video:url': { of: 'video', type: 'url' },
+        'video:secure_url': { of: 'video', type: 'url', protos: ['https:'] },
+        'video:type': { of: 'video', type: 'mime' },
+        'video:width': { of: 'video', type: 'int' },
+        'video:height': { of: 'video', type: 'int' },
+        'video:tag': { of: 'video', type: 'str', array: 'tags' },
+
+        'audio': { alias: 'audio:url', array: 'audios' },
+        'audio:url': { of: 'audio', type: 'url' },
+        'audio:secure_url': { of: 'audio', type: 'url', protos: ['https:'] },
+        'audio:type': { of: 'audio', type: 'mime' },
+    };
+
+    const tags = html('meta[property^="og:"]');
+    for(const tagInfo of tags) {
+        const tag = html(tagInfo);
+
+        // strip the "og:" prefix
+        let name = (tag.attr('property')?.trim() ?? '').substring(3);
+        if(!(name in properties))
+            continue;
+
+        let value = tag.attr('content')?.trim() ?? '';
+
+        let propInfo = properties[name];
+        let target = values;
+
+        if(propInfo.alias) {
+            name = propInfo.alias;
+            propInfo = properties[name];
+        }
+
+        if(propInfo.of) {
+            // drop the "<parent>:" prefix, leaving the field name within the object
+            name = name.substring(propInfo.of.length + 1);
+            const objInfo = properties[propInfo.of];
+
+            if(objInfo.array) {
+                if(objInfo.array in target)
+                    target = target[objInfo.array];
+                else
+                    target = target[objInfo.array] = [];
+
+                // a field that is already set on the latest item starts a new item
+                const lastItem = target[target.length - 1];
+                if(lastItem === undefined || name in lastItem) {
+                    const newItem = {};
+                    target.push(newItem);
+                    target = newItem;
+                } else
+                    target = lastItem;
+            } else {
+                if(!(name in target))
+                    target[name] = {};
+
+                target = target[name];
+            }
+        }
+
+        if(propInfo.array) {
+            if(propInfo.array in target)
+                target = target[propInfo.array];
+            else
+                target = target[propInfo.array] = [];
+        } else if(name in target)
+            continue; // first occurrence of a scalar wins
+
+        if(propInfo.type === 'int')
+            value = parseInt(value);
+        else {
+            if(propInfo.type === 'mime') {
+                // world's most naive validation
+                if(value.indexOf('/') < 0)
+                    value = undefined;
+            } else if(propInfo.type === 'url') {
+                try {
+                    const protos = propInfo.protos ?? ['https:', 'http:'];
+                    if(!protos.includes(new URL(value).protocol))
+                        value = undefined;
+                } catch(ex) {
+                    console.error(ex);
+                    value = undefined;
+                }
+            } else if(propInfo.type !== 'str')
+                value = undefined;
+        }
+
+        // empty strings, NaN and rejected values are skipped
+        if(value) {
+            if(propInfo.array)
+                target.push(value);
+            else
+                target[name] = value;
+        }
+    }
+
+    return values;
+};
diff --git a/src/metadata/twitter.ts b/src/metadata/twitter.ts
new file mode 100644
index 0000000..36e675f
--- /dev/null
+++ b/src/metadata/twitter.ts
@@ -0,0 +1,27 @@
+/**
+ * Reads the known `twitter:*` card meta tags from a loaded HTML document.
+ * The returned object has one entry per tag with non-empty content, keyed by
+ * the property name with its first ':' turned into '_'.
+ */
+export function extractTwitterData(html) {
+    const cardFields = [
+        'card',
+        'site',
+        'site:id',
+        'creator',
+        'creator:id',
+        'description',
+        'title',
+        'image',
+        'image:alt',
+        'player',
+        'player:width',
+        'player:height',
+        'player:stream',
+    ];
+    const collected = {};
+
+    cardFields.forEach(field => {
+        const content = html(`meta[name="twitter:${field}"]`)?.first()?.attr('content')?.trim() ?? '';
+        if(content.length === 0)
+            return;
+
+        collected[field.replace(':', '_')] = content;
+    });
+
+    return collected;
+};
+
diff --git a/src/rs2str.ts b/src/rs2str.ts
new file mode 100644
index 0000000..4e656e8
--- /dev/null
+++ b/src/rs2str.ts
@@ -0,0 +1,18 @@
+/**
+ * Reads a byte stream to its end and decodes it as UTF-8 text.
+ *
+ * @param stream Stream to read; `null` or `undefined` (no body) yields an
+ *               empty string.
+ * @returns The decoded text. Multi-byte characters split across chunks are
+ *          decoded correctly because the decoder runs in streaming mode.
+ */
+export async function readableStreamToString(stream?: ReadableStream<Uint8Array> | null): Promise<string> {
+    // the parameter is optional, so undefined has to be covered as well as null
+    if(stream === null || stream === undefined)
+        return '';
+
+    const reader = stream.getReader();
+    const decoder = new TextDecoder();
+    let result = '';
+
+    try {
+        for(;;) {
+            const { done, value } = await reader.read();
+            if(done) break;
+            result += decoder.decode(value, { stream: true });
+        }
+    } finally {
+        // give the stream back even when reading fails
+        reader.releaseLock();
+    }
+
+    // flush whatever the decoder is still holding on to
+    result += decoder.decode();
+
+    return result;
+};
diff --git a/uiharu.ts b/uiharu.ts
index daef591..0dd6963 100644
--- a/uiharu.ts
+++ b/uiharu.ts
@@ -1,12 +1,7 @@
-import * as cheerio from 'npm:cheerio@^1.0.0';
+import { handleMetadataLookup, handleMetadataBatchLookup } from './src/handlers/lookup.ts';
+import { handleThumbnailRetrieve } from './src/handlers/thumb.ts';
+import { handlePublicPath } from './src/handlers/local.ts';
import { MemcacheClient } from 'npm:memcache-client@^1.0.5';
-import { existsSync } from "jsr:@std/fs";
-import { basename, join as pathJoin } from "jsr:@std/path";
-import { normalize as pathNormalize } from "jsr:@std/path/normalize";
-import { encodeBase64Url } from "jsr:@std/encoding/base64url";
-import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
-import { parseMediaType } from "jsr:@std/media-types";
-import { Color } from "https://deno.land/x/color@v0.3.0/mod.ts";
// todo: these should not be hardcoded lol
const hostName: String = 'uiharu.edgii.net';
@@ -18,84 +13,7 @@ const allowedOrigins: String[] = [
'sockchat.edgii.net',
'ajaxchat.edgii.net',
];
-const allowOEmbed: String[] = [ // copied from wordpress source sorta
- '.youtube.com',
- '.youtu.be',
- '.vimeo.com',
- '.dailymotion.com',
- '.dai.ly',
- '.flickr.com',
- '.flic.kr',
- '.smugmug.com',
- '.scribd.com',
- '.wordpress.tv',
- '.crowdsignal.net',
- '.polldaddy.com',
- '.poll.fm',
- '.survey.fm',
- '.twitter.com',
- '.soundcloud.com',
- '.spotify.com',
- '.imgur.com',
- '.issuu.com',
- '.mixcloud.com',
- '.ted.com',
- '.animoto.com',
- '.video214.com',
- '.tumblr.com',
- '.kickstarter.com',
- '.kck.st',
- '.cloudup.com',
- '.reverbnation.com',
- '.videopress.com',
- '.reddit.com',
- '.speakerdeck.com',
- '.screencast.com',
- '.amazon.com',
- '.amazon.com.mx',
- '.amazon.com.br',
- '.amazon.ca',
- '.amazon.co.uk',
- '.amazon.de',
- '.amazon.fr',
- '.amazon.it',
- '.amazon.es',
- '.amazon.in',
- '.amazon.nl',
- '.amazon.ru',
- '.amazon.co.jp',
- '.amazon.com.au',
- '.amazon.cn',
- '.a.co',
- '.amzn.to',
- '.amzn.eu',
- '.amzn.in',
- '.amzn.asia',
- '.z.cn',
- '.somecards.com',
- '.some.ly',
- '.tiktok.com',
- '.pinterest.com',
- '.pinterest.com.au',
- '.pinterest.com.mx',
- '.wolframcloud.com',
- '.instagram.com',
- '.facebook.com',
- '.pca.st',
- '.anghami.com',
- '.bsky.app',
- '.apple.com',
- '.flashii.net',
- '.fii.moe',
- '.tako.zone',
- '.patchii.net',
- '.railgun.sh',
- '.flash.moe',
- '.edgii.net',
-];
-const appVersion: String = '20241029';
-const isDebug: Boolean = existsSync(pathJoin(import.meta.dirname, '.debug'));
const cache: MemcacheClient = new MemcacheClient({
server: memcacheServer,
compressor: {
@@ -105,574 +23,7 @@ const cache: MemcacheClient = new MemcacheClient({
},
});
-const uiharuFetch = async (url, init) => {
- if(!init)
- init = {};
- if(!init.headers)
- init.headers = {};
- if(!init.headers['Accept'])
- init.headers['Accept'] = 'text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8';
- if(!init.headers['Accept-Language'])
- init.headers['Accept-Language'] = 'en-GB, en;q=0.9, ja-jp;q=0.6, *;q=0.5';
- if(!init.headers['User-Agent'])
- init.headers['User-Agent'] = `Mozilla/5.0 (compatible; Uiharu/${appVersion}; +http://fii.moe/uiharu)`;
-
- return await fetch(url, init);
-};
-
-const readableStreamToString = async (stream?: ReadableStream): string => {
- if(stream === null)
- return '';
-
- const reader = stream.getReader();
- const decoder = new TextDecoder;
- let result = '';
-
- for(;;) {
- const { done, value } = await reader.read();
- if(done) break;
- result += decoder.decode(value, { stream: true });
- }
-
- result += decoder.decode();
-
- return result;
-};
-
-const isDomainSuffix = (known: string, user: string) => {
- if(!known.startsWith('.'))
- known = '.' + known;
- if(!user.startsWith('.'))
- user = '.' + user;
-
- return user.endsWith(known);
-};
-
-const isAllowedOEmbedDomain = (domain: string): Boolean => {
- if(!domain.startsWith('.'))
- domain = '.' + domain;
-
- for(const suffix of allowOEmbed)
- if(domain.endsWith(suffix))
- return true;
-
- return false;
-};
-
-const extractHtmlMetaData = (html) => {
- const values = {};
-
- const titleTag = html('title')?.first()?.text().trim() ?? '';
- if(titleTag.length > 0)
- values.title = titleTag;
-
- const metaDescriptionTag = html('meta[name="description"]').first()?.attr('content')?.trim() ?? '';
- if(metaDescriptionTag.length > 0)
- values.description = metaDescriptionTag;
-
- const metaThumbnailTag = html('meta[name="thumbnail"]').first()?.attr('content')?.trim() ?? '';
- if(metaThumbnailTag.length > 0)
- values.thumbnail = metaThumbnailTag;
-
- const metaThemeColorTag = html('meta[name="theme-color"]').first()?.attr('content')?.trim() ?? '';
- if(metaThemeColorTag.length > 0)
- values.theme_color = Color.string(metaThemeColorTag).hex();
-
- const linkImageSrcTag = html('link[rel="image_src"]').first()?.attr('href')?.trim() ?? '';
- if(linkImageSrcTag.length > 0)
- values.image = linkImageSrcTag;
-
- const linkCanonicalTag = html('link[rel="canonical"]').first()?.attr('href')?.trim() ?? '';
- if(linkCanonicalTag.length > 0)
- values.canonical_url = linkCanonicalTag;
-
- return values;
-};
-
-const extractOpenGraphData = (html) => {
- const values = {};
-
- // this is hateful
- const properties = {
- 'url': { type: 'url' },
- 'type': { type: 'str' },
- 'title': { type: 'str' },
- 'locale': { type: 'str' },
- 'locale:alternate': { type: 'str', array: 'locales' },
- 'description': { type: 'str' },
- 'determiner': { type: 'str' },
- 'site_name': { type: 'str' },
-
- 'image': { alias: 'image:url', array: 'images' },
- 'image:url': { of: 'image', type: 'url' },
- 'image:secure_url': { of: 'image', type: 'url', protos: ['https:'] },
- 'image:type': { of: 'image', type: 'mime' },
- 'image:width': { of: 'image', type: 'int' },
- 'image:height': { of: 'image', type: 'int' },
- 'image:alt': { of: 'image', type: 'string' },
-
- 'video': { alias: 'video:url', array: 'videos' },
- 'video:url': { of: 'video', type: 'url' },
- 'video:secure_url': { of: 'video', type: 'url', protos: ['https:'] },
- 'video:type': { of: 'video', type: 'mime' },
- 'video:width': { of: 'video', type: 'int' },
- 'video:height': { of: 'video', type: 'int' },
- 'video:tag': { of: 'video', type: 'str', array: 'tags' },
-
- 'audio': { alias: 'audio:url', array: 'audios' },
- 'audio:url': { of: 'audio', type: 'url' },
- 'audio:secure_url': { of: 'audio', type: 'url', protos: ['https:'] },
- 'audio:type': { of: 'audio', type: 'mime' },
- };
-
- const tags = html('meta[property^="og:"]');
- for(const tagInfo of tags) {
- const tag = html(tagInfo);
-
- let name = (tag.attr('property')?.trim() ?? '').substring(3);
- if(!(name in properties))
- continue;
-
- let value = tag.attr('content')?.trim() ?? '';
-
- let propInfo = properties[name];
- let target = values;
-
- if(propInfo.alias) {
- name = propInfo.alias;
- propInfo = properties[name];
- }
-
- if(propInfo.of) {
- name = name.substring(propInfo.of.length + 1);
- const objInfo = properties[propInfo.of];
-
- if(objInfo.array) {
- if(objInfo.array in target)
- target = target[objInfo.array];
- else
- target = target[objInfo.array] = [];
-
- const lastItem = target[target.length - 1];
- if(lastItem === undefined || name in lastItem) {
- const newItem = {};
- target.push(newItem);
- target = newItem;
- } else
- target = lastItem;
- } else {
- if(!(name in target))
- target[name] = {};
-
- target = target[name];
- }
- }
-
- if(propInfo.array) {
- if(propInfo.array in target)
- target = target[propInfo.array];
- else
- target = target[propInfo.array] = [];
- } else if(name in target)
- continue;
-
- if(propInfo.type === 'int')
- value = parseInt(value);
- else {
- if(propInfo.type === 'mime') {
- // world's most naive validation
- if(value.indexOf('/') < 0)
- value = undefined;
- } else if(propInfo.type === 'url') {
- try {
- const protos = propInfo.protos ?? ['https:', 'http:'];
- if(!protos.includes(new URL(value).protocol))
- value = undefined;
- } catch(ex) {
- console.error(ex);
- value = undefined;
- }
- } else if(propInfo.type !== 'str')
- value = undefined;
- }
-
- if(value) {
- if(propInfo.array)
- target.push(value);
- else
- target[name] = value;
- }
- }
-
- return values;
-};
-
-const extractTwitterData = (html) => {
- const values = {};
- const properties = [
- 'card',
- 'site',
- 'site:id',
- 'creator',
- 'creator:id',
- 'description',
- 'title',
- 'image',
- 'image:alt',
- 'player',
- 'player:width',
- 'player:height',
- 'player:stream',
- ];
-
- for(const property of properties) {
- const tag = html(`meta[name="twitter:${property}"]`)?.first()?.attr('content')?.trim() ?? '';
- if(tag.length > 0)
- values[property.replace(':', '_')] = tag;
- }
-
- return values;
-};
-
-const extractLinkedData = (html) => {
- const values = [];
-
- const tags = html('script[type="application/ld+json"]');
- for(const tagInfo of tags)
- try {
- values.push(JSON.parse(html(tagInfo).text().trim()));
- } catch(ex) {
- console.error(ex);
- }
-
- return values;
-};
-
-const parseLinkHeader = (header: string) => {
- const links = [];
-
- const lines = header.split(',');
- for(const key in lines) {
- const parts = lines[key].trim().split(';').map(part => part.trim());
-
- let href = parts.shift();
- if(typeof href !== 'string' || !href.startsWith('<') || !href.endsWith('>'))
- continue;
-
- href = decodeURI(href.slice(1, -1));
- const link = {};
- links.push(link);
-
- for(const part of parts) {
- const attr = part.split('=', 2);
- let value = attr[1];
- if(value.startsWith('"') && value.endsWith('"'))
- value = value.slice(1, -1);
-
- link[attr[0]] = value;
- }
-
- // applying this last to avoid tomfoolery :3
- link.href = href;
- }
-
- return links;
-};
-
-const extractOEmbedData = async (response: Response, html, url: string, urlInfo: URL) => {
- let oEmbedUrl: string = '';
-
- // TODO: maintain a fucking list because its too difficult for services to just provide tags
- if(isDomainSuffix('x.com', urlInfo.host) || isDomainSuffix('twitter.com', urlInfo.host))
- oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodeURIComponent(url)}`;
- else if(isDomainSuffix('soundcloud.com', urlInfo.host))
- oEmbedUrl = `https://soundcloud.com/oembed?format=json&url=${encodeURIComponent(url)}`;
- else if(isDomainSuffix('tiktok.com', urlInfo.host))
- oEmbedUrl = `https://www.tiktok.com/oembed?url=${encodeURIComponent(url)}`;
- else if(isDomainSuffix('mixcloud.com', urlInfo.host))
- oEmbedUrl = `https://app.mixcloud.com/oembed/?url=${encodeURIComponent(url)}`;
- else if(html !== undefined)
- oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first()?.attr('href')?.trim() ?? '';
-
- if(oEmbedUrl === '') {
- const links = parseLinkHeader(response.headers.get('link') ?? '');
- for(const link of links)
- if(link.rel === 'alternate' && link.type === 'application/json+oembed') {
- oEmbedUrl = link.href;
- break;
- }
- }
-
- if(oEmbedUrl === '')
- return {};
-
- try {
- return (await uiharuFetch(oEmbedUrl)).json();
- } catch(ex) {
- console.error(ex);
- return {};
- }
-};
-
-const extractMetadata = async (version: number, url: string, urlInfo: URL) => {
- const response = await uiharuFetch(url);
- const contentTypeRaw = response.headers.get('content-type') ?? '';
- const contentType = parseMediaType(contentTypeRaw);
-
- const info = {};
- const addInfoOrDont = (prop, value) => {
- if(value !== null && value !== undefined)
- info[prop] = value;
- };
-
- info.url = url;
- info.title = decodeURIComponent(basename(urlInfo.pathname));
- info.site_name = urlInfo.host;
-
- if(contentType[0])
- info.media_type = contentType[0];
-
- let html = undefined;
-
- if(['text/html', 'application/xhtml+xml'].includes(contentType[0])) {
- html = cheerio.load(await readableStreamToString(response.body));
-
- const metaData = extractHtmlMetaData(html);
- const ogData = extractOpenGraphData(html);
- const twitterData = extractTwitterData(html);
-
- addInfoOrDont('url', ogData.url ?? metaData.canonical_url);
- addInfoOrDont('title', ogData.title ?? twitterData.title ?? metaData.title);
- addInfoOrDont('site_name', ogData.site_name);
- addInfoOrDont('description', ogData.description ?? twitterData.description ?? metaData.description);
- addInfoOrDont('color', metaData.theme_color);
-
- if(ogData.images?.length > 0) {
- const image = ogData.images[0];
- info.image_url = image.secure_url ?? image.url;
- if(image.width > 0)
- info.image_width = image.width;
- if(image.height > 0)
- info.image_height = image.height;
- if(image.type)
- info.image_type = image.type;
- if(image.alt)
- info.image_alt = image.alt;
- } else {
- addInfoOrDont('image_url', twitterData.image ?? metaData.image ?? metaData.thumbnail);
- addInfoOrDont('image_alt', twitterData.image_alt);
- }
-
- if(ogData.audios?.length > 0) {
- const audio = ogData.audios[0];
- info.audio_url = audio.secure_url ?? audio.url;
- if(audio.type)
- info.audio_type = audio.type;
- }
-
- if(ogData.videos?.length > 0) {
- const video = ogData.videos[0];
- info.video_url = video.secure_url ?? video.url;
- if(video.width > 0)
- info.video_width = video.width;
- if(video.height > 0)
- info.video_height = video.height;
- if(video.type)
- info.video_type = video.type;
- if(video.tags?.length > 0)
- info.video_tags = video.tags;
- } else {
- addInfoOrDont('video_url', twitterData.player);
- addInfoOrDont('video_width', twitterData.player_width);
- addInfoOrDont('video_height', twitterData.player_height);
- }
-
- if(version < 2) {
- info.image = info.image_url;
- if(info.video_width > 0)
- info.width = info.video_width;
- else if(info.image_width > 0)
- info.width = info.image_width;
- if(info.video_height > 0)
- info.height = info.video_height;
- else if(info.image_height > 0)
- info.height = info.image_height;
- }
-
- const linkedDatas = extractLinkedData(html);
- // idk what to do with this yet, only including this in debug mode for now
- if(isDebug && linkedDatas.length > 0)
- info._lds = linkedDatas;
- } else {
- const isAudio = contentType[0].startsWith('audio/');
- const isImage = contentType[0].startsWith('image/');
- const isVideo = contentType[0].startsWith('video/');
-
- if(isAudio || isImage || isVideo) {
- // this still seems like a terrible idea lol
- const { code, stdout, stderr } = await (new Deno.Command('ffprobe', {
- stdin: 'null',
- stdout: 'piped',
- stderr: 'piped',
- args: [
- '-show_streams',
- '-show_format',
- '-print_format', 'json',
- '-v', 'quiet',
- '-i', url
- ],
- })).output();
-
- if(code !== 0) {
- console.error(new TextDecoder().decode(stderr));
- } else {
- const probe = JSON.parse(new TextDecoder().decode(stdout).trim());
- if(isDebug)
- info._ffprobe = probe;
-
- if(typeof probe?.format === 'object') {
- const media = {};
- info.media = media;
- media.confidence = Math.min(1, Math.max(0, probe.format.probe_score / 100.0));
-
- const pfDuration = parseFloat(probe.format.duration);
- if(!isNaN(pfDuration))
- media.duration = pfDuration;
-
- const pfSize = parseInt(probe.format.size);
- if(!isNaN(pfSize))
- media.size = pfSize;
-
- const pfBitRate = parseInt(probe.format.bit_rate);
- if(!isNaN(pfBitRate)) {
- if(version < 2)
- media.bitRate = pfBitRate;
- else
- media.bitrate = pfBitRate;
- }
-
- // in Title case cus JS doesnt have an accessible lcfirst equivalent :p
- const pftFields = ['Title', 'Artist', 'Album', 'Date', 'Comment', 'Genre'];
-
- if(Array.isArray(probe.streams))
- for(const stream of probe.streams)
- if(stream.codec_type === 'video') {
- media.width = stream.coded_width ?? stream.width ?? 0;
- media.height = stream.coded_height ?? stream.height ?? 0;
-
- if(typeof stream.display_aspect_ratio === 'string') {
- if(version < 2)
- media.aspectRatio = stream.display_aspect_ratio;
- else
- media.aspect_ratio = stream.display_aspect_ratio;
- }
- } else if(stream.codec_type === 'audio') {
- if(typeof stream.tags === 'object')
- for(const pftFieldName of pftFields) {
- const pftFieldValue = stream.tags[pftFieldName]
- ?? probe.format.tags[pftFieldName.toLowerCase()]
- ?? probe.format.tags[pftFieldName.toUpperCase()];
-
- if(typeof pftFieldValue === 'string') {
- if(typeof media.tags !== 'object')
- media.tags = {};
-
- media.tags[pftFieldName.toLowerCase()] = pftFieldValue;
- }
- }
- }
-
- if(typeof probe.format.tags === 'object')
- for(const pftFieldName of pftFields) {
- const pftFieldValue = probe.format.tags[pftFieldName]
- ?? probe.format.tags[pftFieldName.toLowerCase()]
- ?? probe.format.tags[pftFieldName.toUpperCase()];
-
- if(typeof pftFieldValue === 'string') {
- if(typeof media.tags !== 'object')
- media.tags = {};
-
- media.tags[pftFieldName.toLowerCase()] = pftFieldValue;
- }
- }
- }
- }
-
- if(isAudio) {
- info.audio_url = url;
- info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/audio?url=${encodeURIComponent(url)}`;
- info.image_type = 'image/png';
-
- let title = '';
- if(typeof info.media.tags.artist === 'string')
- title += `${info.media.tags.artist} - `;
- if(typeof info.media.tags.title === 'string')
- title += info.media.tags.title;
- if(typeof info.media.tags.date === 'string')
- title += ` (${info.media.tags.date})`;
- title = title.trim();
- if(title !== '')
- info.title = title;
-
- if(typeof info.media.tags.comment === 'string')
- info.description = info.media.tags.comment.trim();
- } else if(isImage) {
- info.image_url = url;
- info.image_type = info.media_type;
-
- if(info.media.width > 0)
- info.width = info.image_width = info.media.width;
- if(info.media.height > 0)
- info.height = info.image_height = info.media.height;
- } else if(isVideo) {
- info.video_url = url;
- info.image_url = `${version < 2 ? '' : 'https:'}//${hostName}/metadata/thumb/video?url=${encodeURIComponent(url)}`;
- info.image_type = 'image/png';
-
- if(info.media.width > 0)
- info.image_width = info.width = info.video_width = info.media.width;
- if(info.media.height > 0)
- info.image_height = info.height = info.video_height = info.media.height;
- }
-
- if(version < 2) {
- info.image = info.image_url;
-
- if(isAudio)
- info.is_audio = true;
- else if(isImage)
- info.is_image = true;
- else if(isVideo)
- info.is_video = true;
- }
- }
- }
-
- if(isAllowedOEmbedDomain(urlInfo.host)) {
- const oEmbedData = await extractOEmbedData(response, html, url, urlInfo);
- if(oEmbedData.version)
- info.oembed = oEmbedData;
- }
-
- if(version < 2 && info.video_url) {
- if(info.video_url.startsWith('https://www.youtube.com/')) {
- const ytVidUrl = new URL(info.video_url);
- const ytVidUrlParams = new URLSearchParams(ytVidUrl.search);
- info.type = 'youtube:video';
- info.youtube_video_id = basename(ytVidUrl.pathname);
- if(ytVidUrlParams.has('list'))
- info.youtube_playlist = ytVidUrlParams.get('list');
- } else if(info.video_url.startsWith('https://embed.nicovideo.jp/')) {
- const nndVidUrl = new URL(info.video_url);
- info.type = 'niconico:video';
- info.nicovideo_video_id = basename(nndVidUrl.pathname);
- }
- }
-
- return info;
-};
-
-const requestHandler = async (req: Request): Response => {
+Deno.serve({ port }, async (req: Request): Promise<Response> => { // async handler, so the declared return type must be a Promise
const url = new URL(req.url);
const headers = { 'X-Powered-By': 'Uiharu' };
@@ -695,207 +46,24 @@ const requestHandler = async (req: Request): Response => {
return new Response('', { status: 204, headers });
}
- if(url.pathname === '/metadata') {
- if(!['GET', 'HEAD', 'POST'].includes(req.method))
- return new Response('', { status: 405, headers });
+ if(url.pathname === '/metadata')
+ return handleMetadataLookup(url, headers, req, cache, hostName);
- const started = performance.now();
- const urlParams = new URLSearchParams(url.search);
-
- headers['Content-Type'] = 'application/json;charset=utf-8';
-
- let urlParamRaw: String = '';
- if(req.method === 'POST')
- urlParamRaw = (await readableStreamToString(req.body)).trim();
- else
- urlParamRaw = urlParams.get('url')?.trim() ?? '';
-
- if(urlParamRaw === '')
- return new Response('{"error":"metadata:uri"}', { status: 400, headers });
- if(urlParamRaw.startsWith('//'))
- urlParamRaw = 'https:' + urlParamRaw;
-
- let urlParam: URL;
- try {
- urlParam = new URL(urlParamRaw);
- } catch(ex) {
- return new Response('{"error":"metadata:uri"}', { status: 400, headers });
- }
-
- urlParamRaw = urlParam.toString();
-
- const formatVersion = parseInt(urlParams.get('fv')) || 1;
-
- if(formatVersion < 1 || formatVersion > 2)
- return new Response('{"error":"metadata:version"}', { status: 400, headers });
-
- const urlHash = encodeBase64Url(
- await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw))
- );
- const cacheKey = `uiharu:${appVersion}:md:fv${formatVersion}:${urlHash}`;
- const cacheInfo = await cache.get(cacheKey);
- if(cacheInfo !== undefined)
- return new Response(
- brotliDecompressSync(cacheInfo.value),
- {
- status: 200,
- headers: {
- ...headers,
- ...{
- 'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
- 'X-Uiharu-State': 'cache',
- },
- },
- }
- );
-
- try {
- const json = JSON.stringify(
- await extractMetadata(formatVersion, urlParamRaw, urlParam)
- );
-
- cache.set(cacheKey, brotliCompressSync(json), {
- compress: false,
- lifetime: 600
- });
-
- return new Response(json, {
- status: 200,
- headers: {
- ...headers,
- ...{
- 'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
- 'X-Uiharu-State': 'fresh',
- },
- },
- });
- } catch(ex) {
- console.error(ex);
- return new Response('{"error":"metadata:lookup"}', { status: 500, headers });
- }
- }
-
- if(url.pathname === '/metadata/batch') {
- if(!['GET', 'HEAD', 'POST'].includes(req.method))
- return new Response('', { status: 405, headers });
-
- return new Response('{"took":0,"results":[]}', {
- headers: {
- ...headers,
- ...{ 'Content-Type': 'application/json' },
- },
- });
- }
+ if(url.pathname === '/metadata/batch')
+ return handleMetadataBatchLookup(headers, req);
const isAudio = url.pathname === '/metadata/thumb/audio';
const isVideo = url.pathname === '/metadata/thumb/video';
- if(isAudio || isVideo) {
- if(!['HEAD', 'GET'].includes(req.method))
- return new Response('', { status: 405, headers });
-
- let urlParamRaw: String = (new URLSearchParams(url.search)).get('url')?.trim() ?? '';
- if(urlParamRaw === '')
- return new Response('missing url parameter', { status: 400, headers });
-
- let scheme: String = '';
- try {
- const urlParam = new URL(urlParamRaw);
- if(typeof urlParam.protocol === 'string')
- scheme = urlParam.protocol;
-
- urlParamRaw = urlParam.toString();
- } catch(ex) {
- return new Response('invalid url parameter', { status: 400, headers });
- }
-
- if(!['http:', 'https:'].includes(scheme))
- return new Response('unsupported url scheme', { status: 400, headers });
-
- // this seems like a terrible idea lol
- const args = ['-i', urlParamRaw];
- if(isAudio) args.push('-an');
- args.push('-f');
- args.push('image2pipe');
- args.push('-c:v');
- args.push(isVideo ? 'png' : 'copy');
- args.push('-frames:v');
- args.push('1');
- args.push('-');
-
- const { code, stdout, stderr } = await (new Deno.Command('ffmpeg', {
- stdin: 'null',
- stdout: 'piped',
- stderr: 'piped',
- args,
- })).output();
-
- if(code !== 0) {
- console.error(new TextDecoder().decode(stderr));
- return new Response('decode failed', { status: 500, headers });
- }
-
- // TODO: bother with cache someday maybe
- const thumb = stdout;
-
- return new Response(thumb, {
- headers: {
- ...headers,
- ...{
- 'Content-Type': 'image/png',
- 'Cache-Control': 'public, max-age=31536000, immutable',
- },
- },
- });
- }
+ if(isAudio || isVideo)
+ return handleThumbnailRetrieve(url, headers, req, isAudio, isVideo);
// serving files from /public dir
- if(['HEAD', 'GET'].includes(req.method)) {
- const localPathPrefix = import.meta.dirname + '/public/';
- const localPathSuffix = pathNormalize(url.pathname === '/' ? '/index.html' : url.pathname);
- const localPath = pathNormalize(localPathPrefix + localPathSuffix);
- if(localPath.startsWith(localPathPrefix) && existsSync(localPath)) {
- const mediaTypes = {
- 'html': 'text/html;charset=utf-8',
- 'css': 'text/css;charset=utf-8',
- 'txt': 'text/plain;charset=utf-8',
- 'png': 'image/png',
- };
-
- let mediaType: String = 'application/octet-stream';
- const dotIndex = localPathSuffix.lastIndexOf('.');
- if(dotIndex >= 0) {
- const ext = localPathSuffix.substring(dotIndex + 1);
- if(ext in mediaTypes)
- mediaType = mediaTypes[ext];
- }
-
- return new Response('', {
- status: 200,
- headers: {
- ...headers,
- ...{
- 'Content-Type': mediaType,
- 'X-Accel-Redirect': `/_public${localPathSuffix}`,
- }
- },
- });
- }
-
- // 404 page
- return new Response('404 Not Found404 Not Found
', {
- status: 404,
- headers: {
- ...headers,
- ...{ 'Content-Type': 'text/html;charset=utf-8' },
- },
- });
- }
+ if(['HEAD', 'GET'].includes(req.method))
+ return handlePublicPath(headers, url.pathname);
// 404 fallback
return new Response('', {
status: ['OPTIONS', 'HEAD', 'GET', 'POST'].includes(req.method) ? 404 : 405,
headers,
});
-};
-
-Deno.serve({ port }, requestHandler);
+});