Switched to Deno + WIP.
This commit is contained in:
parent
956402ca97
commit
030625afa8
9 changed files with 738 additions and 1396 deletions
30
LICENCE
Normal file
30
LICENCE
Normal file
|
@ -0,0 +1,30 @@
|
|||
Copyright (c) 2021-2024, flashwave <me@flash.moe>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted (subject to the limitations in the disclaimer
|
||||
below) provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
|
||||
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
6
README.md
Normal file
6
README.md
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Uiharu
|
||||
|
||||
Uiharu is a service that looks up metadata for a given URL.
|
||||
|
||||
It runs on the [Deno](https://deno.com/) JavaScript runtime and requires a [Memcached](https://www.memcached.org/) server for caching.
|
||||
HTML wrangling is done using the [Cheerio](https://github.com/cheeriojs/cheerio) library.
|
28
deno.lock
Normal file
28
deno.lock
Normal file
|
@ -0,0 +1,28 @@
|
|||
{
|
||||
"version": "4",
|
||||
"specifiers": {
|
||||
"jsr:@std/fs@*": "1.0.5",
|
||||
"jsr:@std/path@*": "1.0.7",
|
||||
"jsr:@std/path@^1.0.7": "1.0.7"
|
||||
},
|
||||
"jsr": {
|
||||
"@std/fs@1.0.5": {
|
||||
"integrity": "41806ad6823d0b5f275f9849a2640d87e4ef67c51ee1b8fb02426f55e02fd44e",
|
||||
"dependencies": [
|
||||
"jsr:@std/path@^1.0.7"
|
||||
]
|
||||
},
|
||||
"@std/path@1.0.7": {
|
||||
"integrity": "76a689e07f0e15dcc6002ec39d0866797e7156629212b28f27179b8a5c3b33a1"
|
||||
}
|
||||
},
|
||||
"workspace": {
|
||||
"packageJson": {
|
||||
"dependencies": [
|
||||
"npm:cheerio@1",
|
||||
"npm:express@^5.0.1",
|
||||
"npm:memcache-client@^1.0.5"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
9
ecosystem.config.js
Normal file
9
ecosystem.config.js
Normal file
|
@ -0,0 +1,9 @@
|
|||
module.exports = {
|
||||
apps: [
|
||||
{
|
||||
script: './uiharu.ts',
|
||||
interpreter: 'deno',
|
||||
interpreterArgs: 'run --allow-net --allow-read --allow-run --allow-env',
|
||||
},
|
||||
],
|
||||
};
|
75
extract.mjs
75
extract.mjs
|
@ -1,75 +0,0 @@
|
|||
import * as cheerio from 'cheerio';
|
||||
|
||||
/**
 * Collects everything written to standard input.
 *
 * Resolves with the concatenated input once the stream ends and rejects
 * with the stream's error if it emits one.
 */
const readStdIn = () => new Promise((resolve, reject) => {
    const { stdin } = process;
    let collected = '';

    stdin.on('data', chunk => { collected += chunk; });
    stdin.on('end', () => { resolve(collected); });
    stdin.on('error', failure => { reject(failure); });
});
|
||||
|
||||
// Parse the HTML document piped in on standard input.
const $ = cheerio.load(await readStdIn());

// Result record printed as JSON at the end; every field starts at its default.
const info = {
    title: '',
    metaTitle: '',
    desc: '',
    siteName: '',
    image: '',
    colour: '',
    type: 'website',
};

const firstTitle = $('title').first();
if(firstTitle.length > 0)
    info.title = firstTitle.text().trim();

// Keeps whichever candidate is longest (or the first one seen while the field is empty).
const preferLongest = (field, candidate) => {
    if(info[field] === '' || candidate.length > info[field].length)
        info[field] = candidate;
};

// Keeps the first candidate seen and ignores later ones once the field is non-empty.
const preferFirst = (field, candidate) => {
    if(info[field] === '')
        info[field] = candidate;
};

// Maps a <meta> name/property to the rule that folds its value into `info`.
const metaRules = new Map([
    ['og:title', value => preferLongest('metaTitle', value)],
    ['twitter:title', value => preferLongest('metaTitle', value)],
    ['description', value => preferLongest('desc', value)],
    ['og:description', value => preferLongest('desc', value)],
    ['twitter:description', value => preferLongest('desc', value)],
    ['og:site_name', value => preferFirst('siteName', value)],
    ['og:image', value => preferFirst('image', value)],
    ['twitter:image', value => preferFirst('image', value)],
    ['theme-color', value => preferFirst('colour', value)],
    ['og:type', value => {
        // NOTE(review): `info.type` starts out as 'website', so this condition
        // is never true and og:type is currently ignored.
        if(info.type === '')
            info.type = `website:${value}`;
    }],
]);

for(const node of $('meta')) {
    const meta = $(node);

    // The tag's key comes from `name`, falling back to `property`.
    const key = (meta.attr('name') ?? meta.attr('property') ?? '').trim();
    if(key === '')
        continue;

    // The tag's value comes from `value`, falling back to `content`.
    const content = (meta.attr('value') ?? meta.attr('content') ?? '').trim();

    const rule = metaRules.get(key);
    if(rule !== undefined)
        rule(content);
}

console.log(JSON.stringify(info));
|
1237
package-lock.json
generated
1237
package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
@ -1,7 +0,0 @@
|
|||
{
|
||||
"dependencies": {
|
||||
"cheerio": "^1.0.0",
|
||||
"express": "^5.0.1",
|
||||
"memcache-client": "^1.0.5"
|
||||
}
|
||||
}
|
77
uiharu.js
77
uiharu.js
|
@ -1,77 +0,0 @@
|
|||
const express = require('express');
|
||||
const fs = require('fs');
|
||||
const memcache = require('memcache-client');
|
||||
const { join: pathJoin } = require('path');
|
||||
|
||||
// todo: these should not be hardcoded lol

// TCP port passed to app.listen().
const port = 3009;

// Address of the Memcached server handed to the cache client.
const memcacheServer = "localhost:11211";

// Hosts accepted in the Origin request header; any other origin gets a 403.
const allowedOrigins = [
    'edgii.net',
    'chat.edgii.net',
    'sockchat.edgii.net',
    'ajaxchat.edgii.net',
];
|
||||
|
||||
// Express application that all middleware and routes below are attached to.
const app = express();

// Debug mode is signalled by a `.debug` file sitting next to this script.
const debugMarkerPath = pathJoin(__dirname, '.debug');
const isDebug = fs.existsSync(debugMarkerPath);

// Memcached client pointed at the configured server.
const cache = new memcache.MemcacheClient({ server: memcacheServer });
|
||||
|
||||
// Placeholder for the metadata lookup; it currently does nothing and returns undefined.
// Note the parameter order: response first, then request, then the URL.
const handleMetadata = (res, req, url) => {
    // not implemented yet
};
|
||||
|
||||
// Global middleware: brands every response and enforces the Origin allow-list.
// Requests without an Origin header pass through untouched; requests whose
// Origin is not allowed (or cannot be parsed) are answered with 403.
app.use((req, res, next) => {
    res.set('X-Powered-By', 'Uiharu');

    const origin = req.get('origin');
    if(origin !== undefined) {
        let originHost;
        try {
            originHost = new URL(origin).host;
        } catch(ex) {
            // A malformed or opaque ("null") Origin cannot match the allow-list;
            // reject it instead of letting the URL constructor's exception bubble up.
            res.status(403).end();
            return;
        }

        if(!allowedOrigins.includes(originHost)) {
            res.status(403).end();
            return;
        }

        // Echo the exact origin back and tell caches the response varies by it.
        res.set('Access-Control-Allow-Origin', origin);
        res.set('Vary', 'Origin');
    }

    if(req.method === 'OPTIONS')
        res.set('Access-Control-Allow-Methods', 'OPTIONS, GET, POST');

    next();
});
|
||||
|
||||
// Static assets are served straight from the `public` directory.
app.use(express.static('public'));

// Shared handler for endpoints that are not implemented yet.
const notImplemented = (req, res) => {
    res.status(501).send('Not Implemented');
};

// Shared handler for the batch endpoint, which always answers with an empty result set.
const emptyBatch = (req, res) => {
    res.type('application/json')
        .send('{"took":0,"results":[]}')
        .end();
};

app.get('/metadata', notImplemented);
app.post('/metadata', notImplemented);

app.get('/metadata/batch', emptyBatch);
app.post('/metadata/batch', emptyBatch);

app.get('/metadata/thumb/audio', notImplemented);
app.get('/metadata/thumb/video', notImplemented);

// Start accepting connections and announce the port once listening.
app.listen(port, () => {
    console.log(`Uiharu listening to port ${port}!`);
});
|
665
uiharu.ts
Normal file
665
uiharu.ts
Normal file
|
@ -0,0 +1,665 @@
|
|||
import * as cheerio from 'npm:cheerio@^1.0.0';
|
||||
import { MemcacheClient } from 'npm:memcache-client@^1.0.5';
|
||||
import { existsSync } from "jsr:@std/fs";
|
||||
import { basename, join as pathJoin } from "jsr:@std/path";
|
||||
import { normalize as pathNormalize } from "jsr:@std/path/normalize";
|
||||
import { encodeBase64Url } from "jsr:@std/encoding/base64url";
|
||||
import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
|
||||
import { parseMediaType } from "jsr:@std/media-types";
|
||||
import { Color } from "https://deno.land/x/color@v0.3.0/mod.ts";
|
||||
|
||||
// todo: these should not be hardcoded lol

// TCP port handed to Deno.serve().
const port = 3009;

// Address of the Memcached server handed to the cache client.
const memcacheServer = "127.0.0.1:11211";

// Hosts accepted in the Origin request header; any other origin gets a 403.
// Primitive `string` (not the boxed `String` wrapper) so the values type-check
// against APIs that take plain strings.
const allowedOrigins: string[] = [
    'edgii.net',
    'chat.edgii.net',
    'sockchat.edgii.net',
    'ajaxchat.edgii.net',
];
|
||||
// Host suffixes whose pages may trigger an oEmbed lookup.
// Each entry starts with a dot so it matches the domain itself and any subdomain
// once the candidate host has been dot-prefixed.
const allowOEmbed: string[] = [ // copied from wordpress source sorta
    '.youtube.com',
    '.youtu.be',
    '.vimeo.com',
    '.dailymotion.com',
    '.dai.ly',
    '.flickr.com',
    '.flic.kr',
    '.smugmug.com',
    '.scribd.com',
    '.wordpress.tv',
    '.crowdsignal.net',
    '.polldaddy.com',
    '.poll.fm',
    '.survey.fm',
    '.twitter.com',
    // x.com is special-cased by the oEmbed extractor, so it has to pass the allow-list too
    '.x.com',
    '.soundcloud.com',
    '.spotify.com',
    '.imgur.com',
    '.issuu.com',
    '.mixcloud.com',
    '.ted.com',
    '.animoto.com',
    '.video214.com',
    '.tumblr.com',
    '.kickstarter.com',
    '.kck.st',
    '.cloudup.com',
    '.reverbnation.com',
    '.videopress.com',
    '.reddit.com',
    '.speakerdeck.com',
    '.screencast.com',
    '.amazon.com',
    '.amazon.com.mx',
    '.amazon.com.br',
    '.amazon.ca',
    '.amazon.co.uk',
    '.amazon.de',
    '.amazon.fr',
    '.amazon.it',
    '.amazon.es',
    '.amazon.in',
    '.amazon.nl',
    '.amazon.ru',
    '.amazon.co.jp',
    '.amazon.com.au',
    '.amazon.cn',
    '.a.co',
    '.amzn.to',
    '.amzn.eu',
    '.amzn.in',
    '.amzn.asia',
    '.z.cn',
    '.somecards.com',
    '.some.ly',
    '.tiktok.com',
    '.pinterest.com',
    '.pinterest.com.au',
    '.pinterest.com.mx',
    '.wolframcloud.com',
    '.pca.st',
    '.anghami.com',
    '.bsky.app',
    '.apple.com',
];
|
||||
|
||||
// Debug mode is signalled by a `.debug` file sitting next to this module.
// `import.meta.dirname` may be undefined, so fall back to a path relative to
// the working directory instead of passing undefined to the path join.
const isDebug = existsSync(pathJoin(import.meta.dirname ?? '.', '.debug'));

// Memcached client pointed at the configured server.
const cache: MemcacheClient = new MemcacheClient({
    server: memcacheServer,
    compressor: {
        // fuck it lol
        // pass-through "compressor": buffers are handed back untouched in both directions
        compressSync: buffer => buffer,
        decompressSync: buffer => buffer,
    },
});
|
||||
|
||||
/**
 * Reads a byte stream to completion and decodes it as text using the
 * TextDecoder default encoding (UTF-8).
 *
 * @param stream Stream to drain; `null` or `undefined` (e.g. a request or
 *               response without a body) yields an empty string.
 * @returns The decoded contents of the stream.
 */
const readableStreamToString = async (stream?: ReadableStream<Uint8Array> | null): Promise<string> => {
    // `== null` covers both the omitted argument and the `null` body of bodiless messages
    if(stream == null)
        return '';

    const reader = stream.getReader();
    const decoder = new TextDecoder();
    let result = '';

    try {
        for(;;) {
            const { done, value } = await reader.read();
            if(done) break;

            // stream mode keeps multi-byte sequences that straddle chunk boundaries intact
            result += decoder.decode(value, { stream: true });
        }
    } finally {
        reader.releaseLock();
    }

    // flush whatever the decoder is still holding on to
    result += decoder.decode();

    return result;
};
|
||||
|
||||
/**
 * Tells whether a host is covered by the oEmbed allow-list.
 *
 * The host is dot-prefixed before comparison so that a suffix such as
 * `.example.com` matches both `example.com` and `sub.example.com`, but not
 * `notexample.com`.
 *
 * @param domain Host to check, with or without a leading dot.
 * @returns `true` when the host ends with one of the allowed suffixes.
 */
const isAllowedOEmbedDomain = (domain: string): boolean => {
    const dotted = domain.startsWith('.') ? domain : `.${domain}`;
    return allowOEmbed.some(suffix => dotted.endsWith(suffix));
};
|
||||
|
||||
/**
 * Pulls the plain (non Open Graph, non Twitter) metadata out of a parsed document.
 *
 * @param html Loaded cheerio document (callable with a selector).
 * @returns An object holding only the fields that were present and non-empty:
 *          `title`, `description`, `thumbnail`, `theme_color` (as a hex string),
 *          `image` and `canonical_url`.
 */
const extractHtmlMetaData = (html) => {
    const values: Record<string, string> = {};

    // trimmed attribute of the first element matching the selector, '' when absent
    const firstAttr = (selector: string, attr: string): string =>
        html(selector).first().attr(attr)?.trim() ?? '';

    const title: string = html('title').first().text().trim();
    if(title.length > 0)
        values.title = title;

    const description = firstAttr('meta[name="description"]', 'content');
    if(description.length > 0)
        values.description = description;

    const thumbnail = firstAttr('meta[name="thumbnail"]', 'content');
    if(thumbnail.length > 0)
        values.thumbnail = thumbnail;

    const themeColor = firstAttr('meta[name="theme-color"]', 'content');
    if(themeColor.length > 0)
        try {
            values.theme_color = Color.string(themeColor).hex();
        } catch(ex) {
            // The value is arbitrary page-supplied text. NOTE(review): how the colour
            // library reacts to unparseable input isn't visible here; guarding keeps one
            // bad tag from failing the whole lookup, like the other extractors do.
            console.error(ex);
        }

    const imageSrc = firstAttr('link[rel="image_src"]', 'href');
    if(imageSrc.length > 0)
        values.image = imageSrc;

    const canonical = firstAttr('link[rel="canonical"]', 'href');
    if(canonical.length > 0)
        values.canonical_url = canonical;

    return values;
};
|
||||
|
||||
/** Value kinds an Open Graph property can be validated as. */
type OpenGraphValueType = 'str' | 'url' | 'mime' | 'int';

/** Describes how a single `og:` property is validated and where it is stored. */
interface OpenGraphProperty {
    /** how the content attribute is validated/converted */
    type?: OpenGraphValueType;
    /** property this one is shorthand for (e.g. `image` -> `image:url`) */
    alias?: string;
    /** structured parent this property belongs to (e.g. `image` for `image:width`) */
    of?: string;
    /** key of the array the values (or structured objects) are collected under */
    array?: string;
    /** URL schemes accepted for `url` values; defaults to https and http */
    protos?: string[];
}

type OpenGraphObject = Record<string, unknown>;

// this is hateful
const openGraphProperties = new Map<string, OpenGraphProperty>([
    ['url', { type: 'url' }],
    ['type', { type: 'str' }],
    ['title', { type: 'str' }],
    ['locale', { type: 'str' }],
    ['locale:alternate', { type: 'str', array: 'locales' }],
    ['description', { type: 'str' }],
    ['determiner', { type: 'str' }],
    ['site_name', { type: 'str' }],

    ['image', { alias: 'image:url', array: 'images' }],
    ['image:url', { of: 'image', type: 'url' }],
    ['image:secure_url', { of: 'image', type: 'url', protos: ['https:'] }],
    ['image:type', { of: 'image', type: 'mime' }],
    ['image:width', { of: 'image', type: 'int' }],
    ['image:height', { of: 'image', type: 'int' }],
    // was declared as 'string', which is not a known value type, so alt text never made it through
    ['image:alt', { of: 'image', type: 'str' }],

    ['video', { alias: 'video:url', array: 'videos' }],
    ['video:url', { of: 'video', type: 'url' }],
    ['video:secure_url', { of: 'video', type: 'url', protos: ['https:'] }],
    ['video:type', { of: 'video', type: 'mime' }],
    ['video:width', { of: 'video', type: 'int' }],
    ['video:height', { of: 'video', type: 'int' }],
    ['video:tag', { of: 'video', type: 'str', array: 'tags' }],

    ['audio', { alias: 'audio:url', array: 'audios' }],
    ['audio:url', { of: 'audio', type: 'url' }],
    ['audio:secure_url', { of: 'audio', type: 'url', protos: ['https:'] }],
    ['audio:type', { of: 'audio', type: 'mime' }],
]);

// Returns the array stored under `key`, creating it first when it does not exist yet.
const openGraphChildArray = (owner: OpenGraphObject, key: string): unknown[] => {
    const existing = owner[key];
    if(Array.isArray(existing))
        return existing;

    const created: unknown[] = [];
    owner[key] = created;
    return created;
};

// Validates/converts a raw content attribute; anything unusable comes back falsy.
const coerceOpenGraphValue = (raw: string, info: OpenGraphProperty): string | number | undefined => {
    switch(info.type) {
        case 'int':
            // NaN (and 0) are filtered out by the caller's truthiness check
            return parseInt(raw, 10);

        case 'mime':
            // world's most naive validation
            return raw.includes('/') ? raw : undefined;

        case 'url':
            try {
                const protos = info.protos ?? ['https:', 'http:'];
                return protos.includes(new URL(raw).protocol) ? raw : undefined;
            } catch(ex) {
                console.error(ex);
                return undefined;
            }

        case 'str':
            return raw;

        default:
            return undefined;
    }
};

/**
 * Collects the Open Graph (`og:*`) metadata of a parsed document.
 *
 * Plain properties are stored under their own name (first occurrence wins).
 * Structured properties (image/video/audio) are grouped into objects collected
 * under `images`/`videos`/`audios`; a new object is started whenever a property
 * shows up that the current object already has. Repeatable properties
 * (`locale:alternate`, `video:tag`) are collected into arrays.
 *
 * @param html Loaded cheerio document (callable with a selector or an element).
 * @returns The collected values; invalid or empty values are left out.
 */
const extractOpenGraphData = (html) => {
    const values: OpenGraphObject = {};

    for(const tagInfo of html('meta[property^="og:"]')) {
        const tag = html(tagInfo);

        // strip the "og:" prefix
        let name: string = (tag.attr('property')?.trim() ?? '').substring(3);
        let propInfo = openGraphProperties.get(name);
        if(propInfo === undefined)
            continue;

        if(propInfo.alias !== undefined) {
            name = propInfo.alias;
            propInfo = openGraphProperties.get(name);
            if(propInfo === undefined)
                continue;
        }

        let target: OpenGraphObject = values;

        if(propInfo.of !== undefined) {
            // "image:width" -> "width"
            name = name.substring(propInfo.of.length + 1);

            // every structured parent in the table collects its objects in an array
            const collection = openGraphProperties.get(propInfo.of)?.array;
            if(collection === undefined)
                continue;

            const items = openGraphChildArray(values, collection);
            // only objects created below are ever pushed onto these arrays
            const lastItem = items[items.length - 1] as OpenGraphObject | undefined;
            if(lastItem === undefined || name in lastItem) {
                target = {};
                items.push(target);
            } else
                target = lastItem;
        }

        let sink: unknown[] | undefined;
        if(propInfo.array !== undefined)
            sink = openGraphChildArray(target, propInfo.array);
        else if(name in target)
            continue;

        const value = coerceOpenGraphValue(tag.attr('content')?.trim() ?? '', propInfo);
        if(!value)
            continue;

        if(sink !== undefined)
            sink.push(value);
        else
            target[name] = value;
    }

    return values;
};
|
||||
|
||||
/**
 * Collects the Twitter card (`twitter:*`) metadata of a parsed document.
 *
 * @param html Loaded cheerio document (callable with a selector).
 * @returns One entry per card field that is present and non-empty, keyed by
 *          the field name with its first colon replaced by an underscore
 *          (e.g. `image:alt` becomes `image_alt`).
 */
const extractTwitterData = (html) => {
    const cardFields = [
        'card',
        'site', 'site:id',
        'creator', 'creator:id',
        'description',
        'title',
        'image', 'image:alt',
        'player', 'player:width', 'player:height', 'player:stream',
    ];

    const found: Array<[string, string]> = [];
    cardFields.forEach(field => {
        const content: string = html(`meta[name="twitter:${field}"]`)?.first()?.attr('content')?.trim() ?? '';
        if(content.length > 0)
            found.push([field.replace(':', '_'), content]);
    });

    return Object.fromEntries(found);
};
|
||||
|
||||
/**
 * Collects every JSON-LD document embedded in a parsed page.
 *
 * @param html Loaded cheerio document (callable with a selector or an element).
 * @returns The parsed contents of each `<script type="application/ld+json">`
 *          element, in document order. Scripts that fail to parse are logged
 *          and left out.
 */
const extractLinkedData = (html) => {
    const documents: unknown[] = [];
    const scripts = html('script[type="application/ld+json"]');

    for(const script of scripts) {
        try {
            const source: string = html(script).text().trim();
            documents.push(JSON.parse(source));
        } catch(parseError) {
            console.error(parseError);
        }
    }

    return documents;
};
|
||||
|
||||
/**
 * Looks up the oEmbed document for a page.
 *
 * The endpoint is taken from the page's
 * `<link rel="alternate" type="application/json+oembed">` element, except for
 * x.com/twitter.com where Twitter's publish endpoint is used directly.
 *
 * @param html    Loaded cheerio document (callable with a selector).
 * @param url     Address of the page, as a string.
 * @param urlInfo The same address, parsed.
 * @returns The oEmbed JSON object, or an empty object when no endpoint was
 *          found, the endpoint is not http(s), the request failed or the
 *          response was not a JSON object.
 */
const extractOEmbedData = async (html, url: string, urlInfo: URL): Promise<Record<string, unknown>> => {
    // TODO: this should also support header discovery

    let oEmbedUrl: string;

    // idk how long i'll bother with this for
    if(urlInfo.host === 'x.com' || urlInfo.host === 'twitter.com')
        oEmbedUrl = `https://publish.twitter.com/oembed?dnt=true&omit_script=true&url=${encodeURIComponent(url)}`;
    else
        oEmbedUrl = html('link[rel="alternate"][type="application/json+oembed"]').first()?.attr('href')?.trim() ?? '';

    if(oEmbedUrl === '')
        return {};

    try {
        // hrefs may be relative to the page; the address also comes from the page itself,
        // so only plain web schemes are followed
        const endpoint = new URL(oEmbedUrl, url);
        if(!['https:', 'http:'].includes(endpoint.protocol))
            return {};

        const response = await fetch(endpoint.href);

        // awaited inside the try so that a body that isn't JSON lands in the catch below
        const payload: unknown = await response.json();
        if(typeof payload !== 'object' || payload === null || Array.isArray(payload))
            return {};

        return payload as Record<string, unknown>;
    } catch(ex) {
        console.error(ex);
        return {};
    }
};
|
||||
|
||||
/**
 * Fetches a URL and assembles the metadata record returned by the API.
 *
 * For HTML/XHTML responses the record is built from Open Graph, Twitter card,
 * plain HTML metadata, JSON-LD and (for allow-listed hosts) oEmbed data, with
 * Open Graph taking precedence. For anything else only the URL, a title
 * derived from the file name and the host are reported.
 *
 * @param url     Address to look up, as a string.
 * @param urlInfo The same address, parsed.
 * @returns The metadata record. Rejects when the fetch itself fails or the
 *          Content-Type header cannot be parsed.
 */
const extractMetadata = async (url: string, urlInfo: URL) => {
    const data = await fetch(url);

    try {
        const contentTypeRaw = data.headers.get('content-type') ?? '';
        const contentType = parseMediaType(contentTypeRaw);

        const info = {};
        const addInfoOrDont = (prop, value) => {
            if(value !== null && value !== undefined)
                info[prop] = value;
        };
        // twitter card dimensions arrive as text; report them as numbers like the open graph ones
        const positiveIntOrNothing = (value) => {
            const parsed = parseInt(value ?? '', 10);
            return parsed > 0 ? parsed : undefined;
        };

        if(contentTypeRaw)
            info.media_type = contentType[0];

        if(['text/html', 'application/xhtml+xml'].includes(contentType[0])) {
            const html = cheerio.load(await readableStreamToString(data.body));

            const metaData = extractHtmlMetaData(html);
            const ogData = extractOpenGraphData(html);
            const twitterData = extractTwitterData(html);

            addInfoOrDont('url', ogData.url ?? metaData.canonical_url ?? url);
            addInfoOrDont('title', ogData.title ?? twitterData.title ?? metaData.title);
            addInfoOrDont('site_name', ogData.site_name);
            addInfoOrDont('description', ogData.description ?? twitterData.description ?? metaData.description);
            addInfoOrDont('color', metaData.theme_color);

            // image: first open graph image wins, otherwise fall back to twitter/html hints
            if(ogData.images?.length > 0) {
                const image = ogData.images[0];
                info.image = info.image_url = image.secure_url ?? image.url;
                if(image.width > 0)
                    info.image_width = image.width;
                if(image.height > 0)
                    info.image_height = image.height;
                if(image.type)
                    info.image_type = image.type;
                if(image.alt)
                    info.image_alt = image.alt;
                if(info.image_width > 0)
                    info.width = info.image_width;
                if(info.image_height > 0)
                    info.height = info.image_height;
            } else {
                addInfoOrDont('image_url', twitterData.image ?? metaData.image ?? metaData.thumbnail);
                addInfoOrDont('image_alt', twitterData.image_alt);
                if(info.image_url)
                    info.image = info.image_url;
            }

            if(ogData.audios?.length > 0) {
                const audio = ogData.audios[0];
                info.audio_url = audio.secure_url ?? audio.url;
                if(audio.type)
                    info.audio_type = audio.type;
            }

            // video dimensions, when known, override the image dimensions set above
            if(ogData.videos?.length > 0) {
                const video = ogData.videos[0];
                info.video_url = video.secure_url ?? video.url;
                if(video.width > 0)
                    info.video_width = video.width;
                if(video.height > 0)
                    info.video_height = video.height;
                if(video.type)
                    info.video_type = video.type;
                if(video.tags?.length > 0)
                    info.video_tags = video.tags;
                if(info.video_width > 0)
                    info.width = info.video_width;
                if(info.video_height > 0)
                    info.height = info.video_height;
            } else {
                addInfoOrDont('video_url', twitterData.player);
                addInfoOrDont('video_width', positiveIntOrNothing(twitterData.player_width));
                addInfoOrDont('video_height', positiveIntOrNothing(twitterData.player_height));
                if(info.video_width > 0)
                    info.width = info.video_width;
                if(info.video_height > 0)
                    info.height = info.video_height;
            }

            const linkedDatas = extractLinkedData(html);
            if(linkedDatas.length > 0)
                info.lds = linkedDatas;

            if(isAllowedOEmbedDomain(urlInfo.host)) {
                const oEmbedData = await extractOEmbedData(html, url, urlInfo);
                if(oEmbedData.version)
                    info.oembed = oEmbedData;
            }
        } else {
            info.url = url;

            // the last path segment doubles as title; keep it raw when its
            // percent-encoding is malformed instead of failing the lookup
            const fileName = basename(urlInfo.pathname);
            try {
                info.title = decodeURIComponent(fileName);
            } catch(ex) {
                info.title = fileName;
            }

            info.site_name = urlInfo.host;

            if(contentType[0].startsWith('image/')) {
                //
            } else if(contentType[0].startsWith('video/')) {
                //
            } else if(contentType[0].startsWith('audio/')) {
                //
            }
        }

        return info;
    } finally {
        // a body that was never read has to be cancelled explicitly, otherwise the
        // underlying connection stays open (and non-HTML bodies may be huge)
        if(!data.bodyUsed)
            await data.body?.cancel().catch(() => {});
    }
};
|
||||
|
||||
/** Response headers accumulated while a request is being handled. */
type UiharuHeaders = Record<string, string>;

// URL schemes Uiharu is willing to fetch (or hand to ffmpeg) on behalf of a client.
const remoteUrlSchemes = ['http:', 'https:'];

// Handles /metadata: resolves the `url` parameter (query string, or request body
// for POST) and answers with the metadata record as JSON.
const handleMetadataRequest = async (req: Request, url: URL, headers: UiharuHeaders): Promise<Response> => {
    if(!['GET', 'HEAD', 'POST'].includes(req.method))
        return new Response('', { status: 405, headers });

    const started = performance.now();

    headers['Content-Type'] = 'application/json;charset=utf-8';

    let urlParamRaw: string = req.method === 'POST'
        ? (await readableStreamToString(req.body)).trim()
        : (url.searchParams.get('url')?.trim() ?? '');

    if(urlParamRaw === '')
        return new Response('{"error":"metadata:uri"}', { status: 400, headers });

    // protocol-relative addresses are assumed to be https
    if(urlParamRaw.startsWith('//'))
        urlParamRaw = 'https:' + urlParamRaw;

    let urlParam: URL;
    try {
        urlParam = new URL(urlParamRaw);
    } catch(ex) {
        return new Response('{"error":"metadata:uri"}', { status: 400, headers });
    }

    // fetch() also understands schemes such as file: and data:, none of which
    // a client should be able to make the server read
    if(!remoteUrlSchemes.includes(urlParam.protocol))
        return new Response('{"error":"metadata:uri"}', { status: 400, headers });

    urlParamRaw = urlParam.toString();

    const urlHash = encodeBase64Url(
        await crypto.subtle.digest('SHA-256', new TextEncoder().encode(urlParamRaw))
    );
    const cacheKey = `uiharu:metadata:${urlHash}`;
    // const cacheInfo = await cache.get(cacheKey);
    // if(cacheInfo !== undefined)
    //     return new Response(
    //         brotliDecompressSync(cacheInfo.value),
    //         {
    //             status: 200,
    //             headers: {
    //                 ...headers,
    //                 ...{
    //                     'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
    //                     'X-Uiharu-State': 'cache',
    //                 },
    //             },
    //         }
    //     );

    let json: string;
    try {
        json = JSON.stringify(
            await extractMetadata(urlParamRaw, urlParam)
        );
    } catch(ex) {
        console.error(ex);
        return new Response('{"error":"metadata:lookup"}', { status: 500, headers });
    }

    // caching is best effort: it is not awaited, and a failure (e.g. memcached being
    // unreachable) must neither fail the request nor surface as an unhandled rejection
    void (async () => {
        await cache.set(cacheKey, brotliCompressSync(json), {
            compress: false,
            lifetime: 600
        });
    })().catch(ex => console.error(ex));

    return new Response(json, {
        status: 200,
        headers: {
            ...headers,
            'Server-Timing': `metadata;dur=${(performance.now() - started).toFixed(6)}`,
            'X-Uiharu-State': 'fresh',
        },
    });
};

// Handles /metadata/thumb/audio and /metadata/thumb/video: lets ffmpeg pull a
// single image out of the remote media file and answers with it.
const handleThumbRequest = async (req: Request, url: URL, headers: UiharuHeaders, isAudio: boolean): Promise<Response> => {
    if(!['HEAD', 'GET'].includes(req.method))
        return new Response('', { status: 405, headers });

    const urlParamRaw = url.searchParams.get('url')?.trim() ?? '';
    if(urlParamRaw === '')
        return new Response('missing url parameter', { status: 400, headers });

    let urlParam: URL;
    try {
        urlParam = new URL(urlParamRaw);
    } catch(ex) {
        return new Response('invalid url parameter', { status: 400, headers });
    }

    if(!remoteUrlSchemes.includes(urlParam.protocol))
        return new Response('unsupported url scheme', { status: 400, headers });

    // this seems like a terrible idea lol
    // NOTE(review): ffmpeg itself may follow references inside the remote file
    // (playlists etc.); consider restricting its protocols as well.
    const args = ['-i', urlParam.toString()];
    if(isAudio) args.push('-an');
    // audio: copy the embedded image stream as-is; video: encode the first frame as png
    args.push('-f', 'image2pipe', '-c:v', isAudio ? 'copy' : 'png', '-frames:v', '1', '-');

    let result: Deno.CommandOutput;
    try {
        result = await (new Deno.Command('ffmpeg', {
            stdin: 'null',
            stdout: 'piped',
            stderr: 'piped',
            args,
        })).output();
    } catch(ex) {
        // ffmpeg missing or not runnable
        console.error(ex);
        return new Response('decode failed', { status: 500, headers });
    }

    if(result.code !== 0) {
        console.error(new TextDecoder().decode(result.stderr));
        return new Response('decode failed', { status: 500, headers });
    }

    // TODO: bother with cache someday maybe
    const thumb = result.stdout;

    return new Response(thumb, {
        headers: {
            ...headers,
            'Content-Type': 'image/png',
            'Cache-Control': 'public, max-age=31536000, immutable',
        },
    });
};

// Serves files from the /public dir by answering with an X-Accel-Redirect header
// (the body is left empty), or with a 404 page when the file does not exist.
const servePublicFile = (url: URL, headers: UiharuHeaders): Response => {
    const localPathPrefix = import.meta.dirname + '/public/';
    const localPathSuffix = pathNormalize(url.pathname === '/' ? '/index.html' : url.pathname);
    const localPath = pathNormalize(localPathPrefix + localPathSuffix);

    // the prefix check keeps the resolved path inside the public directory
    if(localPath.startsWith(localPathPrefix) && existsSync(localPath)) {
        const mediaTypes = new Map<string, string>([
            ['html', 'text/html;charset=utf-8'],
            ['css', 'text/css;charset=utf-8'],
            ['txt', 'text/plain;charset=utf-8'],
            ['png', 'image/png'],
        ]);

        const dotIndex = localPathSuffix.lastIndexOf('.');
        const ext = dotIndex >= 0 ? localPathSuffix.substring(dotIndex + 1) : '';
        const mediaType = mediaTypes.get(ext) ?? 'application/octet-stream';

        return new Response('', {
            status: 200,
            headers: {
                ...headers,
                'Content-Type': mediaType,
                'X-Accel-Redirect': `/_public${localPathSuffix}`,
            },
        });
    }

    // 404 page
    return new Response('<!doctype html><meta charset=utf-8><title>404 Not Found</title><h1>404 Not Found</h1>', {
        status: 404,
        headers: {
            ...headers,
            'Content-Type': 'text/html;charset=utf-8',
        },
    });
};

/**
 * Entry point for every HTTP request.
 *
 * Enforces the Origin allow-list, answers OPTIONS requests and dispatches to
 * the metadata, batch, thumbnail and static file handlers based on the path.
 *
 * @param req Incoming request.
 * @returns The response to send back.
 */
const requestHandler = async (req: Request): Promise<Response> => {
    const url = new URL(req.url);
    const headers: UiharuHeaders = { 'X-Powered-By': 'Uiharu' };

    const originRaw = req.headers.get('origin');
    if(originRaw !== null) {
        let originHost: string;
        try {
            originHost = new URL(originRaw).host;
        } catch(ex) {
            // a malformed or opaque ("null") Origin can't match the allow-list either
            return new Response('403', { status: 403, headers });
        }

        if(!allowedOrigins.includes(originHost))
            return new Response('403', { status: 403, headers });

        headers['Access-Control-Allow-Origin'] = originRaw;
        headers['Vary'] = 'Origin';
    }

    if(req.method === 'OPTIONS') {
        headers['Allow'] = 'OPTIONS, GET, HEAD, POST';
        headers['Access-Control-Allow-Methods'] = 'OPTIONS, GET, HEAD, POST';

        // idk if this is the appropriate status code but: balls
        return new Response('', { status: 204, headers });
    }

    if(url.pathname === '/metadata')
        return handleMetadataRequest(req, url, headers);

    if(url.pathname === '/metadata/batch') {
        if(!['GET', 'HEAD', 'POST'].includes(req.method))
            return new Response('', { status: 405, headers });

        return new Response('{"took":0,"results":[]}', {
            headers: {
                ...headers,
                'Content-Type': 'application/json',
            },
        });
    }

    const isAudio = url.pathname === '/metadata/thumb/audio';
    const isVideo = url.pathname === '/metadata/thumb/video';
    if(isAudio || isVideo)
        return handleThumbRequest(req, url, headers, isAudio);

    // serving files from /public dir
    if(['HEAD', 'GET'].includes(req.method))
        return servePublicFile(url, headers);

    // 404 fallback
    return new Response('', {
        status: ['OPTIONS', 'HEAD', 'GET', 'POST'].includes(req.method) ? 404 : 405,
        headers,
    });
};
|
||||
|
||||
// Start the HTTP server on the configured port; every request goes through requestHandler.
Deno.serve({ port }, requestHandler);
|
Loading…
Reference in a new issue