merge: Replace JSDOM with cheerio (!973)
View MR for information: https://activitypub.software/TransFem-org/Sharkey/-/merge_requests/973
Approved-by: dakkar <dakkar@thenautilus.net>
Approved-by: Marie <github@yuugi.dev>
This commit is contained in:
commit
c5f5c6fef0
8 changed files with 98 additions and 188 deletions
|
|
@ -91,8 +91,6 @@
|
|||
"@swc/core": "1.11.24",
|
||||
"@transfem-org/sfm-js": "0.24.6",
|
||||
"@twemoji/parser": "15.1.1",
|
||||
"@types/redis-info": "3.0.3",
|
||||
"@types/psl": "^1.1.3",
|
||||
"accepts": "1.3.8",
|
||||
"ajv": "8.17.1",
|
||||
"archiver": "7.0.1",
|
||||
|
|
@ -108,6 +106,7 @@
|
|||
"cbor": "9.0.2",
|
||||
"chalk": "5.4.1",
|
||||
"chalk-template": "1.1.0",
|
||||
"cheerio": "1.0.0",
|
||||
"chokidar": "3.6.0",
|
||||
"cli-highlight": "2.1.11",
|
||||
"color-convert": "2.0.1",
|
||||
|
|
@ -132,7 +131,6 @@
|
|||
"ipaddr.js": "2.2.0",
|
||||
"is-svg": "5.1.0",
|
||||
"js-yaml": "4.1.0",
|
||||
"jsdom": "26.1.0",
|
||||
"json5": "2.2.3",
|
||||
"jsonld": "8.3.3",
|
||||
"jsrsasign": "11.1.0",
|
||||
|
|
@ -209,7 +207,6 @@
|
|||
"@types/http-link-header": "1.0.7",
|
||||
"@types/jest": "29.5.14",
|
||||
"@types/js-yaml": "4.0.9",
|
||||
"@types/jsdom": "21.1.7",
|
||||
"@types/jsonld": "1.5.15",
|
||||
"@types/jsrsasign": "10.5.15",
|
||||
"@types/mime-types": "2.1.4",
|
||||
|
|
@ -221,10 +218,12 @@
|
|||
"@types/oauth2orize-pkce": "0.1.2",
|
||||
"@types/pg": "8.11.14",
|
||||
"@types/proxy-addr": "^2.0.3",
|
||||
"@types/psl": "^1.1.3",
|
||||
"@types/pug": "2.0.10",
|
||||
"@types/qrcode": "1.5.5",
|
||||
"@types/random-seed": "0.3.5",
|
||||
"@types/ratelimiter": "3.4.6",
|
||||
"@types/redis-info": "3.0.3",
|
||||
"@types/rename": "1.0.7",
|
||||
"@types/sanitize-html": "2.15.0",
|
||||
"@types/semver": "7.7.0",
|
||||
|
|
|
|||
|
|
@ -5,9 +5,9 @@
|
|||
|
||||
import { URL } from 'node:url';
|
||||
import { Inject, Injectable } from '@nestjs/common';
|
||||
import { JSDOM } from 'jsdom';
|
||||
import tinycolor from 'tinycolor2';
|
||||
import * as Redis from 'ioredis';
|
||||
import { load as cheerio } from 'cheerio';
|
||||
import type { MiInstance } from '@/models/Instance.js';
|
||||
import type Logger from '@/logger.js';
|
||||
import { DI } from '@/di-symbols.js';
|
||||
|
|
@ -15,7 +15,7 @@ import { LoggerService } from '@/core/LoggerService.js';
|
|||
import { HttpRequestService } from '@/core/HttpRequestService.js';
|
||||
import { bindThis } from '@/decorators.js';
|
||||
import { FederatedInstanceService } from '@/core/FederatedInstanceService.js';
|
||||
import type { DOMWindow } from 'jsdom';
|
||||
import type { CheerioAPI } from 'cheerio';
|
||||
|
||||
type NodeInfo = {
|
||||
openRegistrations?: unknown;
|
||||
|
|
@ -181,17 +181,14 @@ export class FetchInstanceMetadataService {
|
|||
}
|
||||
|
||||
@bindThis
|
||||
private async fetchDom(instance: MiInstance): Promise<Document> {
|
||||
private async fetchDom(instance: MiInstance): Promise<CheerioAPI> {
|
||||
this.logger.info(`Fetching HTML of ${instance.host} ...`);
|
||||
|
||||
const url = 'https://' + instance.host;
|
||||
|
||||
const html = await this.httpRequestService.getHtml(url);
|
||||
|
||||
const { window } = new JSDOM(html);
|
||||
const doc = window.document;
|
||||
|
||||
return doc;
|
||||
return cheerio(html);
|
||||
}
|
||||
|
||||
@bindThis
|
||||
|
|
@ -206,12 +203,15 @@ export class FetchInstanceMetadataService {
|
|||
}
|
||||
|
||||
@bindThis
|
||||
private async fetchFaviconUrl(instance: MiInstance, doc: Document | null): Promise<string | null> {
|
||||
private async fetchFaviconUrl(instance: MiInstance, doc: CheerioAPI | null): Promise<string | null> {
|
||||
const url = 'https://' + instance.host;
|
||||
|
||||
if (doc) {
|
||||
// https://github.com/misskey-dev/misskey/pull/8220#issuecomment-1025104043
|
||||
const href = Array.from(doc.getElementsByTagName('link')).reverse().find(link => link.relList.contains('icon'))?.href;
|
||||
const href = doc('link[rel][href]')
|
||||
.filter((_, link) => link.attribs.rel.split(' ').includes('icon'))
|
||||
.last()
|
||||
.attr('href');
|
||||
|
||||
if (href) {
|
||||
return (new URL(href, url)).href;
|
||||
|
|
@ -232,7 +232,7 @@ export class FetchInstanceMetadataService {
|
|||
}
|
||||
|
||||
@bindThis
|
||||
private async fetchIconUrl(instance: MiInstance, doc: Document | null, manifest: Record<string, any> | null): Promise<string | null> {
|
||||
private async fetchIconUrl(instance: MiInstance, doc: CheerioAPI | null, manifest: Record<string, any> | null): Promise<string | null> {
|
||||
if (manifest && manifest.icons && manifest.icons.length > 0 && manifest.icons[0].src) {
|
||||
const url = 'https://' + instance.host;
|
||||
return (new URL(manifest.icons[0].src, url)).href;
|
||||
|
|
@ -242,13 +242,16 @@ export class FetchInstanceMetadataService {
|
|||
const url = 'https://' + instance.host;
|
||||
|
||||
// https://github.com/misskey-dev/misskey/pull/8220#issuecomment-1025104043
|
||||
const links = Array.from(doc.getElementsByTagName('link')).reverse();
|
||||
const links = Array.from(doc('link[rel][href]')).reverse().map(link => ({
|
||||
rel: link.attribs.rel.split(' '),
|
||||
href: link.attribs.href,
|
||||
}));
|
||||
// https://github.com/misskey-dev/misskey/pull/8220/files/0ec4eba22a914e31b86874f12448f88b3e58dd5a#r796487559
|
||||
const href =
|
||||
[
|
||||
links.find(link => link.relList.contains('apple-touch-icon-precomposed'))?.href,
|
||||
links.find(link => link.relList.contains('apple-touch-icon'))?.href,
|
||||
links.find(link => link.relList.contains('icon'))?.href,
|
||||
links.find(link => link.rel.includes('apple-touch-icon-precomposed'))?.href,
|
||||
links.find(link => link.rel.includes('apple-touch-icon'))?.href,
|
||||
links.find(link => link.rel.includes('icon'))?.href,
|
||||
]
|
||||
.find(href => href);
|
||||
|
||||
|
|
@ -261,8 +264,8 @@ export class FetchInstanceMetadataService {
|
|||
}
|
||||
|
||||
@bindThis
|
||||
private async getThemeColor(info: NodeInfo | null, doc: Document | null, manifest: Record<string, any> | null): Promise<string | null> {
|
||||
const themeColor = info?.metadata?.themeColor ?? doc?.querySelector('meta[name="theme-color"]')?.getAttribute('content') ?? manifest?.theme_color;
|
||||
private async getThemeColor(info: NodeInfo | null, doc: CheerioAPI | null, manifest: Record<string, any> | null): Promise<string | null> {
|
||||
const themeColor = info?.metadata?.themeColor ?? doc?.('meta[name="theme-color"][content]').attr('content') ?? manifest?.theme_color;
|
||||
|
||||
if (themeColor) {
|
||||
const color = new tinycolor(themeColor);
|
||||
|
|
@ -273,7 +276,7 @@ export class FetchInstanceMetadataService {
|
|||
}
|
||||
|
||||
@bindThis
|
||||
private async getSiteName(info: NodeInfo | null, doc: Document | null, manifest: Record<string, any> | null): Promise<string | null> {
|
||||
private async getSiteName(info: NodeInfo | null, doc: CheerioAPI | null, manifest: Record<string, any> | null): Promise<string | null> {
|
||||
if (info && info.metadata) {
|
||||
if (typeof info.metadata.nodeName === 'string') {
|
||||
return info.metadata.nodeName;
|
||||
|
|
@ -283,7 +286,7 @@ export class FetchInstanceMetadataService {
|
|||
}
|
||||
|
||||
if (doc) {
|
||||
const og = doc.querySelector('meta[property="og:title"]')?.getAttribute('content');
|
||||
const og = doc('meta[property="og:title"][content]').attr('content');
|
||||
|
||||
if (og) {
|
||||
return og;
|
||||
|
|
@ -298,7 +301,7 @@ export class FetchInstanceMetadataService {
|
|||
}
|
||||
|
||||
@bindThis
|
||||
private async getDescription(info: NodeInfo | null, doc: Document | null, manifest: Record<string, any> | null): Promise<string | null> {
|
||||
private async getDescription(info: NodeInfo | null, doc: CheerioAPI | null, manifest: Record<string, any> | null): Promise<string | null> {
|
||||
if (info && info.metadata) {
|
||||
if (typeof info.metadata.nodeDescription === 'string') {
|
||||
return info.metadata.nodeDescription;
|
||||
|
|
@ -308,12 +311,12 @@ export class FetchInstanceMetadataService {
|
|||
}
|
||||
|
||||
if (doc) {
|
||||
const meta = doc.querySelector('meta[name="description"]')?.getAttribute('content');
|
||||
const meta = doc('meta[name="description"][content]').attr('content');
|
||||
if (meta) {
|
||||
return meta;
|
||||
}
|
||||
|
||||
const og = doc.querySelector('meta[property="og:description"]')?.getAttribute('content');
|
||||
const og = doc('meta[property="og:description"][content]').attr('content');
|
||||
if (og) {
|
||||
return og;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,32 +3,29 @@
|
|||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import { JSDOM } from 'jsdom';
|
||||
import { load as cheerio } from 'cheerio';
|
||||
import type { HttpRequestService } from '@/core/HttpRequestService.js';
|
||||
|
||||
type Field = { name: string, value: string };
|
||||
|
||||
export async function verifyFieldLinks(fields: Field[], profile_url: string, httpRequestService: HttpRequestService): Promise<string[]> {
|
||||
const verified_links = [];
|
||||
for (const field_url of fields
|
||||
.filter(x => URL.canParse(x.value) && ['http:', 'https:'].includes((new URL(x.value).protocol)))) {
|
||||
for (const field_url of fields.filter(x => URL.canParse(x.value) && ['http:', 'https:'].includes((new URL(x.value).protocol)))) {
|
||||
try {
|
||||
const html = await httpRequestService.getHtml(field_url.value);
|
||||
|
||||
const { window } = new JSDOM(html);
|
||||
const doc: Document = window.document;
|
||||
const doc = cheerio(html);
|
||||
|
||||
const aEls = Array.from(doc.getElementsByTagName('a'));
|
||||
const linkEls = Array.from(doc.getElementsByTagName('link'));
|
||||
const links = doc('a[rel~="me"][href], link[rel~="me"][href]').toArray();
|
||||
|
||||
const includesProfileLinks = [...aEls, ...linkEls].some(link => link.rel === 'me' && link.href === profile_url);
|
||||
if (includesProfileLinks) { verified_links.push(field_url.value); }
|
||||
|
||||
window.close();
|
||||
} catch (err) {
|
||||
const includesProfileLinks = links.some(link => link.attribs.href === profile_url);
|
||||
if (includesProfileLinks) {
|
||||
verified_links.push(field_url.value);
|
||||
}
|
||||
} catch {
|
||||
// don't do anything.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return verified_links;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@
|
|||
import * as mfm from '@transfem-org/sfm-js';
|
||||
import { Inject, Injectable } from '@nestjs/common';
|
||||
import ms from 'ms';
|
||||
import { JSDOM } from 'jsdom';
|
||||
import { extractCustomEmojisFromMfm } from '@/misc/extract-custom-emojis-from-mfm.js';
|
||||
import { extractHashtags } from '@/misc/extract-hashtags.js';
|
||||
import * as Acct from '@/misc/acct.js';
|
||||
|
|
@ -622,6 +621,7 @@ export default class extends Endpoint<typeof meta, typeof paramDef> { // eslint-
|
|||
}
|
||||
|
||||
// this function is superseded by '@/misc/verify-field-link.ts'
|
||||
/*
|
||||
private async verifyLink(url: string, user: MiLocalUser) {
|
||||
if (!safeForSql(url)) return;
|
||||
|
||||
|
|
@ -653,6 +653,7 @@ export default class extends Endpoint<typeof meta, typeof paramDef> { // eslint-
|
|||
// なにもしない
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// these two methods need to be kept in sync with
|
||||
// `ApRendererService.renderPerson`
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ export class ApiSearchMastodon {
|
|||
{
|
||||
method: 'POST',
|
||||
headers: {
|
||||
...request.headers as HeadersInit,
|
||||
...request.headers,
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
|
|
@ -135,7 +135,7 @@ export class ApiSearchMastodon {
|
|||
{
|
||||
method: 'POST',
|
||||
headers: {
|
||||
...request.headers as HeadersInit,
|
||||
...request.headers,
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import {
|
|||
ResourceOwnerPassword,
|
||||
} from 'simple-oauth2';
|
||||
import pkceChallenge from 'pkce-challenge';
|
||||
import { JSDOM } from 'jsdom';
|
||||
import { load as cheerio } from 'cheerio';
|
||||
import Fastify, { type FastifyInstance, type FastifyReply } from 'fastify';
|
||||
import { api, port, sendEnvUpdateRequest, signup } from '../utils.js';
|
||||
import type * as misskey from 'misskey-js';
|
||||
|
|
@ -73,11 +73,11 @@ const clientConfig: ModuleOptions<'client_id'> = {
|
|||
};
|
||||
|
||||
function getMeta(html: string): { transactionId: string | undefined, clientName: string | undefined, clientLogo: string | undefined } {
|
||||
const fragment = JSDOM.fragment(html);
|
||||
const fragment = cheerio(html);
|
||||
return {
|
||||
transactionId: fragment.querySelector<HTMLMetaElement>('meta[name="misskey:oauth:transaction-id"]')?.content,
|
||||
clientName: fragment.querySelector<HTMLMetaElement>('meta[name="misskey:oauth:client-name"]')?.content,
|
||||
clientLogo: fragment.querySelector<HTMLMetaElement>('meta[name="misskey:oauth:client-logo"]')?.content,
|
||||
transactionId: fragment('meta[name="misskey:oauth:transaction-id"][content]').attr('content'),
|
||||
clientName: fragment('meta[name="misskey:oauth:client-name"][content]').attr('content'),
|
||||
clientLogo: fragment('meta[name="misskey:oauth:client-logo"][content]').attr('content'),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,11 +11,12 @@ import { inspect } from 'node:util';
|
|||
import WebSocket, { ClientOptions } from 'ws';
|
||||
import fetch, { File, RequestInit, type Headers } from 'node-fetch';
|
||||
import { DataSource } from 'typeorm';
|
||||
import { JSDOM } from 'jsdom';
|
||||
import { load as cheerio } from 'cheerio';
|
||||
import { type Response } from 'node-fetch';
|
||||
import Fastify from 'fastify';
|
||||
import { entities } from '../src/postgres.js';
|
||||
import { loadConfig } from '../src/config.js';
|
||||
import type { CheerioAPI } from 'cheerio';
|
||||
import type * as misskey from 'misskey-js';
|
||||
import { DEFAULT_POLICIES } from '@/core/RoleService.js';
|
||||
import { validateContentTypeSetAsActivityPub } from '@/core/activitypub/misc/validator.js';
|
||||
|
|
@ -464,7 +465,7 @@ export function makeStreamCatcher<T>(
|
|||
|
||||
export type SimpleGetResponse = {
|
||||
status: number,
|
||||
body: any | JSDOM | null,
|
||||
body: any | CheerioAPI | null,
|
||||
type: string | null,
|
||||
location: string | null
|
||||
};
|
||||
|
|
@ -495,7 +496,7 @@ export const simpleGet = async (path: string, accept = '*/*', cookie: any = unde
|
|||
|
||||
const body =
|
||||
jsonTypes.includes(res.headers.get('content-type') ?? '') ? await res.json() :
|
||||
htmlTypes.includes(res.headers.get('content-type') ?? '') ? new JSDOM(await res.text()) :
|
||||
htmlTypes.includes(res.headers.get('content-type') ?? '') ? cheerio(await res.text()) :
|
||||
await bodyExtractor(res);
|
||||
|
||||
return {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue