View MR for information: https://activitypub.software/TransFem-org/Sharkey/-/merge_requests/1113 Closes #1074, #1104, and #1105 Approved-by: dakkar <dakkar@thenautilus.net> Approved-by: Marie <github@yuugi.dev>
This commit is contained in:
commit
8926ba06a6
16 changed files with 960 additions and 181 deletions
|
|
@ -90,7 +90,7 @@
|
|||
"@simplewebauthn/server": "12.0.0",
|
||||
"@sinonjs/fake-timers": "11.3.1",
|
||||
"@smithy/node-http-handler": "2.5.0",
|
||||
"mfm-js": "npm:@transfem-org/sfm-js@0.24.6",
|
||||
"mfm-js": "npm:@transfem-org/sfm-js@0.24.8",
|
||||
"@twemoji/parser": "15.1.1",
|
||||
"accepts": "1.3.8",
|
||||
"ajv": "8.17.1",
|
||||
|
|
|
|||
|
|
@ -335,6 +335,38 @@ export class MfmService {
|
|||
break;
|
||||
}
|
||||
|
||||
// Replace iframe with link so we can generate previews.
// We shouldn't normally see this, but federated blogging platforms (WordPress, MicroBlog.Pub) can send it.
case 'iframe': {
	// Label for the link: prefer the "title" attribute, fall back to "alt".
	const txt: string | undefined = node.attribs.title || node.attribs.alt;
	// Target of the link: the iframe's "src" attribute.
	const href: string | undefined = node.attribs.src;
	if (href) {
		// An href containing whitespace or ">" is treated as invalid and is never emitted inside <...>.
		if (href.match(/[\s>]/)) {
			if (txt) {
				// href is invalid + has a label => render a pseudo-link
				// (append the label, NOT the accumulated output, which would duplicate everything emitted so far)
				text += `${txt} (${href})`;
			} else {
				// href is invalid + no label => render plain text
				text += href;
			}
		} else {
			if (txt) {
				// href is valid + has a label => render a link
				// Neutralize characters that would break out of the [label] part of the link syntax.
				// Note: replaceAll() throws a TypeError for a regex without the "g" flag, so the flag is required.
				const label = txt
					.replaceAll('[', '(')
					.replaceAll(']', ')')
					.replaceAll(/\r?\n/g, ' ')
					.replaceAll('`', '\'');
				text += `[${label}](<${href}>)`;
			} else {
				// href is valid + no label => render a plain URL
				text += `<${href}>`;
			}
		}
	}
	break;
}
|
||||
|
||||
default: // includes inline elements
|
||||
{
|
||||
appendChildren(node.childNodes);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import { load as cheerio } from 'cheerio/slim';
|
||||
import type { IApDocument } from '@/core/activitypub/type.js';
|
||||
import type { CheerioAPI } from 'cheerio/slim';
|
||||
|
||||
/**
 * Scans HTML for elements that embed media and converts each one into a simulated AP document.
 *
 * Matches are keyed by URL, so a URL referenced by several elements is returned only once:
 * it keeps the position of its first match, while type and name come from the last match.
 * Elements are visited in this order: img, object, embed, audio, video.
 *
 * @param html Input HTML to analyze.
 * @returns The discovered documents, or an empty array if the HTML could not be parsed or contains no media.
 */
export function extractMediaFromHtml(html: string): IApDocument[] {
	const $ = parseHtml(html);
	if (!$) return [];

	const found = new Map<string, IApDocument>();

	// Registers every element matching the selector, reading the media URL from the given attribute.
	// The alt text is taken from "alt", falling back to "title", then null.
	const collect = (selector: string, urlAttr: 'src' | 'data', type: IApDocument['type']): void => {
		for (const element of $(selector).toArray()) {
			const url = element.attribs[urlAttr];
			found.set(url, {
				type,
				url,
				name: element.attribs.alt || element.attribs.title || null,
			});
		}
	};

	// <img> tags, including <picture> and <object> fallback elements
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/img
	collect('img[src]', 'src', 'Image');

	// <object> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/object
	collect('object[data]', 'data', 'Document');

	// <embed> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed
	collect('embed[src]', 'src', 'Document');

	// <audio> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/audio
	collect('audio[src]', 'src', 'Audio');

	// <video> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/video
	collect('video[src]', 'src', 'Video');

	// TODO support <svg>? We would need to extract it directly from the HTML and save to a temp file.

	return [...found.values()];
}
|
||||
|
||||
/**
 * Parses HTML with cheerio without ever throwing.
 * @param html Markup to parse.
 * @returns The loaded cheerio document, or null if cheerio threw while parsing.
 */
function parseHtml(html: string): CheerioAPI | null {
	let document: CheerioAPI | null = null;

	try {
		document = cheerio(html);
	} catch {
		// Don't worry about invalid HTML - null tells the caller there is nothing to inspect.
	}

	return document;
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import { parse, inspect, extract } from 'mfm-js';
|
||||
import type { IApDocument } from '@/core/activitypub/type.js';
|
||||
import type { MfmNode, MfmText } from 'mfm-js';
|
||||
|
||||
/**
 * Walks an MFM document looking for image links and converts each one into a simulated AP document.
 *
 * Only "link" nodes flagged as images are considered. The alt text is the concatenation of the
 * text, unicode emoji, and ":name:" emoji codes found anywhere inside the link's children.
 * Images are keyed by URL, so a repeated URL is returned once, using the alt text of its last occurrence.
 *
 * @param mfm Input MFM to analyze.
 * @returns The discovered images, or an empty array if the MFM could not be parsed or contains no images.
 */
export function extractMediaFromMfm(mfm: string): IApDocument[] {
	const nodes = parseMfm(mfm);
	if (nodes == null) return [];

	const images = new Map<string, IApDocument>();

	inspect(nodes, node => {
		// Skip everything except image links.
		if (node.type !== 'link' || !node.props.image) return;

		// Flatten the link label into plain text pieces.
		const pieces: string[] = [];
		inspect(node.children, child => {
			if (child.type === 'text') {
				pieces.push(child.props.text);
			} else if (child.type === 'unicodeEmoji') {
				pieces.push(child.props.emoji);
			} else if (child.type === 'emojiCode') {
				pieces.push(':', child.props.name, ':');
			}
		});

		// No label pieces at all => no alt text.
		const name = pieces.length > 0 ? pieces.join('') : null;

		images.set(node.props.url, {
			type: 'Image',
			url: node.props.url,
			name,
		});
	});

	return [...images.values()];
}
|
||||
|
||||
/**
 * Parses MFM without ever throwing.
 * @param mfm MFM source text to parse.
 * @returns The parsed node list, or null if the parser threw.
 */
function parseMfm(mfm: string): MfmNode[] | null {
	let nodes: MfmNode[] | null = null;

	try {
		nodes = parse(mfm);
	} catch {
		// Don't worry about invalid MFM - null tells the caller there is nothing to inspect.
	}

	return nodes;
}
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import type { IPost } from '@/core/activitypub/type.js';
|
||||
import { toArray } from '@/misc/prelude/array.js';
|
||||
|
||||
/**
 * Gets content of a specified media type from a provided object.
 *
 * Candidates are checked in this order, and the first match wins:
 * 1. Each entry of the extended "source" property whose "content" is a string and whose "mediaType" matches.
 * 2. "_misskey_content", but only when the requested type is exactly "text/x.misskeymarkdown".
 * 3. The AP native "content" property, when the object's "mediaType" matches.
 *
 * Optionally supports a "permissive" mode which enables the following changes:
 * 1. MIME types are checked in a case-insensitive manner.
 * 2. MIME types are matched based on inclusion, not strict equality.
 * 3. A candidate content is considered to match if it has no specified MIME type.
 *
 * Note: this method is written defensively to protect against malformed remote objects.
 * When extending or modifying it, please be sure to work with "unknown" type and validate everything.
 *
 * Note: the logic in this method is carefully ordered to match the selection priority of existing code in ApNoteService.
 * Please do not re-arrange it without testing!
 * New checks can be added to the end of the method to safely extend the existing logic.
 *
 * @param object AP object to extract content from.
 * @param mimeType MIME type to look for.
 * @param permissive Enables permissive mode, as described above. Defaults to false (disabled).
 * @returns The first matching content string, or null if nothing matches.
 */
export function getContentByType(object: IPost | Record<string, unknown>, mimeType: string, permissive = false): string | null {
	// Checks if the provided media type matches the input parameters.
	function checkMediaType(mediaType: unknown): boolean {
		if (typeof(mediaType) === 'string') {
			// Strict match
			if (mediaType === mimeType) {
				return true;
			}

			// Permissive match
			if (permissive && mediaType.toLowerCase().includes(mimeType.toLowerCase())) {
				return true;
			}
		}

		// Permissive fallback match
		if (permissive && mediaType == null) {
			return true;
		}

		// No match
		return false;
	}

	// Case 1: Extended "source" property
	if (object.source && typeof(object.source) === 'object') {
		// "source" is permitted to be an array, though no implementations are known to do this yet.
		const sources = toArray(object.source) as unknown[];
		for (const entry of sources) {
			// A malformed remote array may contain null or other non-object entries.
			// Skip them instead of crashing on a property access.
			if (entry == null || typeof(entry) !== 'object') continue;

			const source = entry as Record<string, unknown>;
			if (typeof(source.content) === 'string' && checkMediaType(source.mediaType)) {
				return source.content;
			}
		}
	}

	// Case 2: Special case for MFM
	if (typeof(object._misskey_content) === 'string' && mimeType === 'text/x.misskeymarkdown') {
		return object._misskey_content;
	}

	// Case 3: AP native "content" property
	if (typeof(object.content) === 'string' && checkMediaType(object.mediaType)) {
		return object.content;
	}

	return null;
}
|
||||
|
|
@ -86,7 +86,7 @@ export class ApImageService {
|
|||
uri: image.url,
|
||||
sensitive: !!(image.sensitive),
|
||||
isLink: !shouldBeCached,
|
||||
comment: truncate(image.name ?? undefined, this.config.maxRemoteAltTextLength),
|
||||
comment: truncate(image.summary || image.name || undefined, this.config.maxRemoteAltTextLength),
|
||||
});
|
||||
if (!file.isLink || file.url === image.url) return file;
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
import { forwardRef, Inject, Injectable } from '@nestjs/common';
|
||||
import { In } from 'typeorm';
|
||||
import { UnrecoverableError } from 'bullmq';
|
||||
import promiseLimit from 'promise-limit';
|
||||
import { DI } from '@/di-symbols.js';
|
||||
import type { UsersRepository, PollsRepository, EmojisRepository, NotesRepository, MiMeta } from '@/models/_.js';
|
||||
import type { Config } from '@/config.js';
|
||||
|
|
@ -27,6 +28,9 @@ import { checkHttps } from '@/misc/check-https.js';
|
|||
import { IdentifiableError } from '@/misc/identifiable-error.js';
|
||||
import { isRetryableError } from '@/misc/is-retryable-error.js';
|
||||
import { renderInlineError } from '@/misc/render-inline-error.js';
|
||||
import { extractMediaFromHtml } from '@/core/activitypub/misc/extract-media-from-html.js';
|
||||
import { extractMediaFromMfm } from '@/core/activitypub/misc/extract-media-from-mfm.js';
|
||||
import { getContentByType } from '@/core/activitypub/misc/get-content-by-type.js';
|
||||
import { getOneApId, getApId, validPost, isEmoji, getApType, isApObject, isDocument, IApDocument, isLink } from '../type.js';
|
||||
import { ApLoggerService } from '../ApLoggerService.js';
|
||||
import { ApMfmService } from '../ApMfmService.js';
|
||||
|
|
@ -206,12 +210,10 @@ export class ApNoteService {
|
|||
const cw = note.summary === '' ? null : note.summary;
|
||||
|
||||
// テキストのパース
|
||||
let text: string | null = null;
|
||||
if (note.source?.mediaType === 'text/x.misskeymarkdown' && typeof note.source.content === 'string') {
|
||||
text = note.source.content;
|
||||
} else if (typeof note._misskey_content !== 'undefined') {
|
||||
text = note._misskey_content;
|
||||
} else if (typeof note.content === 'string') {
|
||||
let text =
|
||||
getContentByType(note, 'text/x.misskeymarkdown') ??
|
||||
getContentByType(note, 'text/markdown');
|
||||
if (text == null && typeof note.content === 'string') {
|
||||
text = this.apMfmService.htmlToMfm(note.content, note.tag);
|
||||
}
|
||||
|
||||
|
|
@ -248,21 +250,14 @@ export class ApNoteService {
|
|||
}
|
||||
}
|
||||
|
||||
const processErrors: string[] = [];
|
||||
|
||||
// 添付ファイル
|
||||
const files: MiDriveFile[] = [];
|
||||
|
||||
for (const attach of toArray(note.attachment)) {
|
||||
attach.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, attach);
|
||||
if (file) files.push(file);
|
||||
}
|
||||
|
||||
// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
|
||||
const icon = getBestIcon(note);
|
||||
if (icon) {
|
||||
icon.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, icon);
|
||||
if (file) files.push(file);
|
||||
// Note: implementation moved to the getAttachments method to avoid duplication.
|
||||
// Please copy any upstream changes to that method! (It's in the bottom of this class)
|
||||
const { files, hasFileError } = await this.getAttachments(note, actor);
|
||||
if (hasFileError) {
|
||||
processErrors.push('attachmentFailed');
|
||||
}
|
||||
|
||||
// リプライ
|
||||
|
|
@ -284,7 +279,9 @@ export class ApNoteService {
|
|||
|
||||
// 引用
|
||||
const quote = await this.getQuote(note, entryUri, resolver);
|
||||
const processErrors = quote === null ? ['quoteUnavailable'] : null;
|
||||
if (quote === null) {
|
||||
processErrors.push('quoteUnavailable');
|
||||
}
|
||||
|
||||
if (reply && reply.userHost == null && reply.localOnly) {
|
||||
throw new IdentifiableError('12e23cec-edd9-442b-aa48-9c21f0c3b215', 'Cannot reply to local-only note');
|
||||
|
|
@ -328,7 +325,7 @@ export class ApNoteService {
|
|||
files,
|
||||
reply,
|
||||
renote: quote ?? null,
|
||||
processErrors,
|
||||
processErrors: processErrors.length > 0 ? processErrors : null,
|
||||
name: note.name,
|
||||
cw,
|
||||
text,
|
||||
|
|
@ -412,12 +409,10 @@ export class ApNoteService {
|
|||
const cw = note.summary === '' ? null : note.summary;
|
||||
|
||||
// テキストのパース
|
||||
let text: string | null = null;
|
||||
if (note.source?.mediaType === 'text/x.misskeymarkdown' && typeof note.source.content === 'string') {
|
||||
text = note.source.content;
|
||||
} else if (typeof note._misskey_content !== 'undefined') {
|
||||
text = note._misskey_content;
|
||||
} else if (typeof note.content === 'string') {
|
||||
let text =
|
||||
getContentByType(note, 'text/x.misskeymarkdown') ??
|
||||
getContentByType(note, 'text/markdown');
|
||||
if (text == null && typeof note.content === 'string') {
|
||||
text = this.apMfmService.htmlToMfm(note.content, note.tag);
|
||||
}
|
||||
|
||||
|
|
@ -446,21 +441,12 @@ export class ApNoteService {
|
|||
}
|
||||
}
|
||||
|
||||
const processErrors: string[] = [];
|
||||
|
||||
// 添付ファイル
|
||||
const files: MiDriveFile[] = [];
|
||||
|
||||
for (const attach of toArray(note.attachment)) {
|
||||
attach.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, attach);
|
||||
if (file) files.push(file);
|
||||
}
|
||||
|
||||
// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
|
||||
const icon = getBestIcon(note);
|
||||
if (icon) {
|
||||
icon.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, icon);
|
||||
if (file) files.push(file);
|
||||
const { files, hasFileError } = await this.getAttachments(note, actor);
|
||||
if (hasFileError) {
|
||||
processErrors.push('attachmentFailed');
|
||||
}
|
||||
|
||||
// リプライ
|
||||
|
|
@ -482,7 +468,9 @@ export class ApNoteService {
|
|||
|
||||
// 引用
|
||||
const quote = await this.getQuote(note, entryUri, resolver);
|
||||
const processErrors = quote === null ? ['quoteUnavailable'] : null;
|
||||
if (quote === null) {
|
||||
processErrors.push('quoteUnavailable');
|
||||
}
|
||||
|
||||
if (quote && quote.userHost == null && quote.localOnly) {
|
||||
throw new IdentifiableError('12e23cec-edd9-442b-aa48-9c21f0c3b215', 'Cannot quote a local-only note');
|
||||
|
|
@ -523,7 +511,7 @@ export class ApNoteService {
|
|||
files,
|
||||
reply,
|
||||
renote: quote ?? null,
|
||||
processErrors,
|
||||
processErrors: processErrors.length > 0 ? processErrors : null,
|
||||
name: note.name,
|
||||
cw,
|
||||
text,
|
||||
|
|
@ -722,10 +710,95 @@ export class ApNoteService {
|
|||
// Permanent error - return null
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Gathers every media attachment referenced by the provided note and resolves each one into a drive file.
 *
 * Candidates are collected by URL from, in order: inline HTML media, inline MFM / markdown images,
 * the note's best icon, and finally the AP "attachment" list. Later sources replace earlier ones for the same URL.
 *
 * @param note Note to extract attachments from.
 * @param actor Remote user passed through to resolveImage.
 * @returns The resolved files, plus a flag that is true when at least one attachment resolved to null.
 */
private async getAttachments(note: IPost, actor: MiRemoteUser): Promise<{ files: MiDriveFile[], hasFileError: boolean }> {
	const byUrl = new Map<string, IApDocument & { url: string }>();

	// Adds every candidate that has a string URL, replacing any earlier candidate with the same URL.
	const register = (candidates: Iterable<IApDocument>): void => {
		for (const candidate of candidates) {
			if (hasUrl(candidate)) {
				byUrl.set(candidate.url, candidate);
			}
		}
	};

	// Extract inline media from HTML content.
	// Don't use source.content, _misskey_content, or anything else because those aren't HTML.
	const html = getContentByType(note, 'text/html', true);
	if (html) {
		register(extractMediaFromHtml(html));
	}

	// Extract inline media from MFM / markdown content.
	const mfm = getContentByType(note, 'text/x.misskeymarkdown') ?? getContentByType(note, 'text/markdown');
	if (mfm) {
		register(extractMediaFromMfm(mfm));
	}

	// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
	const icon = getBestIcon(note);
	if (icon) {
		register([icon]);
	}

	// Populate AP attachments last, to overwrite any "fallback" elements that may have been inlined in HTML.
	// AP attachments should be considered canonical.
	register(toArray(note.attachment));

	// Resolve all files w/ concurrency 2.
	// This prevents one big file from blocking the others.
	const limit = promiseLimit<MiDriveFile | null>(2);
	const pending = [...byUrl.values()].map(document => limit(async () => {
		// Attachments without their own sensitivity flag inherit the note's flag.
		document.sensitive ??= note.sensitive;
		return await this.resolveImage(actor, document);
	}));
	const resolved = await Promise.all(pending);

	// Process results: null entries are attachments that could not be resolved.
	const files = resolved.filter((file): file is MiDriveFile => file != null);
	const hasFileError = files.length < resolved.length;

	return { files, hasFileError };
}
|
||||
|
||||
/**
 * Resolves a single attachment through ApImageService, downgrading permanent failures to null.
 *
 * @param actor Remote user passed through to ApImageService.
 * @param attachment Attachment to resolve; its URL is used in log messages.
 * @returns The result of ApImageService.resolveImage, or null after a non-retryable error.
 * @throws Re-throws the original error when isRetryableError reports it as retryable.
 */
private async resolveImage(actor: MiRemoteUser, attachment: IApDocument & { url: string }): Promise<MiDriveFile | null> {
	try {
		return await this.apImageService.resolveImage(actor, attachment);
	} catch (err) {
		if (!isRetryableError(err)) {
			this.logger.warn(`Permanent failure to resolve attachment at ${attachment.url}: ${renderInlineError(err)}`);
			return null;
		}

		this.logger.warn(`Temporary failure to resolve attachment at ${attachment.url}: ${renderInlineError(err)}`);
		throw err;
	}
}
|
||||
}
|
||||
|
||||
function getBestIcon(note: IObject): IObject | null {
|
||||
const icons: IObject[] = toArray(note.icon);
|
||||
function getBestIcon(note: IObject): IApDocument | null {
|
||||
const icons: IApDocument[] = toArray(note.icon);
|
||||
if (icons.length < 2) {
|
||||
return icons[0] ?? null;
|
||||
}
|
||||
|
|
@ -741,3 +814,8 @@ function getBestIcon(note: IObject): IObject | null {
|
|||
return best;
|
||||
}, null as IApDocument | null) ?? null;
|
||||
}
|
||||
|
||||
/**
 * Type guard: reports whether the object's "url" property is a plain string.
 * Needed so TypeScript can narrow the object to one with a string URL.
 */
function hasUrl<T extends IObject>(object: T): object is T & { url: string } {
	const { url } = object;
	return typeof url === 'string';
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ export interface IObject {
|
|||
cc?: ApObject;
|
||||
to?: ApObject;
|
||||
attributedTo?: ApObject;
|
||||
attachment?: any[];
|
||||
attachment?: IApDocument[];
|
||||
inReplyTo?: any;
|
||||
replies?: ICollection | IOrderedCollection | string;
|
||||
content?: string | null;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,297 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import { extractMediaFromHtml } from '@/core/activitypub/misc/extract-media-from-html.js';
|
||||
|
||||
describe(extractMediaFromHtml, () => {
|
||||
it('should return empty for invalid input', () => {
|
||||
const result = extractMediaFromHtml('<broken html');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should return empty for empty input', () => {
|
||||
const result = extractMediaFromHtml('');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should return empty for input without attachments', () => {
|
||||
const result = extractMediaFromHtml('<div>No media here!</div>');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should extract img tags', () => {
|
||||
const result = extractMediaFromHtml('<img src="https://example.com/img.png" alt=""/>');
|
||||
expect(result).toEqual([{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/img.png',
|
||||
name: null,
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should ignore img tags without src', () => {
|
||||
const result = extractMediaFromHtml('<img alt=""/>');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should extract picture tags with img', () => {
|
||||
const result = extractMediaFromHtml('<picture><img src="https://example.com/picture.png" alt=""/></picture>');
|
||||
expect(result).toEqual([{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/picture.png',
|
||||
name: null,
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should ignore picture tags without img', () => {
|
||||
const result = extractMediaFromHtml('<picture><source src="https://example.com/picture.png"/></picture>');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should ignore picture tags without src', () => {
|
||||
const result = extractMediaFromHtml('<picture><source/><img alt=""/></picture>');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should extract object tags', () => {
|
||||
const result = extractMediaFromHtml('<object data="https://example.com/object.dat"></object>');
|
||||
expect(result).toEqual([{
|
||||
type: 'Document',
|
||||
url: 'https://example.com/object.dat',
|
||||
name: null,
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should ignore object tags without data', () => {
|
||||
const result = extractMediaFromHtml('<object></object>');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should extract object tags with img fallback', () => {
|
||||
const result = extractMediaFromHtml('<object><img src="https://example.com/object.png" alt=""/></object>');
|
||||
expect(result).toEqual([{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/object.png',
|
||||
name: null,
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should ignore object tags with empty img fallback', () => {
|
||||
const result = extractMediaFromHtml('<object><img alt=""/></object>');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should extract embed tags', () => {
|
||||
const result = extractMediaFromHtml('<embed src="https://example.com/embed.dat"/>');
|
||||
expect(result).toEqual([{
|
||||
type: 'Document',
|
||||
url: 'https://example.com/embed.dat',
|
||||
name: null,
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should ignore embed tags without src', () => {
|
||||
const result = extractMediaFromHtml('<embed/>');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should extract audio tags', () => {
|
||||
const result = extractMediaFromHtml('<audio src="https://example.com/audio.mp3"></audio>');
|
||||
expect(result).toEqual([{
|
||||
type: 'Audio',
|
||||
url: 'https://example.com/audio.mp3',
|
||||
name: null,
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should ignore audio tags without src', () => {
|
||||
const result = extractMediaFromHtml('<audio></audio>');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should extract video tags', () => {
|
||||
const result = extractMediaFromHtml('<video src="https://example.com/video.mp4"></video>');
|
||||
expect(result).toEqual([{
|
||||
type: 'Video',
|
||||
url: 'https://example.com/video.mp4',
|
||||
name: null,
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should ignore video tags without src', () => {
|
||||
const result = extractMediaFromHtml('<video></video>');
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should extract alt text from alt property', () => {
|
||||
const result = extractMediaFromHtml(`
|
||||
<img src="https://example.com/img.png" alt="img tag" title="wrong"/>
|
||||
<picture><img src="https://example.com/picture.png" alt="picture tag" title="wrong"/></picture>
|
||||
<object data="https://example.com/object-1.dat" alt="object tag" title="wrong"></object>
|
||||
<object><img src="https://example.com/object-2.png" alt="object tag" title="wrong"/></object>
|
||||
<embed src="https://example.com/embed.dat" alt="embed tag" title="wrong"/>
|
||||
<audio src="https://example.com/audio.mp3" alt="audio tag" title="wrong"/>
|
||||
<video src="https://example.com/video.mp4" alt="video tag" title="wrong"/>
|
||||
`);
|
||||
|
||||
expect(result).toEqual([
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/img.png',
|
||||
name: 'img tag',
|
||||
},
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/picture.png',
|
||||
name: 'picture tag',
|
||||
},
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/object-2.png',
|
||||
name: 'object tag',
|
||||
},
|
||||
{
|
||||
type: 'Document',
|
||||
url: 'https://example.com/object-1.dat',
|
||||
name: 'object tag',
|
||||
},
|
||||
{
|
||||
type: 'Document',
|
||||
url: 'https://example.com/embed.dat',
|
||||
name: 'embed tag',
|
||||
},
|
||||
{
|
||||
type: 'Audio',
|
||||
url: 'https://example.com/audio.mp3',
|
||||
name: 'audio tag',
|
||||
},
|
||||
{
|
||||
type: 'Video',
|
||||
url: 'https://example.com/video.mp4',
|
||||
name: 'video tag',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('should extract alt text from title property', () => {
|
||||
const result = extractMediaFromHtml(`
|
||||
<img src="https://example.com/img.png" title="img tag"/>
|
||||
<picture><img src="https://example.com/picture.png" title="picture tag"/></picture>
|
||||
<object data="https://example.com/object-1.dat" title="object tag"></object>
|
||||
<object><img src="https://example.com/object-2.png" title="object tag"/></object>
|
||||
<embed src="https://example.com/embed.dat" title="embed tag"/>
|
||||
<audio src="https://example.com/audio.mp3" title="audio tag"/>
|
||||
<video src="https://example.com/video.mp4" title="video tag"/>
|
||||
`);
|
||||
|
||||
expect(result).toEqual([
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/img.png',
|
||||
name: 'img tag',
|
||||
},
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/picture.png',
|
||||
name: 'picture tag',
|
||||
},
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/object-2.png',
|
||||
name: 'object tag',
|
||||
},
|
||||
{
|
||||
type: 'Document',
|
||||
url: 'https://example.com/object-1.dat',
|
||||
name: 'object tag',
|
||||
},
|
||||
{
|
||||
type: 'Document',
|
||||
url: 'https://example.com/embed.dat',
|
||||
name: 'embed tag',
|
||||
},
|
||||
{
|
||||
type: 'Audio',
|
||||
url: 'https://example.com/audio.mp3',
|
||||
name: 'audio tag',
|
||||
},
|
||||
{
|
||||
type: 'Video',
|
||||
url: 'https://example.com/video.mp4',
|
||||
name: 'video tag',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('should ignore missing alt text', () => {
|
||||
const result = extractMediaFromHtml(`
|
||||
<img src="https://example.com/img.png"/>
|
||||
<picture><img src="https://example.com/picture.png"/></picture>
|
||||
<object data="https://example.com/object-1.dat"></object>
|
||||
<object><img src="https://example.com/object-2.png"/></object>
|
||||
<embed src="https://example.com/embed.dat"/>
|
||||
<audio src="https://example.com/audio.mp3"/>
|
||||
<video src="https://example.com/video.mp4"/>
|
||||
`);
|
||||
|
||||
expect(result).toEqual([
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/img.png',
|
||||
name: null,
|
||||
},
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/picture.png',
|
||||
name: null,
|
||||
},
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/object-2.png',
|
||||
name: null,
|
||||
},
|
||||
{
|
||||
type: 'Document',
|
||||
url: 'https://example.com/object-1.dat',
|
||||
name: null,
|
||||
},
|
||||
{
|
||||
type: 'Document',
|
||||
url: 'https://example.com/embed.dat',
|
||||
name: null,
|
||||
},
|
||||
{
|
||||
type: 'Audio',
|
||||
url: 'https://example.com/audio.mp3',
|
||||
name: null,
|
||||
},
|
||||
{
|
||||
type: 'Video',
|
||||
url: 'https://example.com/video.mp4',
|
||||
name: null,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('should de-duplicate attachments', () => {
|
||||
const result = extractMediaFromHtml(`
|
||||
<img src="https://example.com/1.png" alt="img 1"/>
|
||||
<img src="https://example.com/2.png" alt="img 2"/>
|
||||
<embed src="https://example.com/1.png" alt="embed 1"/>
|
||||
`);
|
||||
|
||||
expect(result).toEqual([
|
||||
{
|
||||
type: 'Document',
|
||||
url: 'https://example.com/1.png',
|
||||
name: 'embed 1',
|
||||
},
|
||||
{
|
||||
type: 'Image',
|
||||
url: 'https://example.com/2.png',
|
||||
name: 'img 2',
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import { extractMediaFromMfm } from '@/core/activitypub/misc/extract-media-from-mfm.js';
|
||||
|
||||
describe(extractMediaFromMfm, () => {
	// Degenerate inputs: an empty string and malformed text both yield no media.
	it('should return empty for empty input', () => {
		expect(extractMediaFromMfm('')).toEqual([]);
	});

	it('should return empty for invalid input', () => {
		expect(extractMediaFromMfm('*broken markdown\0')).toEqual([]);
	});

	// Image syntax (`![label](url)`) is picked up whether it is labelled, unlabelled, or nested in bold.
	it('should extract all image links', () => {
		const mfm = `
			
			
			****
		`;
		const expected = [
			{ type: 'Image', url: 'https://example.com/images/1.png', name: '1' },
			// An empty label is reported as a null name.
			{ type: 'Image', url: 'https://example.com/images/2.png', name: null },
			{ type: 'Image', url: 'https://example.com/images/3.png', name: '3' },
		];

		expect(extractMediaFromMfm(mfm)).toEqual(expected);
	});

	// Plain links (`[label](url)`) are not media, even when they point at an image file.
	it('should ignore regular links', () => {
		const mfm = `
			[1](https://example.com/images/1.png)
			[](https://example.com/images/2.png)
			**[3](https://example.com/images/3.png)**
		`;

		expect(extractMediaFromMfm(mfm)).toEqual([]);
	});

	// Silent links (`?[label](url)`) are not media either.
	it('should ignore silent links', () => {
		const mfm = `
			?[1](https://example.com/images/1.png)
			?[](https://example.com/images/2.png)
			**?[3](https://example.com/images/3.png)**
		`;

		expect(extractMediaFromMfm(mfm)).toEqual([]);
	});

	// Formatting inside the label is flattened to text; emoji codes and unicode emoji are kept.
	it('should extract complex text', () => {
		const mfm = '';
		const expected = [
			{
				type: 'Image',
				url: 'https://example.com/image.png',
				name: 'this is an image with complex text! :owo: 💙',
			},
		];

		expect(extractMediaFromMfm(mfm)).toEqual(expected);
	});

	// Three references to one URL collapse into a single entry, which carries the label of the last one.
	it('should de-duplicate images', () => {
		const mfm = `
			
			
			****
		`;
		const expected = [
			{ type: 'Image', url: 'https://example.com/images/1.png', name: '3' },
		];

		expect(extractMediaFromMfm(mfm)).toEqual(expected);
	});
});
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import { getContentByType } from '@/core/activitypub/misc/get-content-by-type.js';
|
||||
|
||||
/**
 * Tests for getContentByType: picking the text of an object that matches a requested MIME type.
 *
 * The fixtures exercise three candidate locations, tried in this order:
 *   1. `source.content`   (described by `source.mediaType`)
 *   2. `_misskey_content`
 *   3. `content`          (described by the top-level `mediaType`)
 *
 * Each group runs the same scenarios, once with the third ("permissive") argument set to true
 * and once with it omitted.
 */
describe(getContentByType, () => {
	// The MIME type requested by every test in this suite.
	const mfmType = 'text/x.misskeymarkdown';

	// Objects that carry no usable content: empty, non-object source, null content, non-string MFM content.
	const invalidObjects = [
		{},
		{ source: 'nope' },
		{ content: null },
		{ _misskey_content: 123 },
	];

	describe('when permissive', () => {
		it('should return source.content when it matches', () => {
			// source deliberately has no mediaType here; the permissive call still returns its content.
			const obj = {
				source: {
					content: 'source content',
				},
				_misskey_content: 'misskey content',
				content: 'native content',
				// Mixed-case, comma-separated list of types.
				mediaType: 'text/x.misskeYMarkdown, text/markdown',
			};

			const content = getContentByType(obj, mfmType, true);

			expect(content).toBe('source content');
		});

		it('should return _misskey_content when it matches', () => {
			const obj = {
				// source declares a different type, so it is passed over.
				source: {
					content: 'source content',
					mediaType: 'text/plain',
				},
				_misskey_content: 'misskey content',
				content: 'native content',
				mediaType: 'text/x.misskeYMarkdown, text/markdown',
			};

			const content = getContentByType(obj, mfmType, true);

			expect(content).toBe('misskey content');
		});

		it('should return content when it matches', () => {
			const obj = {
				source: {
					content: 'source content',
					mediaType: 'text/plain',
				},
				// No MFM content, so the lookup falls through to the native content.
				_misskey_content: null,
				content: 'native content',
				mediaType: 'text/x.misskeYMarkdown, text/markdown',
			};

			const content = getContentByType(obj, mfmType, true);

			expect(content).toBe('native content');
		});

		it('should return null when nothing matches', () => {
			const obj = {
				source: {
					content: 'source content',
					mediaType: 'text/plain',
				},
				_misskey_content: null,
				content: 'native content',
				mediaType: 'text/plain',
			};

			const content = getContentByType(obj, mfmType, true);

			expect(content).toBe(null);
		});

		it('should return null for invalid inputs', () => {
			// Request the same MIME type as the other tests, so a null result reflects
			// the invalid object rather than merely an unmatched type.
			const results = invalidObjects.map(c => getContentByType(c, mfmType, true));

			const expected = invalidObjects.map(() => null);
			expect(results).toEqual(expected);
		});
	});

	describe('when not permissive', () => {
		it('should return source.content when it matches', () => {
			const obj = {
				// Here source declares the requested type explicitly.
				source: {
					content: 'source content',
					mediaType: 'text/x.misskeymarkdown',
				},
				_misskey_content: 'misskey content',
				content: 'native content',
				mediaType: 'text/x.misskeymarkdown',
			};

			const content = getContentByType(obj, mfmType);

			expect(content).toBe('source content');
		});

		it('should return _misskey_content when it matches', () => {
			const obj = {
				source: {
					content: 'source content',
					mediaType: 'text/plain',
				},
				_misskey_content: 'misskey content',
				content: 'native content',
				mediaType: 'text/x.misskeymarkdown',
			};

			const content = getContentByType(obj, mfmType);

			expect(content).toBe('misskey content');
		});

		it('should return content when it matches', () => {
			const obj = {
				source: {
					content: 'source content',
					mediaType: 'text/plain',
				},
				_misskey_content: null,
				content: 'native content',
				mediaType: 'text/x.misskeymarkdown',
			};

			const content = getContentByType(obj, mfmType);

			expect(content).toBe('native content');
		});

		it('should return null when nothing matches', () => {
			const obj = {
				source: {
					content: 'source content',
					mediaType: 'text/plain',
				},
				_misskey_content: null,
				content: 'native content',
				mediaType: 'text/plain',
			};

			const content = getContentByType(obj, mfmType);

			expect(content).toBe(null);
		});

		it('should return null for invalid inputs', () => {
			// Request the same MIME type as the other tests, so a null result reflects
			// the invalid object rather than merely an unmatched type.
			const results = invalidObjects.map(c => getContentByType(c, mfmType));

			const expected = invalidObjects.map(() => null);
			expect(results).toEqual(expected);
		});
	});
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue