NOTE(review): the line structure and tab indentation of this patch were reconstructed from a
whitespace-collapsed copy. If hunks fail to match, `git apply --ignore-whitespace` may be needed.

diff --git a/packages/backend/src/core/activitypub/misc/extract-media-from-mfm.ts b/packages/backend/src/core/activitypub/misc/extract-media-from-mfm.ts
new file mode 100644
index 0000000000..795de8a30b
--- /dev/null
+++ b/packages/backend/src/core/activitypub/misc/extract-media-from-mfm.ts
@@ -0,0 +1,69 @@
+/*
+ * SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+import { parse, inspect, extract } from 'mfm-js';
+import type { IApDocument } from '@/core/activitypub/type.js';
+import type { MfmNode, MfmText } from 'mfm-js';
+
+/**
+ * Finds MFM nodes representing inline media and returns them as simulated AP documents.
+ * Returns an empty array if the input cannot be parsed, or no media was found.
+ * When the same URL appears more than once, only the last occurrence is kept.
+ * @param mfm Input MFM to analyze.
+ */
+export function extractMediaFromMfm(mfm: string): IApDocument[] {
+	const nodes = parseMfm(mfm);
+	if (nodes == null) return [];
+
+	// Keyed by URL so that repeated references to the same media collapse into one entry.
+	const attachments = new Map<string, IApDocument>();
+
+	inspect(nodes, node => {
+		if (node.type === 'link' && node.props.image) {
+			// Collect the text of the link's child nodes to use as the document name.
+			const alt: string[] = [];
+
+			inspect(node.children, child => {
+				switch (child.type) {
+					case 'text':
+						alt.push(child.props.text);
+						break;
+					case 'unicodeEmoji':
+						alt.push(child.props.emoji);
+						break;
+					case 'emojiCode':
+						alt.push(':');
+						alt.push(child.props.name);
+						alt.push(':');
+						break;
+				}
+			});
+
+			attachments.set(node.props.url, {
+				type: 'Image',
+				url: node.props.url,
+				name: alt.length > 0
+					? alt.join('')
+					: null,
+			});
+		}
+	});
+
+	return Array.from(attachments.values());
+}
+
+/**
+ * Parses MFM source into a node tree.
+ * Returns null instead of throwing if the parser rejects the input.
+ * @param mfm Input MFM to parse.
+ */
+function parseMfm(mfm: string): MfmNode[] | null {
+	try {
+		return parse(mfm);
+	} catch {
+		// Don't worry about invalid MFM
+		return null;
+	}
+}
diff --git a/packages/backend/src/core/activitypub/models/ApNoteService.ts b/packages/backend/src/core/activitypub/models/ApNoteService.ts
index fa98102116..d9950653fc 100644
--- a/packages/backend/src/core/activitypub/models/ApNoteService.ts
+++ b/packages/backend/src/core/activitypub/models/ApNoteService.ts
@@ -29,6 +29,7 @@ import { IdentifiableError } from '@/misc/identifiable-error.js';
 import { isRetryableError } from '@/misc/is-retryable-error.js';
 import { renderInlineError } from '@/misc/render-inline-error.js';
 import { extractMediaFromHtml } from '@/core/activitypub/misc/extract-media-from-html.js';
+import { extractMediaFromMfm } from '@/core/activitypub/misc/extract-media-from-mfm.js';
 import { getContentByType } from '@/core/activitypub/misc/get-content-by-type.js';
 import { getOneApId, getApId, validPost, isEmoji, getApType, isApObject, isDocument, IApDocument, isLink } from '../type.js';
 import { ApLoggerService } from '../ApLoggerService.js';
@@ -724,20 +725,17 @@ export class ApNoteService {
 			}
 		}
 
-		// Extract inline media from markdown content.
-		// TODO We first need to implement support for "!" prefix in sfm-js.
-		// That will be implemented as part of https://activitypub.software/TransFem-org/Sharkey/-/issues/1105
-		// const markdownContent =
-		// 	getContentByType(note, 'text/x.misskeymarkdown') ??
-		// 	getContentByType(note, 'text/markdown');
-		// if (markdownContent) {
-		// 	for (const attach of extractMediaFromMarkdown(markdownContent)) {
-		// 		if (hasUrl(attach)) {
-		// 			attach.sensitive ??= note.sensitive;
-		// 			attachments.set(attach.url, attach);
-		// 		}
-		// 	}
-		// }
+		// Extract inline media from MFM / markdown content.
+		const mfmContent =
+			getContentByType(note, 'text/x.misskeymarkdown') ??
+			getContentByType(note, 'text/markdown');
+		if (mfmContent) {
+			for (const attach of extractMediaFromMfm(mfmContent)) {
+				if (hasUrl(attach)) {
+					attachments.set(attach.url, attach);
+				}
+			}
+		}
 
 		// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
 		const icon = getBestIcon(note);
diff --git a/packages/backend/test/unit/core/activitypub/misc/extract-media-from-mfm.ts b/packages/backend/test/unit/core/activitypub/misc/extract-media-from-mfm.ts
new file mode 100644
index 0000000000..c87335331f
--- /dev/null
+++ b/packages/backend/test/unit/core/activitypub/misc/extract-media-from-mfm.ts
@@ -0,0 +1,92 @@
+/*
+ * SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+import { extractMediaFromMfm } from '@/core/activitypub/misc/extract-media-from-mfm.js';
+
+describe(extractMediaFromMfm, () => {
+	it('should return empty for empty input', () => {
+		const result = extractMediaFromMfm('');
+		expect(result).toEqual([]);
+	});
+
+	it('should return empty for invalid input', () => {
+		const result = extractMediaFromMfm('*broken markdown\0');
+		expect(result).toEqual([]);
+	});
+
+	it('should extract all image links', () => {
+		const result = extractMediaFromMfm(`
+			![1](https://example.com/images/1.png)
+			![](https://example.com/images/2.png)
+			**![3](https://example.com/images/3.png)**
+		`);
+
+		expect(result).toEqual([
+			{
+				type: 'Image',
+				url: 'https://example.com/images/1.png',
+				name: '1',
+			},
+			{
+				type: 'Image',
+				url: 'https://example.com/images/2.png',
+				name: null,
+			},
+			{
+				type: 'Image',
+				url: 'https://example.com/images/3.png',
+				name: '3',
+			},
+		]);
+	});
+
+	it('should ignore regular links', () => {
+		const result = extractMediaFromMfm(`
+			[1](https://example.com/images/1.png)
+			[](https://example.com/images/2.png)
+			**[3](https://example.com/images/3.png)**
+		`);
+
+		expect(result).toEqual([]);
+	});
+
+	it('should ignore silent links', () => {
+		const result = extractMediaFromMfm(`
+			?[1](https://example.com/images/1.png)
+			?[](https://example.com/images/2.png)
+			**?[3](https://example.com/images/3.png)**
+		`);
+
+		expect(result).toEqual([]);
+	});
+
+	it('should extract complex text', () => {
+		const result = extractMediaFromMfm('![this is an **image** with *complex* text! :owo: 💙](https://example.com/image.png)');
+
+		expect(result).toEqual([
+			{
+				type: 'Image',
+				url: 'https://example.com/image.png',
+				name: 'this is an image with complex text! :owo: 💙',
+			},
+		]);
+	});
+
+	it('should de-duplicate images', () => {
+		const result = extractMediaFromMfm(`
+			![1](https://example.com/images/1.png)
+			![](https://example.com/images/1.png)
+			**![3](https://example.com/images/1.png)**
+		`);
+
+		expect(result).toEqual([
+			{
+				type: 'Image',
+				url: 'https://example.com/images/1.png',
+				name: '3',
+			},
+		]);
+	});
+});