extract inline images from MFM and Markdown
This commit is contained in:
parent
5e46efe60d
commit
c3a6ba93ca
3 changed files with 165 additions and 14 deletions
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import { parse, inspect, extract } from 'mfm-js';
|
||||
import type { IApDocument } from '@/core/activitypub/type.js';
|
||||
import type { MfmNode, MfmText } from 'mfm-js';
|
||||
|
||||
/**
 * Finds MFM nodes representing inline media and returns them as simulated AP documents.
 * Returns an empty array if the input cannot be parsed, or no media was found.
 *
 * Only links flagged as images are collected; every other node is ignored.
 * When the same URL occurs more than once, a single document is produced for it
 * and the alt text of the last occurrence wins.
 * @param mfm Input MFM to analyze.
 * @returns One simulated `Image` document per distinct image URL.
 */
export function extractMediaFromMfm(mfm: string): IApDocument[] {
	const nodes = parseMfm(mfm);
	if (nodes == null) return [];

	// Keyed by URL so that repeated references to the same image collapse into one entry.
	const attachments = new Map<string, IApDocument>();

	inspect(nodes, node => {
		if (node.type !== 'link' || !node.props.image) return;

		const alt = collectAltText(node.children);

		attachments.set(node.props.url, {
			type: 'Image',
			url: node.props.url,
			// A label without any textual content means "no description".
			name: alt.length > 0
				? alt.join('')
				: null,
		});
	});

	return Array.from(attachments.values());
}

/**
 * Flattens the label of an image link into plain-text fragments usable as alt text.
 * Container nodes contribute nothing themselves; only the leaf nodes listed below add text.
 * @param label Child nodes of the image link.
 * @returns Text fragments in document order; empty if the label has no textual content.
 */
function collectAltText(label: MfmNode[]): string[] {
	const fragments: string[] = [];

	inspect(label, child => {
		switch (child.type) {
			case 'text':
				fragments.push(child.props.text);
				break;
			case 'unicodeEmoji':
				fragments.push(child.props.emoji);
				break;
			case 'emojiCode':
				// Custom emoji have no plain-text glyph, so keep the shortcode form.
				fragments.push(':', child.props.name, ':');
				break;
			case 'inlineCode':
				// Keep the code itself; otherwise a label like "see `foo`" would lose its content.
				fragments.push(child.props.code);
				break;
			case 'mathInline':
				// Same reasoning as inline code: preserve the formula source as text.
				fragments.push(child.props.formula);
				break;
		}
	});

	return fragments;
}
|
||||
|
||||
/**
 * Parses MFM text into a node list without letting parser errors escape.
 * @param mfm Raw MFM text.
 * @returns The parsed nodes, or null if the parser threw.
 */
function parseMfm(mfm: string): MfmNode[] | null {
	let parsed: MfmNode[] | null;

	try {
		parsed = parse(mfm);
	} catch {
		// Invalid MFM is not treated as an error here; it is reported as null instead.
		parsed = null;
	}

	return parsed;
}
|
||||
|
|
@ -29,6 +29,7 @@ import { IdentifiableError } from '@/misc/identifiable-error.js';
|
|||
import { isRetryableError } from '@/misc/is-retryable-error.js';
|
||||
import { renderInlineError } from '@/misc/render-inline-error.js';
|
||||
import { extractMediaFromHtml } from '@/core/activitypub/misc/extract-media-from-html.js';
|
||||
import { extractMediaFromMfm } from '@/core/activitypub/misc/extract-media-from-mfm.js';
|
||||
import { getContentByType } from '@/core/activitypub/misc/get-content-by-type.js';
|
||||
import { getOneApId, getApId, validPost, isEmoji, getApType, isApObject, isDocument, IApDocument, isLink } from '../type.js';
|
||||
import { ApLoggerService } from '../ApLoggerService.js';
|
||||
|
|
@ -724,20 +725,17 @@ export class ApNoteService {
|
|||
}
|
||||
}
|
||||
|
||||
// Extract inline media from markdown content.
|
||||
// TODO We first need to implement support for "!" prefix in sfm-js.
|
||||
// That will be implemented as part of https://activitypub.software/TransFem-org/Sharkey/-/issues/1105
|
||||
// const markdownContent =
|
||||
// getContentByType(note, 'text/x.misskeymarkdown') ??
|
||||
// getContentByType(note, 'text/markdown');
|
||||
// if (markdownContent) {
|
||||
// for (const attach of extractMediaFromMarkdown(markdownContent)) {
|
||||
// if (hasUrl(attach)) {
|
||||
// attach.sensitive ??= note.sensitive;
|
||||
// attachments.set(attach.url, attach);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// Extract inline media from MFM / markdown content.
|
||||
const mfmContent =
|
||||
getContentByType(note, 'text/x.misskeymarkdown') ??
|
||||
getContentByType(note, 'text/markdown');
|
||||
if (mfmContent) {
|
||||
for (const attach of extractMediaFromMfm(mfmContent)) {
|
||||
if (hasUrl(attach)) {
|
||||
attachments.set(attach.url, attach);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
|
||||
const icon = getBestIcon(note);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import { extractMediaFromMfm } from '@/core/activitypub/misc/extract-media-from-mfm.js';
|
||||
|
||||
describe(extractMediaFromMfm, () => {
	// Builds the simulated document expected for one extracted image.
	const image = (url: string, name: string | null) => ({ type: 'Image', url, name });

	// Joins rows into a multi-line body that starts and ends with a line break.
	const multiline = (...rows: string[]): string => `\n${rows.join('\n')}\n`;

	it('should return empty for empty input', () => {
		expect(extractMediaFromMfm('')).toEqual([]);
	});

	it('should return empty for invalid input', () => {
		expect(extractMediaFromMfm('*broken markdown\0')).toEqual([]);
	});

	it('should extract all image links', () => {
		const input = multiline(
			'![1](https://example.com/images/1.png)',
			'![](https://example.com/images/2.png)',
			'**![3](https://example.com/images/3.png)**',
		);

		expect(extractMediaFromMfm(input)).toEqual([
			image('https://example.com/images/1.png', '1'),
			// An empty label yields no description.
			image('https://example.com/images/2.png', null),
			image('https://example.com/images/3.png', '3'),
		]);
	});

	it('should ignore regular links', () => {
		const input = multiline(
			'[1](https://example.com/images/1.png)',
			'[](https://example.com/images/2.png)',
			'**[3](https://example.com/images/3.png)**',
		);

		expect(extractMediaFromMfm(input)).toEqual([]);
	});

	it('should ignore silent links', () => {
		const input = multiline(
			'?[1](https://example.com/images/1.png)',
			'?[](https://example.com/images/2.png)',
			'**?[3](https://example.com/images/3.png)**',
		);

		expect(extractMediaFromMfm(input)).toEqual([]);
	});

	it('should extract complex text', () => {
		const input = '![this is an **image** with *complex* text! :owo: 💙](https://example.com/image.png)';

		// Formatting markers are stripped, while emoji codes and unicode emoji are kept.
		expect(extractMediaFromMfm(input)).toEqual([
			image('https://example.com/image.png', 'this is an image with complex text! :owo: 💙'),
		]);
	});

	it('should de-duplicate images', () => {
		const input = multiline(
			'![1](https://example.com/images/1.png)',
			'![](https://example.com/images/1.png)',
			'**![3](https://example.com/images/1.png)**',
		);

		// All three rows share one URL; the description of the last row is the one retained.
		expect(extractMediaFromMfm(input)).toEqual([
			image('https://example.com/images/1.png', '3'),
		]);
	});
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue