From eb22bc5f5dddfa35a803e06a14be3704299a8efe Mon Sep 17 00:00:00 2001 From: Hazelnoot Date: Fri, 13 Jun 2025 11:42:53 -0400 Subject: [PATCH] extract note attachments from inline HTML --- .../core/activitypub/models/ApNoteService.ts | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/packages/backend/src/core/activitypub/models/ApNoteService.ts b/packages/backend/src/core/activitypub/models/ApNoteService.ts index 2a28405121..300cdbd0a0 100644 --- a/packages/backend/src/core/activitypub/models/ApNoteService.ts +++ b/packages/backend/src/core/activitypub/models/ApNoteService.ts @@ -5,6 +5,7 @@ import { forwardRef, Inject, Injectable } from '@nestjs/common'; import { In } from 'typeorm'; +import { load as cheerio } from 'cheerio/slim'; import { UnrecoverableError } from 'bullmq'; import { DI } from '@/di-symbols.js'; import type { UsersRepository, PollsRepository, EmojisRepository, NotesRepository, MiMeta } from '@/models/_.js'; @@ -41,6 +42,7 @@ import { ApQuestionService } from './ApQuestionService.js'; import { ApImageService } from './ApImageService.js'; import type { Resolver } from '../ApResolverService.js'; import type { IObject, IPost } from '../type.js'; +import type { CheerioAPI } from 'cheerio/slim'; @Injectable() export class ApNoteService { @@ -265,6 +267,16 @@ export class ApNoteService { if (file) files.push(file); } + // Extract inline media from note content. + // Don't use source.content, _misskey_content, or anything else because those aren't HTML. + if (note.content) { + for (const attach of extractInlineMedia(note.content)) { + attach.sensitive ??= note.sensitive; + const file = await this.apImageService.resolveImage(actor, attach); + if (file) files.push(file); + } + } + // リプライ const reply: MiNote | null = note.inReplyTo ? await this.resolveNote(note.inReplyTo, { resolver }) @@ -463,6 +475,16 @@ export class ApNoteService { if (file) files.push(file); } + // Extract inline media from note content. 
+ // Don't use source.content, _misskey_content, or anything else because those aren't HTML. + if (note.content) { + for (const attach of extractInlineMedia(note.content)) { + attach.sensitive ??= note.sensitive; + const file = await this.apImageService.resolveImage(actor, attach); + if (file) files.push(file); + } + } + // リプライ const reply: MiNote | null = note.inReplyTo ? await this.resolveNote(note.inReplyTo, { resolver }) @@ -741,3 +763,73 @@ function getBestIcon(note: IObject): IObject | null { return best; }, null as IApDocument | null) ?? null; } + +function extractInlineMedia(html: string): IApDocument[] { + const $ = parseHtml(html); + if (!$) return []; + + const attachments: IApDocument[] = []; + + // <img> tags, including <picture> and <object> fallback elements + // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/img + $('img[src]') + .toArray() + .forEach(img => attachments.push({ + type: 'Image', + url: img.attribs.src, + name: img.attribs.alt || img.attribs.title || null, + })); + + // <object> tags + // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/object + $('object[data]') + .toArray() + .forEach(object => attachments.push({ + type: 'Document', + url: object.attribs.data, + name: object.attribs.alt || object.attribs.title || null, + })); + + // <embed> tags + // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed + $('embed[src]') + .toArray() + .forEach(embed => attachments.push({ + type: 'Document', + url: embed.attribs.src, + name: embed.attribs.alt || embed.attribs.title || null, + })); + + //