From 5430b00f724f6571fca919f0b3cc43eba89bf2ea Mon Sep 17 00:00:00 2001 From: Hazelnoot Date: Fri, 13 Jun 2025 12:56:14 -0400 Subject: [PATCH] factor out attachment code to reduce duplication and allow testing --- .../misc/extract-media-from-html.ts | 83 ++++++++ .../activitypub/misc/get-content-by-type.ts | 74 +++++++ .../core/activitypub/models/ApNoteService.ts | 192 ++++++------------ 3 files changed, 216 insertions(+), 133 deletions(-) create mode 100644 packages/backend/src/core/activitypub/misc/extract-media-from-html.ts create mode 100644 packages/backend/src/core/activitypub/misc/get-content-by-type.ts diff --git a/packages/backend/src/core/activitypub/misc/extract-media-from-html.ts b/packages/backend/src/core/activitypub/misc/extract-media-from-html.ts new file mode 100644 index 0000000000..9e898e965b --- /dev/null +++ b/packages/backend/src/core/activitypub/misc/extract-media-from-html.ts @@ -0,0 +1,83 @@ +/* + * SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +import { load as cheerio } from 'cheerio/slim'; +import type { IApDocument } from '@/core/activitypub/type.js'; +import type { CheerioAPI } from 'cheerio/slim'; + +/** + * Finds HTML elements representing inline media and returns them as simulated AP documents. + * Returns an empty array if the input cannot be parsed, or no media was found. + * @param html Input HTML to analyze. + */ +export function extractMediaFromHtml(html: string): IApDocument[] { + const $ = parseHtml(html); + if (!$) return []; + + const attachments: IApDocument[] = []; + + // tags, including and fallback elements + // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/img + $('img[src]') + .toArray() + .forEach(img => attachments.push({ + type: 'Image', + url: img.attribs.src, + name: img.attribs.alt || img.attribs.title || null, + })); + + // tags + // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/object + $('object[data]') + .toArray() + .forEach(object => attachments.push({ + type: 'Document', + url: object.attribs.data, + name: object.attribs.alt || object.attribs.title || null, + })); + + // tags + // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed + $('embed[src]') + .toArray() + .forEach(embed => attachments.push({ + type: 'Document', + url: embed.attribs.src, + name: embed.attribs.alt || embed.attribs.title || null, + })); + + //