From b2e94a5254f24ffc4577c7fd6697b6a1e64c80f8 Mon Sep 17 00:00:00 2001 From: Hazelnoot Date: Fri, 13 Jun 2025 13:46:57 -0400 Subject: [PATCH] de-duplicate extracted media --- .../activitypub/misc/extract-media-from-html.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/backend/src/core/activitypub/misc/extract-media-from-html.ts b/packages/backend/src/core/activitypub/misc/extract-media-from-html.ts index 9e898e965b..3816479fd3 100644 --- a/packages/backend/src/core/activitypub/misc/extract-media-from-html.ts +++ b/packages/backend/src/core/activitypub/misc/extract-media-from-html.ts @@ -16,13 +16,13 @@ export function extractMediaFromHtml(html: string): IApDocument[] { const $ = parseHtml(html); if (!$) return []; - const attachments: IApDocument[] = []; + const attachments = new Map(); // tags, including and fallback elements // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/img $('img[src]') .toArray() - .forEach(img => attachments.push({ + .forEach(img => attachments.set(img.attribs.src, { type: 'Image', url: img.attribs.src, name: img.attribs.alt || img.attribs.title || null, @@ -32,7 +32,7 @@ export function extractMediaFromHtml(html: string): IApDocument[] { // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/object $('object[data]') .toArray() - .forEach(object => attachments.push({ + .forEach(object => attachments.set(object.attribs.data, { type: 'Document', url: object.attribs.data, name: object.attribs.alt || object.attribs.title || null, @@ -42,7 +42,7 @@ export function extractMediaFromHtml(html: string): IApDocument[] { // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed $('embed[src]') .toArray() - .forEach(embed => attachments.push({ + .forEach(embed => attachments.set(embed.attribs.src, { type: 'Document', url: embed.attribs.src, name: embed.attribs.alt || embed.attribs.title || null, @@ -52,7 +52,7 @@ export function extractMediaFromHtml(html: string): IApDocument[] { // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/audio $('audio[src]') .toArray() - .forEach(audio => attachments.push({ + .forEach(audio => attachments.set(audio.attribs.src, { type: 'Audio', url: audio.attribs.src, name: audio.attribs.alt || audio.attribs.title || null, @@ -62,7 +62,7 @@ export function extractMediaFromHtml(html: string): IApDocument[] { // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/video $('video[src]') .toArray() - .forEach(audio => attachments.push({ + .forEach(audio => attachments.set(audio.attribs.src, { type: 'Video', url: audio.attribs.src, name: audio.attribs.alt || audio.attribs.title || null, @@ -70,7 +70,7 @@ export function extractMediaFromHtml(html: string): IApDocument[] { // TODO support ? We would need to extract it directly from the HTML and save to a temp file. - return attachments; + return Array.from(attachments.values()); } function parseHtml(html: string): CheerioAPI | null {