factor out attachment code to reduce duplication and allow testing
This commit is contained in:
parent
b0b0218b75
commit
5430b00f72
3 changed files with 216 additions and 133 deletions
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import { load as cheerio } from 'cheerio/slim';
|
||||
import type { IApDocument } from '@/core/activitypub/type.js';
|
||||
import type { CheerioAPI } from 'cheerio/slim';
|
||||
|
||||
/**
 * Scans HTML for elements that embed media inline and converts each one into a simulated AP document.
 * Results are grouped by element kind, in this order: <img>, <object>, <embed>, <audio>, <video>.
 * An empty array is returned when the HTML cannot be parsed or contains none of those elements.
 * @param html Input HTML to analyze.
 */
export function extractMediaFromHtml(html: string): IApDocument[] {
	const $ = parseHtml(html);
	if (!$) return [];

	// One rule per supported element: which selector finds it, which AP type it maps to,
	// and which attribute carries the media URL. Order here determines output order.
	const rules: { selector: string, type: IApDocument['type'], urlAttr: 'src' | 'data' }[] = [
		// <img> tags, including <picture> and <object> fallback elements
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/img
		{ selector: 'img[src]', type: 'Image', urlAttr: 'src' },

		// <object> tags
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/object
		{ selector: 'object[data]', type: 'Document', urlAttr: 'data' },

		// <embed> tags
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed
		{ selector: 'embed[src]', type: 'Document', urlAttr: 'src' },

		// <audio> tags
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/audio
		{ selector: 'audio[src]', type: 'Audio', urlAttr: 'src' },

		// <video> tags
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/video
		{ selector: 'video[src]', type: 'Video', urlAttr: 'src' },
	];

	const attachments: IApDocument[] = [];
	for (const rule of rules) {
		for (const element of $(rule.selector).toArray()) {
			attachments.push({
				type: rule.type,
				url: element.attribs[rule.urlAttr],
				// Prefer alt text; fall back to the title attribute; empty strings count as missing.
				name: element.attribs.alt || element.attribs.title || null,
			});
		}
	}

	// TODO support <svg>? We would need to extract it directly from the HTML and save to a temp file.

	return attachments;
}
|
||||
|
||||
// Parses HTML with cheerio, returning null instead of throwing when parsing fails.
function parseHtml(html: string): CheerioAPI | null {
	let parsed: CheerioAPI | null = null;
	try {
		parsed = cheerio(html);
	} catch {
		// Don't worry about invalid HTML; the caller receives null.
	}
	return parsed;
}
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
import type { IPost } from '@/core/activitypub/type.js';
|
||||
import { toArray } from '@/misc/prelude/array.js';
|
||||
|
||||
/**
 * Gets content of a specified media type from a provided object.
 *
 * Optionally supports a "permissive" mode which enables the following changes:
 * 1. MIME types are checked in a case-insensitive manner (both the candidate and the requested type are lower-cased).
 * 2. MIME types are matched based on inclusion, not strict equality.
 * 3. A candidate content is considered to match if it has no specified MIME type.
 *
 * Note: this method is written defensively to protect against malformed remote objects.
 * When extending or modifying it, please be sure to work with "unknown" type and validate everything.
 *
 * Note: the logic in this method is carefully ordered to match the selection priority of existing code in ApNoteService.
 * Please do not re-arrange it without testing!
 * New checks can be added to the end of the method to safely extend the existing logic.
 *
 * @param object AP object to extract content from.
 * @param mimeType MIME type to look for.
 * @param permissive Enables permissive mode, as described above. Defaults to false (disabled).
 * @returns The first matching content string, or null if nothing matched.
 */
export function getContentByType(object: IPost | Record<string, unknown>, mimeType: string, permissive = false): string | null {
	// Lower-cased once so that the permissive comparison is case-insensitive on both sides.
	const normalizedMimeType = mimeType.toLowerCase();

	// Case 1: Extended "source" property
	if (object.source && typeof(object.source) === 'object') {
		// "source" is permitted to be an array, though no implementations are known to do this yet.
		const sources = toArray(object.source) as unknown[];
		for (const source of sources) {
			// Array entries come from remote data and may be null or primitives - skip anything that isn't an object.
			if (source == null || typeof(source) !== 'object') continue;

			const candidate = source as Record<string, unknown>;
			if (typeof(candidate.content) === 'string' && checkMediaType(candidate.mediaType)) {
				return candidate.content;
			}
		}
	}

	// Case 2: Special case for MFM
	if (typeof(object._misskey_content) === 'string' && mimeType === 'text/x.misskeymarkdown') {
		return object._misskey_content;
	}

	// Case 3: AP native "content" property
	if (typeof(object.content) === 'string' && checkMediaType(object.mediaType)) {
		return object.content;
	}

	return null;

	// Checks if the provided media type matches the input parameters.
	function checkMediaType(mediaType: unknown): boolean {
		if (typeof(mediaType) === 'string') {
			// Strict match
			if (mediaType === mimeType) {
				return true;
			}

			// Permissive match: case-insensitive, and tolerant of parameters such as "; charset=utf-8"
			if (permissive && mediaType.toLowerCase().includes(normalizedMimeType)) {
				return true;
			}
		}

		// Permissive fallback match
		if (permissive && mediaType == null) {
			return true;
		}

		// No match
		return false;
	}
}
|
||||
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
import { forwardRef, Inject, Injectable } from '@nestjs/common';
|
||||
import { In } from 'typeorm';
|
||||
import { load as cheerio } from 'cheerio/slim';
|
||||
import { UnrecoverableError } from 'bullmq';
|
||||
import { DI } from '@/di-symbols.js';
|
||||
import type { UsersRepository, PollsRepository, EmojisRepository, NotesRepository, MiMeta } from '@/models/_.js';
|
||||
|
|
@ -28,6 +27,8 @@ import { checkHttps } from '@/misc/check-https.js';
|
|||
import { IdentifiableError } from '@/misc/identifiable-error.js';
|
||||
import { isRetryableError } from '@/misc/is-retryable-error.js';
|
||||
import { renderInlineError } from '@/misc/render-inline-error.js';
|
||||
import { extractMediaFromHtml } from '@/core/activitypub/misc/extract-media-from-html.js';
|
||||
import { getContentByType } from '@/core/activitypub/misc/get-content-by-type.js';
|
||||
import { getOneApId, getApId, validPost, isEmoji, getApType, isApObject, isDocument, IApDocument, isLink } from '../type.js';
|
||||
import { ApLoggerService } from '../ApLoggerService.js';
|
||||
import { ApMfmService } from '../ApMfmService.js';
|
||||
|
|
@ -42,7 +43,6 @@ import { ApQuestionService } from './ApQuestionService.js';
|
|||
import { ApImageService } from './ApImageService.js';
|
||||
import type { Resolver } from '../ApResolverService.js';
|
||||
import type { IObject, IPost } from '../type.js';
|
||||
import type { CheerioAPI } from 'cheerio/slim';
|
||||
|
||||
@Injectable()
|
||||
export class ApNoteService {
|
||||
|
|
@ -208,12 +208,8 @@ export class ApNoteService {
|
|||
const cw = note.summary === '' ? null : note.summary;
|
||||
|
||||
// テキストのパース
|
||||
let text: string | null = null;
|
||||
if (note.source?.mediaType === 'text/x.misskeymarkdown' && typeof note.source.content === 'string') {
|
||||
text = note.source.content;
|
||||
} else if (typeof note._misskey_content !== 'undefined') {
|
||||
text = note._misskey_content;
|
||||
} else if (typeof note.content === 'string') {
|
||||
let text = getContentByType(note, 'text/x.misskeymarkdown');
|
||||
if (text == null && typeof note.content === 'string') {
|
||||
text = this.apMfmService.htmlToMfm(note.content, note.tag);
|
||||
}
|
||||
|
||||
|
|
@ -251,31 +247,9 @@ export class ApNoteService {
|
|||
}
|
||||
|
||||
// 添付ファイル
|
||||
const files: MiDriveFile[] = [];
|
||||
|
||||
for (const attach of toArray(note.attachment)) {
|
||||
attach.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, attach);
|
||||
if (file) files.push(file);
|
||||
}
|
||||
|
||||
// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
|
||||
const icon = getBestIcon(note);
|
||||
if (icon) {
|
||||
icon.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, icon);
|
||||
if (file) files.push(file);
|
||||
}
|
||||
|
||||
// Extract inline media from note content.
|
||||
// Don't use source.content, _misskey_content, or anything else because those aren't HTML.
|
||||
if (note.content) {
|
||||
for (const attach of extractInlineMedia(note.content)) {
|
||||
attach.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, attach);
|
||||
if (file) files.push(file);
|
||||
}
|
||||
}
|
||||
// Note: implementation moved to the getAttachments method to avoid duplication.
|
||||
// Please copy any upstream changes to that method! (It's at the bottom of this class)
|
||||
const files = await this.getAttachments(note, actor);
|
||||
|
||||
// リプライ
|
||||
const reply: MiNote | null = note.inReplyTo
|
||||
|
|
@ -424,12 +398,8 @@ export class ApNoteService {
|
|||
const cw = note.summary === '' ? null : note.summary;
|
||||
|
||||
// テキストのパース
|
||||
let text: string | null = null;
|
||||
if (note.source?.mediaType === 'text/x.misskeymarkdown' && typeof note.source.content === 'string') {
|
||||
text = note.source.content;
|
||||
} else if (typeof note._misskey_content !== 'undefined') {
|
||||
text = note._misskey_content;
|
||||
} else if (typeof note.content === 'string') {
|
||||
let text = getContentByType(note, 'text/x.misskeymarkdown');
|
||||
if (text == null && typeof note.content === 'string') {
|
||||
text = this.apMfmService.htmlToMfm(note.content, note.tag);
|
||||
}
|
||||
|
||||
|
|
@ -459,31 +429,7 @@ export class ApNoteService {
|
|||
}
|
||||
|
||||
// 添付ファイル
|
||||
const files: MiDriveFile[] = [];
|
||||
|
||||
for (const attach of toArray(note.attachment)) {
|
||||
attach.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, attach);
|
||||
if (file) files.push(file);
|
||||
}
|
||||
|
||||
// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
|
||||
const icon = getBestIcon(note);
|
||||
if (icon) {
|
||||
icon.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, icon);
|
||||
if (file) files.push(file);
|
||||
}
|
||||
|
||||
// Extract inline media from note content.
|
||||
// Don't use source.content, _misskey_content, or anything else because those aren't HTML.
|
||||
if (note.content) {
|
||||
for (const attach of extractInlineMedia(note.content)) {
|
||||
attach.sensitive ??= note.sensitive;
|
||||
const file = await this.apImageService.resolveImage(actor, attach);
|
||||
if (file) files.push(file);
|
||||
}
|
||||
}
|
||||
const files = await this.getAttachments(note, actor);
|
||||
|
||||
// リプライ
|
||||
const reply: MiNote | null = note.inReplyTo
|
||||
|
|
@ -744,6 +690,55 @@ export class ApNoteService {
|
|||
// Permanent error - return null
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Extracts and saves all media attachments from the provided note.
 * Returns an array of all the created files.
 * Candidates are processed sequentially in this order: the note's "attachment" entries,
 * then its best icon (if any), then media found inline in the note's HTML content.
 * Each candidate inherits the note's "sensitive" value unless it already has one.
 * TODO: suppress errors and set a processError entry instead.
 * TODO: run in parallel (with promiseLimit!)
 */
private async getAttachments(note: IPost, actor: MiRemoteUser): Promise<MiDriveFile[]> {
	const files: MiDriveFile[] = [];

	// Shared per-candidate step: inherit sensitivity, resolve the file, and keep it if one came back.
	const collect = async (media: IObject): Promise<void> => {
		media.sensitive ??= note.sensitive;
		const resolved = await this.apImageService.resolveImage(actor, media);
		if (resolved) files.push(resolved);
	};

	for (const attach of toArray(note.attachment)) {
		await collect(attach);
	}

	// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
	const icon = getBestIcon(note);
	if (icon) {
		await collect(icon);
	}

	// Extract inline media from HTML content.
	// Don't use source.content, _misskey_content, or anything else because those aren't HTML.
	const htmlContent = getContentByType(note, 'text/html');
	if (htmlContent) {
		for (const attach of extractMediaFromHtml(htmlContent)) {
			await collect(attach);
		}
	}

	// Extract inline media from markdown content.
	// TODO We first need to implement support for "!" prefix in sfm-js.
	//  That will be implemented as part of https://activitypub.software/TransFem-org/Sharkey/-/issues/1105
	// const markdownContent = getContentByType(note, 'text/markdown') || text;
	// if (markdownContent) {
	// 	for (const attach of extractMediaFromMarkdown(markdownContent)) {
	// 		attach.sensitive ??= note.sensitive;
	// 		const file = await this.apImageService.resolveImage(actor, attach);
	// 		if (file) files.push(file);
	// 	}
	// }

	return files;
}
|
||||
}
|
||||
|
||||
function getBestIcon(note: IObject): IObject | null {
|
||||
|
|
@ -764,72 +759,3 @@ function getBestIcon(note: IObject): IObject | null {
|
|||
}, null as IApDocument | null) ?? null;
|
||||
}
|
||||
|
||||
// Finds inline media elements in the given HTML and returns them as simulated AP documents.
// Returns an empty array when the HTML cannot be parsed or no media elements are present.
function extractInlineMedia(html: string): IApDocument[] {
	const $ = parseHtml(html);
	if (!$) return [];

	const attachments: IApDocument[] = [];

	// Appends one document per element matching the selector, reading the URL from the given attribute.
	const collect = (selector: string, type: IApDocument['type'], urlAttribute: 'src' | 'data'): void => {
		for (const element of $(selector).toArray()) {
			attachments.push({
				type,
				url: element.attribs[urlAttribute],
				name: element.attribs.alt || element.attribs.title || null,
			});
		}
	};

	// <img> tags, including <picture> and <object> fallback elements
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/img
	collect('img[src]', 'Image', 'src');

	// <object> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/object
	collect('object[data]', 'Document', 'data');

	// <embed> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed
	collect('embed[src]', 'Document', 'src');

	// <audio> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/audio
	collect('audio[src]', 'Audio', 'src');

	// <video> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/video
	collect('video[src]', 'Video', 'src');

	// TODO support <svg>? we will need to extract it directly from the HTML.

	return attachments;
}
|
||||
|
||||
// Parses HTML with cheerio, returning null instead of throwing when parsing fails.
function parseHtml(html: string): CheerioAPI | null {
	let parsed: CheerioAPI | null = null;
	try {
		parsed = cheerio(html);
	} catch {
		// Don't worry about invalid HTML; the caller receives null.
	}
	return parsed;
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue