factor out attachment code to reduce duplication and allow testing
This commit is contained in:
parent
b0b0218b75
commit
5430b00f72
3 changed files with 216 additions and 133 deletions
|
|
@ -0,0 +1,83 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { load as cheerio } from 'cheerio/slim';
|
||||||
|
import type { IApDocument } from '@/core/activitypub/type.js';
|
||||||
|
import type { CheerioAPI } from 'cheerio/slim';
|
||||||
|
|
||||||
|
// Describes how each kind of inline media element maps onto a simulated AP document.
// The order of this table is the order in which matches are appended to the result.
const inlineMediaRules: ReadonlyArray<{ selector: string, urlAttribute: 'src' | 'data', type: IApDocument['type'] }> = [
	// <img> tags, including <picture> and <object> fallback elements
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/img
	{ selector: 'img[src]', urlAttribute: 'src', type: 'Image' },

	// <object> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/object
	{ selector: 'object[data]', urlAttribute: 'data', type: 'Document' },

	// <embed> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed
	{ selector: 'embed[src]', urlAttribute: 'src', type: 'Document' },

	// <audio> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/audio
	{ selector: 'audio[src]', urlAttribute: 'src', type: 'Audio' },

	// <video> tags
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/video
	{ selector: 'video[src]', urlAttribute: 'src', type: 'Video' },

	// TODO support <svg>? We would need to extract it directly from the HTML and save to a temp file.
];

/**
 * Scans an HTML fragment for inline media elements and converts each one into a simulated AP document.
 * Results are grouped by element kind (images, objects, embeds, audio, video) and keep document order within each group.
 * Each document's name is taken from the element's "alt" attribute, then "title", and is null if both are missing or empty.
 * @param html Input HTML to analyze.
 * @returns The discovered media, or an empty array if the input cannot be parsed or contains no media.
 */
export function extractMediaFromHtml(html: string): IApDocument[] {
	const $ = parseHtml(html);
	if (!$) return [];

	const attachments: IApDocument[] = [];

	for (const rule of inlineMediaRules) {
		for (const element of $(rule.selector).toArray()) {
			const { attribs } = element;
			attachments.push({
				type: rule.type,
				url: attribs[rule.urlAttribute],
				name: attribs.alt || attribs.title || null,
			});
		}
	}

	return attachments;
}
|
||||||
|
|
||||||
|
/**
 * Loads an HTML string with cheerio.
 * Any exception thrown by the parser is swallowed and reported as a null result instead.
 * @param html HTML source to parse.
 * @returns The loaded cheerio API, or null if loading threw.
 */
function parseHtml(html: string): CheerioAPI | null {
	let parsed: CheerioAPI | null;

	try {
		parsed = cheerio(html);
	} catch {
		// Don't worry about invalid HTML
		parsed = null;
	}

	return parsed;
}
|
||||||
|
|
@ -0,0 +1,74 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: hazelnoot and other Sharkey contributors
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { IPost } from '@/core/activitypub/type.js';
|
||||||
|
import { toArray } from '@/misc/prelude/array.js';
|
||||||
|
|
||||||
|
/**
 * Gets content of a specified media type from a provided object.
 *
 * Candidates are checked in this order, and the first match wins:
 * 1. Entries of the extended "source" property (a single object or an array of objects).
 * 2. The "_misskey_content" property, but only when looking for "text/x.misskeymarkdown".
 * 3. The AP native "content" property, typed by the object's own "mediaType".
 *
 * Optionally supports a "permissive" mode which enables the following changes:
 * 1. MIME types are checked in a case-insensitive manner.
 * 2. MIME types are matched based on inclusion, not strict equality.
 * 3. A candidate content is considered to match if it has no specified MIME type.
 *
 * Note: this method is written defensively to protect against malformed remote objects.
 * When extending or modifying it, please be sure to work with "unknown" type and validate everything.
 *
 * Note: the logic in this method is carefully ordered to match the selection priority of existing code in ApNoteService.
 * Please do not re-arrange it without testing!
 * New checks can be added to the end of the method to safely extend the existing logic.
 *
 * @param object AP object to extract content from.
 * @param mimeType MIME type to look for.
 * @param permissive Enables permissive mode, as described above. Defaults to false (disabled).
 * @returns The first matching content string, or null if no candidate matched.
 */
export function getContentByType(object: IPost | Record<string, unknown>, mimeType: string, permissive = false): string | null {
	// Normalize the requested type once, so that permissive matching is case-insensitive on BOTH sides of the comparison.
	const normalizedMimeType = mimeType.toLowerCase();

	// Checks if the provided media type matches the input parameters.
	function checkMediaType(mediaType: unknown): boolean {
		if (typeof(mediaType) === 'string') {
			// Strict match
			if (mediaType === mimeType) {
				return true;
			}

			// Permissive match
			if (permissive && mediaType.toLowerCase().includes(normalizedMimeType)) {
				return true;
			}
		}

		// Permissive fallback match
		if (permissive && mediaType == null) {
			return true;
		}

		// No match
		return false;
	}

	// Case 1: Extended "source" property
	if (object.source && typeof(object.source) === 'object') {
		// "source" is permitted to be an array, though no implementations are known to do this yet.
		const sources: unknown[] = toArray(object.source as unknown);
		for (const candidate of sources) {
			// A remote array may contain null or primitive entries; skip them instead of crashing on property access.
			if (candidate == null || typeof(candidate) !== 'object') continue;

			const source = candidate as Record<string, unknown>;
			if (typeof(source.content) === 'string' && checkMediaType(source.mediaType)) {
				return source.content;
			}
		}
	}

	// Case 2: Special case for MFM
	if (typeof(object._misskey_content) === 'string' && mimeType === 'text/x.misskeymarkdown') {
		return object._misskey_content;
	}

	// Case 3: AP native "content" property
	if (typeof(object.content) === 'string' && checkMediaType(object.mediaType)) {
		return object.content;
	}

	return null;
}
|
||||||
|
|
@ -5,7 +5,6 @@
|
||||||
|
|
||||||
import { forwardRef, Inject, Injectable } from '@nestjs/common';
|
import { forwardRef, Inject, Injectable } from '@nestjs/common';
|
||||||
import { In } from 'typeorm';
|
import { In } from 'typeorm';
|
||||||
import { load as cheerio } from 'cheerio/slim';
|
|
||||||
import { UnrecoverableError } from 'bullmq';
|
import { UnrecoverableError } from 'bullmq';
|
||||||
import { DI } from '@/di-symbols.js';
|
import { DI } from '@/di-symbols.js';
|
||||||
import type { UsersRepository, PollsRepository, EmojisRepository, NotesRepository, MiMeta } from '@/models/_.js';
|
import type { UsersRepository, PollsRepository, EmojisRepository, NotesRepository, MiMeta } from '@/models/_.js';
|
||||||
|
|
@ -28,6 +27,8 @@ import { checkHttps } from '@/misc/check-https.js';
|
||||||
import { IdentifiableError } from '@/misc/identifiable-error.js';
|
import { IdentifiableError } from '@/misc/identifiable-error.js';
|
||||||
import { isRetryableError } from '@/misc/is-retryable-error.js';
|
import { isRetryableError } from '@/misc/is-retryable-error.js';
|
||||||
import { renderInlineError } from '@/misc/render-inline-error.js';
|
import { renderInlineError } from '@/misc/render-inline-error.js';
|
||||||
|
import { extractMediaFromHtml } from '@/core/activitypub/misc/extract-media-from-html.js';
|
||||||
|
import { getContentByType } from '@/core/activitypub/misc/get-content-by-type.js';
|
||||||
import { getOneApId, getApId, validPost, isEmoji, getApType, isApObject, isDocument, IApDocument, isLink } from '../type.js';
|
import { getOneApId, getApId, validPost, isEmoji, getApType, isApObject, isDocument, IApDocument, isLink } from '../type.js';
|
||||||
import { ApLoggerService } from '../ApLoggerService.js';
|
import { ApLoggerService } from '../ApLoggerService.js';
|
||||||
import { ApMfmService } from '../ApMfmService.js';
|
import { ApMfmService } from '../ApMfmService.js';
|
||||||
|
|
@ -42,7 +43,6 @@ import { ApQuestionService } from './ApQuestionService.js';
|
||||||
import { ApImageService } from './ApImageService.js';
|
import { ApImageService } from './ApImageService.js';
|
||||||
import type { Resolver } from '../ApResolverService.js';
|
import type { Resolver } from '../ApResolverService.js';
|
||||||
import type { IObject, IPost } from '../type.js';
|
import type { IObject, IPost } from '../type.js';
|
||||||
import type { CheerioAPI } from 'cheerio/slim';
|
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class ApNoteService {
|
export class ApNoteService {
|
||||||
|
|
@ -208,12 +208,8 @@ export class ApNoteService {
|
||||||
const cw = note.summary === '' ? null : note.summary;
|
const cw = note.summary === '' ? null : note.summary;
|
||||||
|
|
||||||
// テキストのパース
|
// テキストのパース
|
||||||
let text: string | null = null;
|
let text = getContentByType(note, 'text/x.misskeymarkdown');
|
||||||
if (note.source?.mediaType === 'text/x.misskeymarkdown' && typeof note.source.content === 'string') {
|
if (text == null && typeof note.content === 'string') {
|
||||||
text = note.source.content;
|
|
||||||
} else if (typeof note._misskey_content !== 'undefined') {
|
|
||||||
text = note._misskey_content;
|
|
||||||
} else if (typeof note.content === 'string') {
|
|
||||||
text = this.apMfmService.htmlToMfm(note.content, note.tag);
|
text = this.apMfmService.htmlToMfm(note.content, note.tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -251,31 +247,9 @@ export class ApNoteService {
|
||||||
}
|
}
|
||||||
|
|
||||||
// 添付ファイル
|
// 添付ファイル
|
||||||
const files: MiDriveFile[] = [];
|
// Note: implementation moved to getAttachment function to avoid duplication.
|
||||||
|
// Please copy any upstream changes to that method! (It's in the bottom of this class)
|
||||||
for (const attach of toArray(note.attachment)) {
|
const files = await this.getAttachments(note, actor);
|
||||||
attach.sensitive ??= note.sensitive;
|
|
||||||
const file = await this.apImageService.resolveImage(actor, attach);
|
|
||||||
if (file) files.push(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
|
|
||||||
const icon = getBestIcon(note);
|
|
||||||
if (icon) {
|
|
||||||
icon.sensitive ??= note.sensitive;
|
|
||||||
const file = await this.apImageService.resolveImage(actor, icon);
|
|
||||||
if (file) files.push(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract inline media from note content.
|
|
||||||
// Don't use source.content, _misskey_content, or anything else because those aren't HTML.
|
|
||||||
if (note.content) {
|
|
||||||
for (const attach of extractInlineMedia(note.content)) {
|
|
||||||
attach.sensitive ??= note.sensitive;
|
|
||||||
const file = await this.apImageService.resolveImage(actor, attach);
|
|
||||||
if (file) files.push(file);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// リプライ
|
// リプライ
|
||||||
const reply: MiNote | null = note.inReplyTo
|
const reply: MiNote | null = note.inReplyTo
|
||||||
|
|
@ -424,12 +398,8 @@ export class ApNoteService {
|
||||||
const cw = note.summary === '' ? null : note.summary;
|
const cw = note.summary === '' ? null : note.summary;
|
||||||
|
|
||||||
// テキストのパース
|
// テキストのパース
|
||||||
let text: string | null = null;
|
let text = getContentByType(note, 'text/x.misskeymarkdown');
|
||||||
if (note.source?.mediaType === 'text/x.misskeymarkdown' && typeof note.source.content === 'string') {
|
if (text == null && typeof note.content === 'string') {
|
||||||
text = note.source.content;
|
|
||||||
} else if (typeof note._misskey_content !== 'undefined') {
|
|
||||||
text = note._misskey_content;
|
|
||||||
} else if (typeof note.content === 'string') {
|
|
||||||
text = this.apMfmService.htmlToMfm(note.content, note.tag);
|
text = this.apMfmService.htmlToMfm(note.content, note.tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -459,31 +429,7 @@ export class ApNoteService {
|
||||||
}
|
}
|
||||||
|
|
||||||
// 添付ファイル
|
// 添付ファイル
|
||||||
const files: MiDriveFile[] = [];
|
const files = await this.getAttachments(note, actor);
|
||||||
|
|
||||||
for (const attach of toArray(note.attachment)) {
|
|
||||||
attach.sensitive ??= note.sensitive;
|
|
||||||
const file = await this.apImageService.resolveImage(actor, attach);
|
|
||||||
if (file) files.push(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
|
|
||||||
const icon = getBestIcon(note);
|
|
||||||
if (icon) {
|
|
||||||
icon.sensitive ??= note.sensitive;
|
|
||||||
const file = await this.apImageService.resolveImage(actor, icon);
|
|
||||||
if (file) files.push(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract inline media from note content.
|
|
||||||
// Don't use source.content, _misskey_content, or anything else because those aren't HTML.
|
|
||||||
if (note.content) {
|
|
||||||
for (const attach of extractInlineMedia(note.content)) {
|
|
||||||
attach.sensitive ??= note.sensitive;
|
|
||||||
const file = await this.apImageService.resolveImage(actor, attach);
|
|
||||||
if (file) files.push(file);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// リプライ
|
// リプライ
|
||||||
const reply: MiNote | null = note.inReplyTo
|
const reply: MiNote | null = note.inReplyTo
|
||||||
|
|
@ -744,6 +690,55 @@ export class ApNoteService {
|
||||||
// Permanent error - return null
|
// Permanent error - return null
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts and saves all media attachments from the provided note.
 * Media is gathered, in order, from the note's "attachment" list, its best "icon" (if any), and inline media elements found in its HTML content.
 * Each item inherits the note's "sensitive" flag unless it already has one of its own.
 * Items are resolved one at a time, and only those that resolve to a file are included in the result.
 * Returns an array of all the created files.
 * TODO: suppress errors and set a processError entry instead.
 * TODO: run in parallel (with promiseLimit!)
 * @param note Remote note to collect attachments from. Its attachment and icon objects may have "sensitive" filled in as a side effect.
 * @param actor Remote user that the resolved files are attributed to.
 */
private async getAttachments(note: IPost, actor: MiRemoteUser): Promise<MiDriveFile[]> {
	const files: MiDriveFile[] = [];

	// Shared pipeline for a single piece of media: inherit sensitivity, resolve it, and keep it if a file came back.
	const saveAttachment = async (attach: IObject): Promise<void> => {
		attach.sensitive ??= note.sensitive;
		const file = await this.apImageService.resolveImage(actor, attach);
		if (file) files.push(file);
	};

	for (const attach of toArray(note.attachment)) {
		await saveAttachment(attach);
	}

	// Some software (Peertube) attaches a thumbnail under "icon" instead of "attachment"
	const icon = getBestIcon(note);
	if (icon) {
		await saveAttachment(icon);
	}

	// Extract inline media from HTML content.
	// Don't use _misskey_content or anything typed as a non-HTML format, because those aren't HTML.
	// Permissive matching is required here: most remote software omits "mediaType" entirely,
	// and ActivityStreams defines content without a mediaType as text/html.
	// A strict match would silently skip inline media for those notes.
	const htmlContent = getContentByType(note, 'text/html', true);
	if (htmlContent) {
		for (const attach of extractMediaFromHtml(htmlContent)) {
			await saveAttachment(attach);
		}
	}

	// Extract inline media from markdown content.
	// TODO We first need to implement support for "!" prefix in sfm-js.
	//   That will be implemented as part of https://activitypub.software/TransFem-org/Sharkey/-/issues/1105
	// const markdownContent = getContentByType(note, 'text/markdown') || text;
	// if (markdownContent) {
	// 	for (const attach of extractMediaFromMarkdown(markdownContent)) {
	// 		attach.sensitive ??= note.sensitive;
	// 		const file = await this.apImageService.resolveImage(actor, attach);
	// 		if (file) files.push(file);
	// 	}
	// }

	return files;
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function getBestIcon(note: IObject): IObject | null {
|
function getBestIcon(note: IObject): IObject | null {
|
||||||
|
|
@ -764,72 +759,3 @@ function getBestIcon(note: IObject): IObject | null {
|
||||||
}, null as IApDocument | null) ?? null;
|
}, null as IApDocument | null) ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractInlineMedia(html: string): IApDocument[] {
|
|
||||||
const $ = parseHtml(html);
|
|
||||||
if (!$) return [];
|
|
||||||
|
|
||||||
const attachments: IApDocument[] = [];
|
|
||||||
|
|
||||||
// <img> tags, including <picture> and <object> fallback elements
|
|
||||||
// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/img
|
|
||||||
$('img[src]')
|
|
||||||
.toArray()
|
|
||||||
.forEach(img => attachments.push({
|
|
||||||
type: 'Image',
|
|
||||||
url: img.attribs.src,
|
|
||||||
name: img.attribs.alt || img.attribs.title || null,
|
|
||||||
}));
|
|
||||||
|
|
||||||
// <object> tags
|
|
||||||
// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/object
|
|
||||||
$('object[data]')
|
|
||||||
.toArray()
|
|
||||||
.forEach(object => attachments.push({
|
|
||||||
type: 'Document',
|
|
||||||
url: object.attribs.data,
|
|
||||||
name: object.attribs.alt || object.attribs.title || null,
|
|
||||||
}));
|
|
||||||
|
|
||||||
// <embed> tags
|
|
||||||
// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed
|
|
||||||
$('embed[src]')
|
|
||||||
.toArray()
|
|
||||||
.forEach(embed => attachments.push({
|
|
||||||
type: 'Document',
|
|
||||||
url: embed.attribs.src,
|
|
||||||
name: embed.attribs.alt || embed.attribs.title || null,
|
|
||||||
}));
|
|
||||||
|
|
||||||
// <audio> tags
|
|
||||||
// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/audio
|
|
||||||
$('audio[src]')
|
|
||||||
.toArray()
|
|
||||||
.forEach(audio => attachments.push({
|
|
||||||
type: 'Audio',
|
|
||||||
url: audio.attribs.src,
|
|
||||||
name: audio.attribs.alt || audio.attribs.title || null,
|
|
||||||
}));
|
|
||||||
|
|
||||||
// <video> tags
|
|
||||||
// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/video
|
|
||||||
$('video[src]')
|
|
||||||
.toArray()
|
|
||||||
.forEach(audio => attachments.push({
|
|
||||||
type: 'Video',
|
|
||||||
url: audio.attribs.src,
|
|
||||||
name: audio.attribs.alt || audio.attribs.title || null,
|
|
||||||
}));
|
|
||||||
|
|
||||||
// TODO support <svg>? we will need to extract it directly from the HTML.
|
|
||||||
|
|
||||||
return attachments;
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseHtml(html: string): CheerioAPI | null {
|
|
||||||
try {
|
|
||||||
return cheerio(html);
|
|
||||||
} catch {
|
|
||||||
// Don't worry about invalid HTML
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue