extract note attachments from inline HTML
This commit is contained in:
parent
e5593af422
commit
eb22bc5f5d
1 changed files with 92 additions and 0 deletions
|
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
import { forwardRef, Inject, Injectable } from '@nestjs/common';
|
import { forwardRef, Inject, Injectable } from '@nestjs/common';
|
||||||
import { In } from 'typeorm';
|
import { In } from 'typeorm';
|
||||||
|
import { load as cheerio } from 'cheerio/slim';
|
||||||
import { UnrecoverableError } from 'bullmq';
|
import { UnrecoverableError } from 'bullmq';
|
||||||
import { DI } from '@/di-symbols.js';
|
import { DI } from '@/di-symbols.js';
|
||||||
import type { UsersRepository, PollsRepository, EmojisRepository, NotesRepository, MiMeta } from '@/models/_.js';
|
import type { UsersRepository, PollsRepository, EmojisRepository, NotesRepository, MiMeta } from '@/models/_.js';
|
||||||
|
|
@ -41,6 +42,7 @@ import { ApQuestionService } from './ApQuestionService.js';
|
||||||
import { ApImageService } from './ApImageService.js';
|
import { ApImageService } from './ApImageService.js';
|
||||||
import type { Resolver } from '../ApResolverService.js';
|
import type { Resolver } from '../ApResolverService.js';
|
||||||
import type { IObject, IPost } from '../type.js';
|
import type { IObject, IPost } from '../type.js';
|
||||||
|
import type { CheerioAPI } from 'cheerio/slim';
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class ApNoteService {
|
export class ApNoteService {
|
||||||
|
|
@ -265,6 +267,16 @@ export class ApNoteService {
|
||||||
if (file) files.push(file);
|
if (file) files.push(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract inline media from note content.
|
||||||
|
// Don't use source.content, _misskey_content, or anything else because those aren't HTML.
|
||||||
|
if (note.content) {
|
||||||
|
for (const attach of extractInlineMedia(note.content)) {
|
||||||
|
attach.sensitive ??= note.sensitive;
|
||||||
|
const file = await this.apImageService.resolveImage(actor, attach);
|
||||||
|
if (file) files.push(file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// リプライ
|
// リプライ
|
||||||
const reply: MiNote | null = note.inReplyTo
|
const reply: MiNote | null = note.inReplyTo
|
||||||
? await this.resolveNote(note.inReplyTo, { resolver })
|
? await this.resolveNote(note.inReplyTo, { resolver })
|
||||||
|
|
@ -463,6 +475,16 @@ export class ApNoteService {
|
||||||
if (file) files.push(file);
|
if (file) files.push(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract inline media from note content.
|
||||||
|
// Don't use source.content, _misskey_content, or anything else because those aren't HTML.
|
||||||
|
if (note.content) {
|
||||||
|
for (const attach of extractInlineMedia(note.content)) {
|
||||||
|
attach.sensitive ??= note.sensitive;
|
||||||
|
const file = await this.apImageService.resolveImage(actor, attach);
|
||||||
|
if (file) files.push(file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// リプライ
|
// リプライ
|
||||||
const reply: MiNote | null = note.inReplyTo
|
const reply: MiNote | null = note.inReplyTo
|
||||||
? await this.resolveNote(note.inReplyTo, { resolver })
|
? await this.resolveNote(note.inReplyTo, { resolver })
|
||||||
|
|
@ -741,3 +763,73 @@ function getBestIcon(note: IObject): IObject | null {
|
||||||
return best;
|
return best;
|
||||||
}, null as IApDocument | null) ?? null;
|
}, null as IApDocument | null) ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collects media embedded directly in a note's HTML body and describes each
 * one as an attachment document.
 *
 * Elements are gathered one tag kind at a time (img, object, embed, audio,
 * video) and, within each kind, in the order the query returns them. Only
 * elements that carry the URL attribute themselves are matched.
 *
 * @param html HTML markup to scan.
 * @returns The discovered attachments; empty when the markup could not be parsed
 *          or contains no matching elements.
 */
function extractInlineMedia(html: string): IApDocument[] {
	const dom = parseHtml(html);
	if (!dom) return [];

	// One entry per supported tag: how to find it, which attribute holds the URL,
	// and which attachment type it maps to. Order here is the output order.
	const rules = [
		// <img> tags, including <picture> and <object> fallback elements
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/img
		{ selector: 'img[src]', urlAttr: 'src', type: 'Image' },
		// <object> tags
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/object
		{ selector: 'object[data]', urlAttr: 'data', type: 'Document' },
		// <embed> tags
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed
		{ selector: 'embed[src]', urlAttr: 'src', type: 'Document' },
		// <audio> tags
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/audio
		{ selector: 'audio[src]', urlAttr: 'src', type: 'Audio' },
		// <video> tags
		// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/video
		{ selector: 'video[src]', urlAttr: 'src', type: 'Video' },
	] as const;

	const found: IApDocument[] = [];

	for (const rule of rules) {
		for (const element of dom(rule.selector).toArray()) {
			const attrs = element.attribs;
			found.push({
				type: rule.type,
				url: attrs[rule.urlAttr],
				// Prefer alt text, then the title; an empty string counts as absent.
				name: attrs.alt || attrs.title || null,
			});
		}
	}

	// TODO support <svg>? we will need to extract it directly from the HTML.

	return found;
}
|
||||||
|
|
||||||
|
/**
 * Loads an HTML string with cheerio, treating a parser failure as "no document".
 *
 * @param html HTML markup to load.
 * @returns The loaded cheerio API, or `null` if loading threw.
 */
function parseHtml(html: string): CheerioAPI | null {
	let document: CheerioAPI | null = null;

	try {
		document = cheerio(html);
	} catch {
		// Don't worry about invalid HTML
	}

	return document;
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue