Skip to content

Commit c3f9db7

Browse files
authored
Import Mastodon self-replies as comments (#2572)
1 parent 2177671 commit c3f9db7

File tree

3 files changed

+693
-65
lines changed

3 files changed

+693
-65
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Significance: minor
2+
Type: added
3+
4+
Mastodon importer now imports self-replies as comments, preserving thread structure.

includes/wp-admin/import/class-mastodon.php

Lines changed: 232 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -272,12 +272,21 @@ public static function import() {
272272
/**
273273
* Process posts.
274274
*
275+
* Uses a multi-pass approach:
276+
* 1. Categorize posts into regular posts and self-replies.
277+
* 2. Import regular posts (root posts and external replies) as WordPress posts.
278+
* 3. Import self-replies as comments on their parent posts.
279+
*
275280
* @return true|\WP_Error True on success, WP_Error on failure.
276281
*/
277282
public static function import_posts() {
278283
$skipped = array();
279284
$imported = 0;
280285

286+
// Pass 1: Categorize posts.
287+
$posts_to_import = array();
288+
$self_replies = array();
289+
281290
foreach ( self::$outbox['orderedItems'] as $post ) {
282291
// Skip boosts.
283292
if ( 'Announce' === $post['type'] ) {
@@ -288,92 +297,250 @@ public static function import_posts() {
288297
continue;
289298
}
290299

291-
// @todo: Skip replies to comments and import them as comments.
292-
293-
$post_data = array(
294-
'post_author' => self::$author,
295-
'post_date' => $post['published'],
296-
'post_excerpt' => $post['object']['summary'] ?? '',
297-
'post_content' => $post['object']['content'],
298-
'post_status' => 'publish',
299-
'post_type' => 'post',
300-
'meta_input' => array( '_source_id' => $post['object']['id'] ),
301-
'tags_input' => \array_map(
302-
function ( $tag ) {
303-
if ( 'Hashtag' === $tag['type'] ) {
304-
return \ltrim( $tag['name'], '#' );
305-
}
306-
307-
return '';
308-
},
309-
$post['object']['tag'] ?? array()
310-
),
311-
);
312-
313-
/**
314-
* Filter the post data before inserting it into the database.
315-
*
316-
* @param array $post_data The post data to be inserted.
317-
* @param array $post The Mastodon Create activity.
318-
*/
319-
$post_data = \apply_filters( 'activitypub_import_mastodon_post_data', $post_data, $post );
320-
321-
$post_exists = \post_exists( '', $post_data['post_content'], $post_data['post_date'], $post_data['post_type'] );
322-
323-
/**
324-
* Filter ID of the existing post corresponding to post currently importing.
325-
*
326-
* Return 0 to force the post to be imported. Filter the ID to be something else
327-
* to override which existing post is mapped to the imported post.
328-
*
329-
* @see post_exists()
330-
*
331-
* @param int $post_exists Post ID, or 0 if post did not exist.
332-
* @param array $post_data The post array to be inserted.
333-
*/
334-
$post_exists = \apply_filters( 'wp_import_existing_post', $post_exists, $post_data );
335-
336-
if ( $post_exists ) {
337-
$skipped[] = $post['object']['id'];
338-
continue;
300+
if ( self::is_self_reply( $post ) ) {
301+
$self_replies[] = $post;
302+
} else {
303+
// Root posts and external replies are imported as WordPress posts.
304+
$posts_to_import[] = $post;
339305
}
306+
}
340307

341-
$post_id = \wp_insert_post( $post_data, true );
308+
// Pass 2: Import regular posts as WordPress posts.
309+
$source_to_post_id = array();
310+
foreach ( $posts_to_import as $post ) {
311+
$result = self::import_as_post( $post );
342312

343-
if ( \is_wp_error( $post_id ) ) {
344-
return $post_id;
313+
if ( \is_wp_error( $result ) ) {
314+
return $result;
345315
}
346316

347-
\set_post_format( $post_id, 'status' );
348-
349-
// Process attachments if enabled.
350-
if ( self::$fetch_attachments && ! empty( $post['object']['attachment'] ) ) {
351-
// Prepend archive path to attachment URLs for local files.
352-
$attachments = array_map( array( self::class, 'prepend_archive_path' ), $post['object']['attachment'] );
353-
354-
Attachments::import( $attachments, $post_id, self::$author );
317+
if ( $result ) {
318+
$source_to_post_id[ $post['object']['id'] ] = $result;
319+
++$imported;
320+
} else {
321+
$skipped[] = $post['object']['id'];
355322
}
323+
}
356324

357-
// phpcs:ignore
358-
if ( $post_id && isset( $post['object']['replies']['first']['next'] ) ) {
359-
// @todo: Import replies as comments.
325+
// Pass 3: Import self-replies as comments (sorted by date for correct threading).
326+
\usort(
327+
$self_replies,
328+
static function ( $a, $b ) {
329+
return \strtotime( $a['published'] ) <=> \strtotime( $b['published'] );
360330
}
331+
);
332+
333+
$source_to_comment_id = array();
334+
$comments_skipped = array();
335+
$comments_imported = 0;
361336

362-
++$imported;
337+
foreach ( $self_replies as $post ) {
338+
$result = self::import_as_comment( $post, $source_to_post_id, $source_to_comment_id );
339+
340+
if ( $result ) {
341+
++$comments_imported;
342+
} else {
343+
$comments_skipped[] = $post['object']['id'];
344+
}
363345
}
364346

347+
// Output results.
365348
if ( ! empty( $skipped ) ) {
366349
echo '<p>' . \esc_html__( 'Skipped posts:', 'activitypub' ) . '<br>';
367-
echo wp_kses( implode( '<br>', $skipped ), array( 'br' => array() ) );
350+
echo \wp_kses( \implode( '<br>', $skipped ), array( 'br' => array() ) );
368351
echo '</p>';
369352
}
370353

371-
/* translators: %d: Number of posts */
354+
if ( ! empty( $comments_skipped ) ) {
355+
echo '<p>' . \esc_html__( 'Skipped comments:', 'activitypub' ) . '<br>';
356+
echo \wp_kses( \implode( '<br>', $comments_skipped ), array( 'br' => array() ) );
357+
echo '</p>';
358+
}
359+
360+
/* translators: %s: Number of posts */
372361
echo '<p>' . \esc_html( \sprintf( \_n( 'Imported %s post.', 'Imported %s posts.', $imported, 'activitypub' ), \number_format_i18n( $imported ) ) ) . '</p>';
373362

363+
if ( $comments_imported > 0 ) {
364+
/* translators: %s: Number of comments */
365+
echo '<p>' . \esc_html( \sprintf( \_n( 'Imported %s comment from self-reply threads.', 'Imported %s comments from self-reply threads.', $comments_imported, 'activitypub' ), \number_format_i18n( $comments_imported ) ) ) . '</p>';
366+
}
367+
374368
return true;
375369
}
376370

371+
/**
 * Check if a post is a self-reply (thread continuation).
 *
 * A self-reply is when a user replies to their own post, creating a thread.
 * Detection is purely URL based: the activity's `actor` URL, with exactly one
 * trailing slash, must be a prefix of the object's `inReplyTo` URL.
 *
 * Activities whose `actor` or `inReplyTo` is missing, empty, or not a string
 * are never treated as self-replies.
 *
 * @param array $post The Mastodon activity.
 *
 * @return bool True if replying to own post.
 */
private static function is_self_reply( $post ) {
	$in_reply_to = $post['object']['inReplyTo'] ?? null;
	$actor       = $post['actor'] ?? null;

	// A non-string value (e.g. an embedded object/array) cannot be prefix-matched
	// and would make str_starts_with() throw a TypeError.
	if ( ! \is_string( $in_reply_to ) || '' === $in_reply_to ) {
		return false;
	}

	if ( ! \is_string( $actor ) ) {
		return false;
	}

	$actor = \rtrim( $actor, '/' );

	// Without an actor URL the prefix would collapse to "/" and could match
	// unrelated (relative) reply targets.
	if ( '' === $actor ) {
		return false;
	}

	/*
	 * Compare base URLs (actor URL should be a prefix of inReplyTo for self-replies).
	 *
	 * Example:
	 * - actor:     https://mastodon.social/users/example
	 * - inReplyTo: https://mastodon.social/users/example/statuses/123
	 *
	 * Adding a trailing slash ensures we don't match partial usernames
	 * (e.g., "example" shouldn't match "example2").
	 */
	return \str_starts_with( $in_reply_to, $actor . '/' );
}
397+
398+
/**
 * Create a WordPress post from a single Mastodon activity.
 *
 * The post is inserted as a published `post` with the `status` post format and
 * the activity's object ID stored in the `_source_id` post meta. When an
 * equivalent post is already present (as reported by `post_exists()` and the
 * `wp_import_existing_post` filter) nothing is inserted. Attachments are
 * imported only when attachment fetching is enabled.
 *
 * @param array $post The Mastodon activity.
 *
 * @return int|false|\WP_Error Post ID on success, false if skipped, WP_Error on failure.
 */
private static function import_as_post( $post ) {
	// Hashtags become tag names (without the leading "#"); any other tag type yields an empty string.
	$tag_names = \array_map(
		static function ( $tag ) {
			return 'Hashtag' === $tag['type'] ? \ltrim( $tag['name'], '#' ) : '';
		},
		$post['object']['tag'] ?? array()
	);

	/**
	 * Filter the post data before inserting it into the database.
	 *
	 * @param array $post_data The post data to be inserted.
	 * @param array $post      The Mastodon Create activity.
	 */
	$post_data = \apply_filters(
		'activitypub_import_mastodon_post_data',
		array(
			'post_author'  => self::$author,
			'post_date'    => $post['published'],
			'post_excerpt' => $post['object']['summary'] ?? '',
			'post_content' => $post['object']['content'],
			'post_status'  => 'publish',
			'post_type'    => 'post',
			'meta_input'   => array( '_source_id' => $post['object']['id'] ),
			'tags_input'   => $tag_names,
		),
		$post
	);

	$existing_id = \post_exists( '', $post_data['post_content'], $post_data['post_date'], $post_data['post_type'] );

	/**
	 * Filter ID of the existing post corresponding to post currently importing.
	 *
	 * Return 0 to force the post to be imported. Filter the ID to be something else
	 * to override which existing post is mapped to the imported post.
	 *
	 * @see post_exists()
	 *
	 * @param int   $post_exists Post ID, or 0 if post did not exist.
	 * @param array $post_data   The post array to be inserted.
	 */
	$existing_id = \apply_filters( 'wp_import_existing_post', $existing_id, $post_data );

	// A truthy ID means the post is already there: report it as skipped.
	if ( $existing_id ) {
		return false;
	}

	$new_post_id = \wp_insert_post( $post_data, true );

	// Hand insertion failures back to the caller untouched.
	if ( \is_wp_error( $new_post_id ) ) {
		return $new_post_id;
	}

	\set_post_format( $new_post_id, 'status' );

	$has_attachments = ! empty( $post['object']['attachment'] );

	// Process attachments if enabled.
	if ( self::$fetch_attachments && $has_attachments ) {
		Attachments::import(
			// Prepend archive path to attachment URLs for local files.
			\array_map( array( self::class, 'prepend_archive_path' ), $post['object']['attachment'] ),
			$new_post_id,
			self::$author
		);
	}

	return $new_post_id;
}
471+
472+
/**
 * Import a self-reply as a comment on its parent post.
 *
 * The parent is resolved through the two lookup tables: a hit in
 * `$source_to_post_id` makes this a top-level comment on that post, a hit in
 * `$source_to_comment_id` makes it a threaded reply to that comment (on the
 * same post as the parent comment). The activity's object ID is stored in the
 * `source_id` comment meta and is used to detect already imported comments.
 *
 * @param array $post                 The Mastodon activity.
 * @param array $source_to_post_id    Mapping of source IDs to WordPress post IDs.
 * @param array $source_to_comment_id Mapping of source IDs to WordPress comment IDs (passed by reference).
 *
 * @return int|false Comment ID on success, false if parent not found or skipped.
 */
private static function import_as_comment( $post, $source_to_post_id, &$source_to_comment_id ) {
	$in_reply_to = $post['object']['inReplyTo'] ?? null;
	$source_id   = $post['object']['id'] ?? null;

	/*
	 * Both values are used as array keys and as meta values below. A non-string
	 * offset would throw inside isset(), and an empty source ID would make the
	 * duplicate query match unrelated comments, so such activities are skipped.
	 */
	if ( ! \is_string( $in_reply_to ) || '' === $in_reply_to || ! \is_string( $source_id ) || '' === $source_id ) {
		return false;
	}

	// Find parent - could be a post or another comment.
	$parent_post_id    = 0;
	$parent_comment_id = 0;

	if ( isset( $source_to_post_id[ $in_reply_to ] ) ) {
		// Replying to a root post or external reply.
		$parent_post_id = (int) $source_to_post_id[ $in_reply_to ];
	} elseif ( isset( $source_to_comment_id[ $in_reply_to ] ) ) {
		// Replying to another comment (nested thread).
		$parent_comment_id = (int) $source_to_comment_id[ $in_reply_to ];
		$parent_comment    = \get_comment( $parent_comment_id );

		if ( $parent_comment ) {
			// WP_Comment exposes IDs as numeric strings; normalize to int.
			$parent_post_id = (int) $parent_comment->comment_post_ID;
		}
	}

	// If we couldn't find the parent, skip this comment.
	if ( ! $parent_post_id ) {
		return false;
	}

	// Check for duplicate.
	$existing_comments = \get_comments(
		array(
			'post_id'    => $parent_post_id,
			'meta_key'   => 'source_id', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key
			'meta_value' => $source_id, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_value
			'number'     => 1,
		)
	);

	if ( ! empty( $existing_comments ) ) {
		// Already imported: remember it so later replies can still thread under it, then skip.
		$source_to_comment_id[ $source_id ] = (int) $existing_comments[0]->comment_ID;

		return false;
	}

	// NOTE(review): `published` is passed through unchanged, mirroring import_as_post(); confirm the stored timezone is what the site expects.
	$comment_data = array(
		'comment_post_ID'  => $parent_post_id,
		'comment_parent'   => $parent_comment_id,
		'comment_author'   => \get_the_author_meta( 'display_name', self::$author ),
		'comment_content'  => $post['object']['content'],
		'comment_date'     => $post['published'],
		'user_id'          => self::$author,
		'comment_approved' => 1,
	);

	$comment_id = \wp_insert_comment( $comment_data );

	if ( $comment_id ) {
		// Store the source ID for duplicate detection on this comment.
		\update_comment_meta( $comment_id, 'source_id', $source_id );

		$source_to_comment_id[ $source_id ] = $comment_id;
	}

	return $comment_id;
}
543+
377544
/**
378545
* Header.
379546
*/

0 commit comments

Comments
 (0)