Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/changelog/2572-from-description
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: minor
Type: added

Mastodon importer now imports self-replies as comments, preserving thread structure.
297 changes: 232 additions & 65 deletions includes/wp-admin/import/class-mastodon.php
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,21 @@ public static function import() {
/**
* Process posts.
*
* Uses a multi-pass approach:
* 1. Categorize posts into regular posts and self-replies.
* 2. Import regular posts (root posts and external replies) as WordPress posts.
* 3. Import self-replies as comments on their parent posts.
*
* @return true|\WP_Error True on success, WP_Error on failure.
*/
public static function import_posts() {
$skipped = array();
$imported = 0;

// Pass 1: Categorize posts.
$posts_to_import = array();
$self_replies = array();

foreach ( self::$outbox['orderedItems'] as $post ) {
// Skip boosts.
if ( 'Announce' === $post['type'] ) {
Expand All @@ -288,92 +297,250 @@ public static function import_posts() {
continue;
}

// @todo: Skip replies to comments and import them as comments.

$post_data = array(
'post_author' => self::$author,
'post_date' => $post['published'],
'post_excerpt' => $post['object']['summary'] ?? '',
'post_content' => $post['object']['content'],
'post_status' => 'publish',
'post_type' => 'post',
'meta_input' => array( '_source_id' => $post['object']['id'] ),
'tags_input' => \array_map(
function ( $tag ) {
if ( 'Hashtag' === $tag['type'] ) {
return \ltrim( $tag['name'], '#' );
}

return '';
},
$post['object']['tag'] ?? array()
),
);

/**
* Filter the post data before inserting it into the database.
*
* @param array $post_data The post data to be inserted.
* @param array $post The Mastodon Create activity.
*/
$post_data = \apply_filters( 'activitypub_import_mastodon_post_data', $post_data, $post );

$post_exists = \post_exists( '', $post_data['post_content'], $post_data['post_date'], $post_data['post_type'] );

/**
* Filter ID of the existing post corresponding to post currently importing.
*
* Return 0 to force the post to be imported. Filter the ID to be something else
* to override which existing post is mapped to the imported post.
*
* @see post_exists()
*
* @param int $post_exists Post ID, or 0 if post did not exist.
* @param array $post_data The post array to be inserted.
*/
$post_exists = \apply_filters( 'wp_import_existing_post', $post_exists, $post_data );

if ( $post_exists ) {
$skipped[] = $post['object']['id'];
continue;
if ( self::is_self_reply( $post ) ) {
$self_replies[] = $post;
} else {
// Root posts and external replies are imported as WordPress posts.
$posts_to_import[] = $post;
}
}

$post_id = \wp_insert_post( $post_data, true );
// Pass 2: Import regular posts as WordPress posts.
$source_to_post_id = array();
foreach ( $posts_to_import as $post ) {
$result = self::import_as_post( $post );

if ( \is_wp_error( $post_id ) ) {
return $post_id;
if ( \is_wp_error( $result ) ) {
return $result;
}

\set_post_format( $post_id, 'status' );

// Process attachments if enabled.
if ( self::$fetch_attachments && ! empty( $post['object']['attachment'] ) ) {
// Prepend archive path to attachment URLs for local files.
$attachments = array_map( array( self::class, 'prepend_archive_path' ), $post['object']['attachment'] );

Attachments::import( $attachments, $post_id, self::$author );
if ( $result ) {
$source_to_post_id[ $post['object']['id'] ] = $result;
++$imported;
} else {
$skipped[] = $post['object']['id'];
}
}

// phpcs:ignore
if ( $post_id && isset( $post['object']['replies']['first']['next'] ) ) {
// @todo: Import replies as comments.
// Pass 3: Import self-replies as comments (sorted by date for correct threading).
\usort(
$self_replies,
static function ( $a, $b ) {
return \strtotime( $a['published'] ) <=> \strtotime( $b['published'] );
}
);

$source_to_comment_id = array();
$comments_skipped = array();
$comments_imported = 0;

++$imported;
foreach ( $self_replies as $post ) {
$result = self::import_as_comment( $post, $source_to_post_id, $source_to_comment_id );

if ( $result ) {
++$comments_imported;
} else {
$comments_skipped[] = $post['object']['id'];
}
}

// Output results.
if ( ! empty( $skipped ) ) {
echo '<p>' . \esc_html__( 'Skipped posts:', 'activitypub' ) . '<br>';
echo wp_kses( implode( '<br>', $skipped ), array( 'br' => array() ) );
echo \wp_kses( \implode( '<br>', $skipped ), array( 'br' => array() ) );
echo '</p>';
}

/* translators: %d: Number of posts */
if ( ! empty( $comments_skipped ) ) {
echo '<p>' . \esc_html__( 'Skipped comments:', 'activitypub' ) . '<br>';
echo \wp_kses( \implode( '<br>', $comments_skipped ), array( 'br' => array() ) );
echo '</p>';
}

/* translators: %s: Number of posts */
echo '<p>' . \esc_html( \sprintf( \_n( 'Imported %s post.', 'Imported %s posts.', $imported, 'activitypub' ), \number_format_i18n( $imported ) ) ) . '</p>';

if ( $comments_imported > 0 ) {
/* translators: %s: Number of comments */
echo '<p>' . \esc_html( \sprintf( \_n( 'Imported %s comment from self-reply threads.', 'Imported %s comments from self-reply threads.', $comments_imported, 'activitypub' ), \number_format_i18n( $comments_imported ) ) ) . '</p>';
}

return true;
}

/**
* Check if a post is a self-reply (thread continuation).
*
* A self-reply is when a user replies to their own post, creating a thread.
*
* @param array $post The Mastodon activity.
*
* @return bool True if replying to own post.
*/
private static function is_self_reply( $post ) {
	$in_reply_to = $post['object']['inReplyTo'] ?? null;
	$actor       = $post['actor'] ?? null;

	/*
	 * Only plain, non-empty URL strings can be compared. A missing value, or an
	 * embedded object/array in place of a URL, is treated as "not a self-reply"
	 * rather than being handed to the string functions below, which would emit
	 * warnings or throw a TypeError.
	 */
	if ( ! \is_string( $in_reply_to ) || '' === $in_reply_to ) {
		return false;
	}

	if ( ! \is_string( $actor ) || '' === $actor ) {
		return false;
	}

	/*
	 * Compare base URLs (actor URL should be a prefix of inReplyTo for self-replies).
	 *
	 * Example:
	 * - actor: https://mastodon.social/users/example
	 * - inReplyTo: https://mastodon.social/users/example/statuses/123
	 *
	 * The actor URL is normalized to end in exactly one slash so that partial
	 * usernames do not match (e.g., "example" must not match "example2").
	 */
	$actor_prefix = \rtrim( $actor, '/' ) . '/';

	return \str_starts_with( $in_reply_to, $actor_prefix );
}

/**
* Import a single activity as a WordPress post.
*
* @param array $post The Mastodon activity.
*
* @return int|false|\WP_Error Post ID on success, false if skipped, WP_Error on failure.
*/
private static function import_as_post( $post ) {
	// Map ActivityPub tags to WordPress tag names: hashtags lose their leading
	// "#", every other tag type is mapped to an empty string.
	$tag_names = \array_map(
		static function ( $tag ) {
			return 'Hashtag' === $tag['type'] ? \ltrim( $tag['name'], '#' ) : '';
		},
		$post['object']['tag'] ?? array()
	);

	/**
	 * Filter the post data before inserting it into the database.
	 *
	 * @param array $post_data The post data to be inserted.
	 * @param array $post      The Mastodon Create activity.
	 */
	$new_post = \apply_filters(
		'activitypub_import_mastodon_post_data',
		array(
			'post_author'  => self::$author,
			'post_date'    => $post['published'],
			'post_excerpt' => $post['object']['summary'] ?? '',
			'post_content' => $post['object']['content'],
			'post_status'  => 'publish',
			'post_type'    => 'post',
			'meta_input'   => array( '_source_id' => $post['object']['id'] ),
			'tags_input'   => $tag_names,
		),
		$post
	);

	/**
	 * Filter ID of the existing post corresponding to post currently importing.
	 *
	 * Return 0 to force the post to be imported. Filter the ID to be something else
	 * to override which existing post is mapped to the imported post.
	 *
	 * @see post_exists()
	 *
	 * @param int   $post_exists Post ID, or 0 if post did not exist.
	 * @param array $post_data   The post array to be inserted.
	 */
	$existing_id = \apply_filters(
		'wp_import_existing_post',
		\post_exists( '', $new_post['post_content'], $new_post['post_date'], $new_post['post_type'] ),
		$new_post
	);

	// A matching post is already present: report "skipped" to the caller.
	if ( $existing_id ) {
		return false;
	}

	$inserted_id = \wp_insert_post( $new_post, true );

	// Hand insertion failures back to the caller unchanged.
	if ( \is_wp_error( $inserted_id ) ) {
		return $inserted_id;
	}

	\set_post_format( $inserted_id, 'status' );

	// Attachments are only processed when enabled and the activity carries any.
	$has_attachments = self::$fetch_attachments && ! empty( $post['object']['attachment'] );

	if ( $has_attachments ) {
		// Attachment entries are run through prepend_archive_path() first so
		// local files resolve against the archive.
		Attachments::import(
			\array_map( array( self::class, 'prepend_archive_path' ), $post['object']['attachment'] ),
			$inserted_id,
			self::$author
		);
	}

	return $inserted_id;
}

/**
* Import a self-reply as a comment on its parent post.
*
* @param array $post The Mastodon activity.
* @param array $source_to_post_id Mapping of source IDs to WordPress post IDs.
* @param array $source_to_comment_id Mapping of source IDs to WordPress comment IDs (passed by reference).
*
* @return int|false Comment ID on success, false if parent not found or skipped.
*/
private static function import_as_comment( $post, $source_to_post_id, &$source_to_comment_id ) {
	$source_id   = $post['object']['id'] ?? '';
	$in_reply_to = $post['object']['inReplyTo'] ?? '';

	/*
	 * Both values are used as array keys in the lookup maps, so they must be
	 * non-empty strings. Anything else cannot be resolved to a parent and is
	 * reported as skipped.
	 */
	if ( ! \is_string( $source_id ) || '' === $source_id ) {
		return false;
	}

	if ( ! \is_string( $in_reply_to ) || '' === $in_reply_to ) {
		return false;
	}

	// Find parent - could be a post or another comment.
	$parent_post_id    = 0;
	$parent_comment_id = 0;

	if ( isset( $source_to_post_id[ $in_reply_to ] ) ) {
		// Replying to an activity that was imported as a WordPress post.
		$parent_post_id = (int) $source_to_post_id[ $in_reply_to ];
	} elseif ( isset( $source_to_comment_id[ $in_reply_to ] ) ) {
		// Replying to another imported comment (nested thread): the new comment
		// is attached to the same post as its parent comment.
		$parent_comment_id = (int) $source_to_comment_id[ $in_reply_to ];
		$parent_comment    = \get_comment( $parent_comment_id );

		if ( $parent_comment ) {
			$parent_post_id = (int) $parent_comment->comment_post_ID;
		}
	}

	// If we couldn't find the parent, skip this comment.
	if ( ! $parent_post_id ) {
		return false;
	}

	// Check for a comment on the same post that already carries this source ID.
	$existing_comments = \get_comments(
		array(
			'post_id'    => $parent_post_id,
			'meta_key'   => 'source_id', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key
			'meta_value' => $source_id, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_value
			'number'     => 1,
		)
	);

	if ( ! empty( $existing_comments ) ) {
		/*
		 * Already imported: remember it so later replies in the thread can still
		 * resolve their parent, then report it as skipped. The ID is cast so the
		 * map always holds integers, matching freshly inserted comments.
		 */
		$source_to_comment_id[ $source_id ] = (int) $existing_comments[0]->comment_ID;

		return false;
	}

	$comment_data = array(
		'comment_post_ID'  => $parent_post_id,
		'comment_parent'   => $parent_comment_id,
		'comment_author'   => \get_the_author_meta( 'display_name', self::$author ),
		'comment_content'  => $post['object']['content'] ?? '',
		'comment_date'     => $post['published'],
		'user_id'          => self::$author,
		'comment_approved' => 1,
		// Passing the source ID via `comment_meta` lets wp_insert_comment() store
		// it as part of the insert instead of a separate follow-up call.
		'comment_meta'     => array( 'source_id' => $source_id ),
	);

	$comment_id = \wp_insert_comment( $comment_data );

	if ( ! $comment_id ) {
		return false;
	}

	$comment_id = (int) $comment_id;

	// Record the mapping so replies to this comment can be nested beneath it.
	$source_to_comment_id[ $source_id ] = $comment_id;

	return $comment_id;
}

/**
* Header.
*/
Expand Down
Loading