Skip to content

Backend tracking #57

@rpominov

Description

@rpominov

This is not really a feature request; I just want to share my code in case you want to implement tracking internally on the backend instead of via a browser extension.

I've used this code to collect this data: https://rpominov.github.io/twitter-tracking/

The code is based on the requests the browser makes when a tweet is opened in incognito mode.

const fs = require("fs");

// Tweets to track — edit this list. Each entry is a full tweet URL; the tweet
// id is read from the last "/"-separated segment of the URL.
const tweets = [
  "https://twitter.com/P_Kallioniemi/status/1674360288445964288",
  "https://twitter.com/elonmusk/status/1710538090173837603",
];

// When true, logResponse prints the URL, status and headers of each response
// it is given; when false it prints nothing.
const DEBUG = false;

/**
 * Debug helper: when DEBUG is enabled, prints a response's URL, status code
 * and headers (collected into a plain object). Does nothing otherwise.
 *
 * @param {Response} resp - fetch response to describe.
 * @returns {void}
 */
const logResponse = (resp) => {
  if (!DEBUG) {
    return;
  }

  const { url, status } = resp;
  const headerMap = Object.fromEntries(resp.headers.entries());
  console.log(url, status, headerMap);
};

/**
 * Returns a promise that resolves (with no value) once the given pause has
 * elapsed.
 *
 * @param {number} seconds - how long to wait, in seconds.
 * @returns {Promise<void>}
 */
const delay = (seconds) => {
  const milliseconds = seconds * 1000;
  return new Promise((done) => {
    setTimeout(done, milliseconds);
  });
};

/**
 * Shuffles an array in place using the Fisher–Yates algorithm.
 *
 * Elements are swapped by index, so the array may hold values of any type
 * (including objects and duplicates); nothing is keyed by the element's
 * string form.
 *
 * @param {Array<*>} array - array to reorder; it is mutated.
 * @returns {void}
 */
const shuffle = (array) => {
  // Walk from the end, swapping each slot with a random slot at or before it.
  for (let i = array.length - 1; i > 0; i -= 1) {
    const j = Math.floor(Math.random() * (i + 1));
    [array[i], array[j]] = [array[j], array[i]];
  }
};

/**
 * Calls an async function, retrying after a pause whenever it throws.
 *
 * @param {() => Promise<*>} fn - operation to attempt.
 * @param {number} [retries=5] - how many times to retry after the first
 *   failure, so `fn` is called at most `retries + 1` times.
 * @param {number} [delaySeconds=20] - pause between attempts, in seconds.
 * @returns {Promise<*>} whatever `fn` resolves to.
 * @throws the error from the last attempt once all retries are used up.
 */
const retry = async (fn, retries = 5, delaySeconds = 20) => {
  // A loop instead of recursion keeps the promise chain flat however many
  // retries are requested.
  for (let remaining = retries; ; remaining -= 1) {
    try {
      return await fn();
    } catch (e) {
      if (remaining <= 0) {
        throw e;
      }
      console.log(e, `[Retrying... (${remaining})]`);
      await delay(delaySeconds);
    }
  }
};

// Bearer token sent in the `authorization` header of the API request.
// Hard-coded because there is no known way to obtain it programmatically;
// it may keep working indefinitely, but replace it if requests start failing.
const authorizationToken =
  "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA";

// Base URL that the `TweetResultByRestId` request is built on.
// NOTE(review): the last path segment looks like an opaque id that may
// change over time — update it if requests start failing.
const apiBase = "https://twitter.com/i/api/graphql/mbnjGF4gOwo5gyp9pe5s4A";

// Guest session state. `guestId` is cached for the lifetime of the process;
// `guestToken` is reset to null elsewhere in the file when it must be renewed.
let guestId = null;
let guestToken = null;

/**
 * Obtains guest credentials by loading the first tweet URL in `tweets`.
 *
 * Step 1 (only while no guest id is cached): requests the tweet page without
 * following redirects and reads the `guest_id` value from the `set-cookie`
 * response header.
 * Step 2: requests the page again with that cookie and extracts the guest
 * token from the `gt=<digits>;` fragment in the response body, storing it in
 * the module-level `guestToken`.
 *
 * Both steps go through `retry`.
 *
 * @returns {Promise<void>}
 * @throws {Error} "No guest id cookie" when the response carries no
 *   `guest_id` cookie, or an error containing the page text when no guest
 *   token is found (each only after retries are exhausted).
 */
const authenticate = async () => {
  console.log("Authenticating...");

  const tweetUrl = tweets[0];

  // The id shouldn't expire, so we fetch it just once. It must be assigned to
  // the module-level variable: declaring a new block-scoped `guestId` here
  // would leave the cached value null and send `guest_id=null` below.
  if (guestId === null) {
    guestId = await retry(async () => {
      const resp = await fetch(tweetUrl, { redirect: "manual" });
      logResponse(resp);
      const setCookie = resp.headers.get("set-cookie");
      // `get` returns null when the header is absent; treat that the same as
      // a cookie header that doesn't start with the guest id.
      if (!setCookie?.startsWith("guest_id=")) {
        throw new Error("No guest id cookie");
      }
      return setCookie.split(";")[0].split("=")[1];
    });
  }

  return retry(async () => {
    const resp = await fetch(tweetUrl, {
      headers: { cookie: `guest_id=${guestId}` },
    });
    logResponse(resp);

    const text = await resp.text();
    const match = text.match(/gt=(\d+);/);

    if (!match) {
      // Include the page body so the failure can be diagnosed from the log.
      throw new Error(text);
    }

    guestToken = match[1];
  });
};

/**
 * Fetches the current API data for one tweet and writes the JSON response to
 * `./<tweetId>_<timestamp in ms>.json` in the working directory.
 *
 * The request is wrapped in `retry`. A guest token is obtained via
 * `authenticate` before any attempt that finds none set. If the API reports
 * error code 239 (guest token expired) the token is cleared and the call
 * returns without writing a file, so the tweet is skipped for this round.
 *
 * @param {string} tweetUrl - tweet URL whose last path segment is the tweet
 *   id; a query string, fragment or trailing slash is ignored.
 * @returns {Promise<void>}
 * @throws {Error} when the response is not JSON or carries other API errors
 *   (after retries are exhausted).
 */
const track = async (tweetUrl) => {
  // Strip "?…", "#…" and trailing slashes so shared links such as
  // ".../status/123?s=20" still yield the bare id.
  const tweetId = tweetUrl
    .split(/[?#]/)[0]
    .replace(/\/+$/, "")
    .split("/")
    .at(-1);

  const params = new URLSearchParams();

  params.set(
    "variables",
    JSON.stringify({
      tweetId,
      withCommunity: false,
      includePromotedContent: false,
      withVoice: false,
    })
  );

  params.set(
    "features",
    JSON.stringify({
      creator_subscriptions_tweet_preview_api_enabled: true,
      tweetypie_unmention_optimization_enabled: true,
      responsive_web_edit_tweet_api_enabled: true,
      graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
      view_counts_everywhere_api_enabled: true,
      longform_notetweets_consumption_enabled: true,
      responsive_web_twitter_article_tweet_consumption_enabled: false,
      tweet_awards_web_tipping_enabled: false,
      responsive_web_home_pinned_timelines_enabled: false,
      freedom_of_speech_not_reach_fetch_enabled: true,
      standardized_nudges_misinfo: true,
      tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
      longform_notetweets_rich_text_read_enabled: true,
      longform_notetweets_inline_media_enabled: true,
      responsive_web_graphql_exclude_directive_enabled: true,
      verified_phone_label_enabled: false,
      responsive_web_media_download_video_enabled: false,
      responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
      responsive_web_graphql_timeline_navigation_enabled: true,
      responsive_web_enhance_cards_enabled: false,
    })
  );

  await retry(async () => {
    // Checked on every attempt, not just once up front: a failed attempt may
    // have cleared the token (rate-limit check below), and retrying without
    // re-authenticating would send "x-guest-token: null".
    if (!guestToken) {
      await authenticate();
    }

    const response = await fetch(`${apiBase}/TweetResultByRestId?${params}`, {
      headers: {
        authorization: `Bearer ${authorizationToken}`,
        "cache-control": "no-cache",
        "content-type": "application/json",
        pragma: "no-cache",
        "x-guest-token": guestToken,
        "x-twitter-active-user": "yes",
        "x-twitter-client-language": "en-GB",
      },
    });
    logResponse(response);

    // Quota for this token is nearly used up: drop it so the next request
    // authenticates again.
    if (response.headers.get("x-rate-limit-remaining") === "1") {
      guestToken = null;
    }

    const text = await response.text();

    let json;
    try {
      json = JSON.parse(text);
    } catch (e) {
      // Non-JSON body (e.g. an HTML error page): report status and body.
      throw new Error(`${response.status} ${text}`, { cause: e });
    }

    if (json === null) {
      throw new Error(`${response.status} ${text}`);
    }

    if (json.errors) {
      // guest token expired, recoverable error: clear the token and skip this
      // tweet; the next request will authenticate again.
      if (json.errors[0]?.code === 239) {
        guestToken = null;
        return;
      }

      throw new Error(JSON.stringify(json.errors));
    }

    const filename = `./${tweetId}_${new Date().getTime()}.json`;
    fs.writeFileSync(filename, JSON.stringify(json));
    console.log(`Wrote ${filename}`);
  });
};

// True while a `main` run is still working through the tweet list.
let isMainRunning = false;

/**
 * Runs one tracking round: shuffles `tweets` in place, then tracks each tweet
 * in turn, pausing a random 5–15 seconds after each one.
 *
 * If a previous round is still in progress the call logs a message and
 * returns immediately, because a second round would re-shuffle `tweets`
 * while the first is still iterating over it.
 *
 * @returns {Promise<void>}
 * @throws whatever `track` throws; the in-progress flag is cleared either way.
 */
const main = async () => {
  if (isMainRunning) {
    console.log("Previous run still in progress, skipping this one");
    return;
  }

  isMainRunning = true;
  try {
    shuffle(tweets);
    for (const tweet of tweets) {
      await track(tweet);
      await delay(Math.random() * 10 + 5);
    }
  } finally {
    isMainRunning = false;
  }
};

// Start the first round right away, then start another every 3 minutes.
// Rejections are logged here rather than left as unhandled promise
// rejections, so one failed round does not stop the later ones.
const runMain = () => main().catch((e) => console.error(e));

runMain();
setInterval(runMain, 1000 * 60 * 3);

If authorizationToken and apiBase stop working, here's the request I got them from (filter requests by graphql after opening a tweet in incognito):

Screenshot 2023-10-15 at 18 07 10

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancementNew feature or request

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions