effect-sizes.qmd

---
title: "Polygenic risk score effect sizes"
---

<details>

<summary>Metrics explanation</summary>

<dl>

<dt>AUC</dt>

<dd>Area Under the Receiver Operating Characteristic curve</dd>

<dd>This metric cares only about relative ordering of observations, and is available only for binary traits.</dd>

<dt>β</dt>

<dd>Standardized regression coefficients. For continuous traits, this is the change in the trait (in standard deviations) per standard deviation of the PGS. For binary traits, this is the change in the log-odds per standard deviation of the PGS.</dd>

<dd>This is the metric that was used for meta-analyses.</dd>

<dt>Odds Ratio (OR)</dt>

<dd>This is the multiplicative change in the odds of the trait per standard deviation of the PGS (exp(β)).</dd>

<dt>R²</dt>

<dd>This is the proportion of variance in the trait explained by the PGS, on the observed scale for continuous traits or on the liability scale for binary traits.</dd>

</dl>

</details>

This interactive plot corresponds to Figure 2A and Supplementary Figures 1-5 in the published article.


::: {.callout-tip}

We're fetching score and performance metadata from the [PGS Catalog](https://www.pgscatalog.org/rest/). It might take a few seconds to fully load!

:::


```{ojs setup}
//| echo: false

import { aq, op } from '@uwdata/arquero'

d3 = require('d3')

/**
 * Walk a paginated REST endpoint and report progress after every page.
 *
 * The first request is `baseUrl` with a single query parameter
 * (`paramName=paramValue`). Each response is expected to look like
 * `{ results: [...], next: <url or falsy> }`; `next` is followed until it is
 * missing.
 *
 * @param {string} baseUrl - Absolute URL of the first page.
 * @param {string} paramName - Name of the query parameter to set.
 * @param {string} paramValue - Value of the query parameter.
 * @yields {Array} A NEW array holding every result fetched so far. A fresh
 *   array is yielded each time so that a snapshot a consumer already holds is
 *   never mutated when a later page arrives.
 *
 * Failures (network errors, a malformed `next` URL, non-iterable `results`)
 * are logged with `console.error` and end the iteration; whatever was yielded
 * before the failure stays valid.
 */
async function* fetchPaginatedData(baseUrl, paramName, paramValue) {
  let url = new URL(baseUrl);

  // Set the single query parameter
  url.searchParams.set(paramName, paramValue);

  let accumulated = [];

  while (url) {
    try {
      const page = await d3.json(url.toString());

      // An empty body or a payload without `results` means there is nothing more to read.
      if (!page || !page.results) {
        return;
      }

      // Build a new array instead of pushing into the previous one (see @yields).
      accumulated = [...accumulated, ...page.results];
      yield accumulated;

      url = page.next ? new URL(page.next) : null;
    } catch (error) {
      console.error("Error fetching data:", error);
      return;
    }
  }
}
```

```{ojs, get_performance}
//| echo: false

// parse performance metrics here into a consistent structure

// Query the PGS Catalog performance search endpoint with pgp_id=PGP000517.
// fetchPaginatedData is an async generator, so this cell receives the
// accumulated results again after every page that is fetched.
performanceResults = fetchPaginatedData(
  "https://www.pgscatalog.org/rest/performance/search",
  "pgp_id",
  "PGP000517"
);

// One row per performance record. The nested sampleset / performance_metrics
// objects are kept, and the fields needed later are lifted to top-level columns.
raw_metrics = aq.from(performanceResults)
  .select("id", "associated_pgs_id", "sampleset", "performance_metrics")
  .derive({
    // Only the first sample and its first cohort are read from each sampleset.
    cohort_name_full: d => d.sampleset.samples[0].cohorts[0].name_full,
    cohort_name_short: d => d.sampleset.samples[0].cohorts[0].name_short,
    ancestry: d=> d.sampleset.samples[0].ancestry_broad,
    // The three metric lists; each is unrolled into its own table in the cells that follow.
    effect_sizes: d=> d.performance_metrics.effect_sizes,
    othermetrics: d=> d.performance_metrics.othermetrics,
    class_acc: d=> d.performance_metrics.class_acc
  })

// Column layout shared by effect_sizes, othermetrics and class_acc (each ends with .select(cols)).
cols = ["id", "associated_pgs_id", "cohort_name_full", "cohort_name_short", "ancestry", "metric_type", "value", "value_ci_lower", "value_ci_upper"]

// Long-format table of the `effect_sizes` metric list: one row per list element.
effect_sizes = raw_metrics
  // drop records whose effect_sizes list is empty
  .filter(d => op.length(d.effect_sizes) > 0)
  // one output row per element of the effect_sizes array
  .unroll("effect_sizes")
  // rename the element's fields to the shared metric columns
  .derive({
    metric_type: d=> d.effect_sizes.name_short,
    value: d=> d.effect_sizes.estimate,
    value_ci_lower: d=> d.effect_sizes.ci_lower,
    value_ci_upper: d=> d.effect_sizes.ci_upper
  })
  // keep only the shared column layout defined in `cols`
  .select(cols)

// Long-format table of the `othermetrics` metric list: one row per list element.
othermetrics = raw_metrics
  // drop records whose othermetrics list is empty
  .filter(d => op.length(d.othermetrics) > 0)
  // one output row per element of the othermetrics array
  .unroll("othermetrics")
  // rename the element's fields to the shared metric columns
  .derive({
    metric_type: d => d.othermetrics.name_short,
    value: d=> d.othermetrics.estimate,
    value_ci_lower: d=> d.othermetrics.ci_lower,
    value_ci_upper: d=> d.othermetrics.ci_upper
  })
  // keep only the shared column layout defined in `cols`
  .select(cols)
  
// Long-format table of the `class_acc` metric list: one row per list element.
class_acc = raw_metrics
  // drop records whose class_acc list is empty
  .filter(d => op.length(d.class_acc) > 0)
  // one output row per element of the class_acc array
  .unroll("class_acc")
  // rename the element's fields to the shared metric columns
  .derive({
    metric_type: d => d.class_acc.name_short,
    value: d=> d.class_acc.estimate,
    value_ci_lower: d=> d.class_acc.ci_lower,
    value_ci_upper: d=> d.class_acc.ci_upper
  })
  // keep only the shared column layout defined in `cols`
  .select(cols)
```


```{ojs, get_score}
//| echo: false

// parse score data here including method names and tuning types

// Query the PGS Catalog score search endpoint with pgp_id=PGP000517
// (the same publication id used for the performance query).
scoreResults = fetchPaginatedData(
  "https://www.pgscatalog.org/rest/score/search",
  "pgp_id",
  "PGP000517"
);

// notes: method name and tuning type is extracted from the PGS names, which isn't ideal
// recode pt_clump_nested as pt_clump
// UKBB_EnsPGS is always CV
//
// Steps:
//   1. keep the score id, its name and the reported trait
//   2. split `name` on "." and spread the first two parts into
//      method_name and tuning_type (limit: 2)
//   3. apply the two recodes listed above
scoreData = aq.from(scoreResults)
  .select("id", "name", "trait_reported")
  .spread({ name: d => op.split(d.name, '.') }, {as: ["method_name", "tuning_type"], limit: 2})
  .derive({ method_name: d => d.method_name === "pt_clump_nested" ? "pt_clump" : d.method_name,
            tuning_type: d => d.method_name === "UKBB_EnsPGS" ? "CV" : d.tuning_type })
```


```{ojs, ui_elements}
//| echo: false
//| panel: sidebar

biobank_names = plotData.select("cohort_name_full", "cohort_name_short").dedupe().objects()
viewof biobanks = Inputs.checkbox(biobank_names, {label: "Biobank", format: x => `${x.cohort_name_full} (${x.cohort_name_short})`, value: biobank_names} )

method_names = plotData.select("method_name").dedupe().objects()
viewof methods = Inputs.checkbox(method_names, { label: "Method:", format: x => x.method_name, value: method_names })

ancestry_names = plotData.select("ancestry").dedupe().objects()
viewof ancestries = Inputs.checkbox(ancestry_names, { label: "Genetic ancestry", format: x => x.ancestry, value: ancestry_names})

// some metrics for endpoints have no data, so must be disabled in the UI
metric_names = plotData.select("metric_type").dedupe().objects().map((x) => x.metric_type )
all_available_metrics = plotData.select("metric_type").dedupe()
enabled_metrics = plotData.params(endpoint).filter((d, $) => d.trait_reported == $.trait_reported).select("metric_type")
disabled_metrics = all_available_metrics.dedupe("metric_type").antijoin(enabled_metrics, "metric_type").objects().map((x) => x.metric_type )

viewof chosen_metric = Inputs.radio(metric_names, {label: "Metric", disabled: disabled_metrics, value: "β" })

endpoints = plotData.select("trait_reported").dedupe().objects()
viewof endpoint = Inputs.select(endpoints, {value: endpoints.find(t => t.trait_reported === "Type 2 diabetes (T2D)"), format: x => x.trait_reported, label: "Endpoint:"})

viewof best_method = Inputs.toggle({ label: "Filter best method?:", values: [["best"], ["best", "notbest"]]})
```

```{ojs, filter_data}
//| echo: false

// prepare pgs catalog data for plotting

// Stack the three long-format metric tables, then attach method_name,
// tuning_type and trait_reported from scoreData by matching
// associated_pgs_id to the score id. It is a left join, so metric rows are
// kept even when no score matches.
plotData = effect_sizes.concat(othermetrics, class_acc)
  .join_left(scoreData, (a, b) => op.equal(a.associated_pgs_id, b.id), [[aq.all()], ["method_name", "tuning_type", "trait_reported"]])

// Rows that match the current sidebar selections. The UI values are passed in
// as Arquero params because table expressions cannot read outside variables.
filtered = plotData.params({ biobanks: biobanks.map((x) => x.cohort_name_short), 
                  methods: methods.map((x) => x.method_name),
                  ancestry: ancestries.map((x) => x.ancestry),
                  endpoint: endpoint.trait_reported,
                  metric_type: chosen_metric,
                  // list of best_method flags to keep, taken from the toggle
                  best_method: best_method
  })
  // keep rows for the ticked biobanks / methods / ancestries and the chosen endpoint and metric
  .filter((d, $) => op.includes($.biobanks, d.cohort_name_short) &&
    op.includes($.methods, d.method_name) &&
    op.includes($.ancestry, d.ancestry) &&
    $.endpoint === d.trait_reported &&
    $.metric_type == d.metric_type) 
  // within each biobank x ancestry x trait group, flag the row(s) holding the largest value
  .groupby(["cohort_name_short", "ancestry", "trait_reported"])
  .derive( { best_method: d => d.value == op.max(d.value) ? "best": "notbest" })
  // keep only the flags allowed by the toggle
  .filter((d, $) => op.includes($.best_method, d.best_method ))    
```

```{ojs, plot}
//| echo: false
//| panel: fill
// Faceted dot plot of the selected metric: one panel per biobank (rows) and
// genetic ancestry (columns), one dot per development method, with a
// horizontal link drawn from the lower to the upper confidence bound.
Plot.plot({
  grid: true,
  // Accessible name for screen readers, built from the current selections so
  // it describes what is actually plotted.
  ariaLabel: `${chosen_metric} of polygenic scores for ${endpoint.trait_reported}, by development method, biobank and genetic ancestry`,
  marginTop: 50,
  marginLeft: 150,
  marginRight: 80,
  x: {
    label: `${chosen_metric} →`,
    nice: true
  },
  y: {
    label: "Development method"
  },
  // tableau10 theme, manually assigning each method a colour to be consistent
  color: {
    domain: ["dbslmm", "sbayesr", "lassosum", "prscs", "ldpred2", "megaprs", "pt_clump", "UKBB_EnsPGS"],
    range: ["#5778a4", "#e49444", "#d1615d", "#85b6b2", "#6a9f58", "#e7ca60", "#a87c9f", "#f1a2a9"]
  },
  // the dot symbol encodes tuning_type; show its legend
  symbol: {
    legend: true
  },
  style: {
    fontSize: "12px"
  },
  // the marks below use the same `filtered` data, so they are split into these facets
  facet: {
    data: filtered,
    y: "cohort_name_short",
    x: "ancestry",
    marginRight: 75,
    marginTop: 50
  },
  fx: {
    label: "Genetic ancestry"
  },
  fy: {
    label: "Biobank"
  },
  marks: [
    Plot.frame(),
    // link == error bar
    Plot.link(filtered, {
      x1: "value_ci_lower",
      x2: "value_ci_upper",
      y1: "method_name",
      y2: "method_name"
    }),
    // point estimate, coloured by method and shaped by tuning type
    Plot.dot(filtered, {
      x: "value",
      y: "method_name",
      fill: "method_name",
      symbol: "tuning_type"
    })
  ]
})

```

## Conclusion

Using an ensemble of scores, each created using a different PGS development method, can capture a larger effect size for a phenotype than a score from a single method. In other words, the ensemble score is more strongly associated with the phenotype.