diff --git a/code/round-size-quantiles.ipynb b/code/round-size-quantiles.ipynb
new file mode 100644
index 0000000..b542a05
--- /dev/null
+++ b/code/round-size-quantiles.ipynb
@@ -0,0 +1,655 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Show round sizes for various margins at various quantiles\n",
+ "Compare with Table 1 from [BRAVO](https://www.usenix.org/system/files/conference/evtwote12/evtwote12-final27.pdf)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setup and define some utilities"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from athena.audit import Audit\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def athena_sample_sizes(\n",
+ "    risk_limit: float,\n",
+ "    p_w: float,\n",
+ "    p_r: float,\n",
+ "    sample_w: int,\n",
+ "    sample_r: int,\n",
+ "    p_completion: float,\n",
+ "    ballots_cast: int=100000\n",
+ ") -> int:\n",
+ "    \"\"\"\n",
+ "    Compute the next Athena round size for a target probability of completing the audit\n",
+ "    in that round, assuming the reported election outcome is correct.\n",
+ "    TODO: refactor to pass in integer vote shares to allow more exact calculations, and handle\n",
+ "    sampling without replacement.\n",
+ "\n",
+ "    Inputs:\n",
+ "        risk_limit      - the risk limit for this audit (passed to Audit as alpha)\n",
+ "        p_w             - the fraction of vote share for the winner\n",
+ "        p_r             - the fraction of vote share for the runner-up\n",
+ "        sample_w        - the number of votes for the winner that have already\n",
+ "                          been sampled\n",
+ "        sample_r        - the number of votes for the runner-up that have\n",
+ "                          already been sampled\n",
+ "        p_completion    - the desired chance of completion in one round,\n",
+ "                          if the outcome is correct\n",
+ "        ballots_cast    - the total number of ballots in the modeled two-candidate\n",
+ "                          contest; the winner's tally is int(ballots_cast * two-way share)\n",
+ "\n",
+ "    Outputs:\n",
+ "        sample_size     - the first future round size reported by the audit for the\n",
+ "                          requested completion probability, plus twice any shortfall\n",
+ "                          computed from the already-sampled round\n",
+ "\n",
+ "    >>> athena_sample_sizes(0.1, 0.6, 0.4, 56, 56, 0.7)\n",
+ "    244\n",
+ "    \"\"\"\n",
+ "\n",
+ "    # Undiluted \"two-way\" share: the winner's fraction among winner + runner-up votes only.\n",
+ "    two_way_total = p_w + p_r\n",
+ "    winner_share = p_w / two_way_total\n",
+ "\n",
+ "    # Synthesize integer tallies for a pure two-candidate contest of ballots_cast ballots.\n",
+ "    winner_votes = int(ballots_cast * winner_share)\n",
+ "    runner_up_votes = ballots_cast - winner_votes\n",
+ "\n",
+ "    pstop_goal = [p_completion]\n",
+ "    # A previous round is scheduled only when something has already been sampled.\n",
+ "    round_sizes = [sample_w + sample_r] if (sample_w or sample_r) else []\n",
+ "\n",
+ "    election = {\n",
+ "        \"alpha\": risk_limit,\n",
+ "        \"delta\": 1.0,\n",
+ "        \"candidates\": [\"A\", \"B\"],\n",
+ "        \"results\": [winner_votes, runner_up_votes],\n",
+ "        \"ballots_cast\": ballots_cast,\n",
+ "        \"winners\": 1,\n",
+ "        \"name\": 'pure_pair',\n",
+ "        \"model\": 'bin',\n",
+ "        \"pstop_goal\": pstop_goal,\n",
+ "    }\n",
+ "\n",
+ "    audit = Audit(\"athena\", election['alpha'], election['delta'])\n",
+ "    audit.add_election(election)\n",
+ "    audit.add_round_schedule(round_sizes)\n",
+ "\n",
+ "    # Gap between the largest required and largest observed count for the prior round;\n",
+ "    # zero when there is no prior round.\n",
+ "    below_kmin = 0\n",
+ "    if round_sizes:\n",
+ "        risk = audit.find_risk([sample_w])\n",
+ "        below_kmin = max(risk['required']) - max(risk['observed'])\n",
+ "\n",
+ "    projection = audit.find_next_round_size(pstop_goal)\n",
+ "\n",
+ "    # Pad the projected size by twice the gap (NOTE(review): rationale for the factor of 2\n",
+ "    # is not documented here - confirm).\n",
+ "    return projection['future_round_sizes'][0] + 2 * below_kmin"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def ss(m, p_completion=0.9, risk_limit=0.1, ballots_cast=100000):\n",
+ "    \"\"\"\n",
+ "    Return the Athena round size for a two-candidate contest with margin m\n",
+ "    (winner share 0.5+m/2, runner-up share 0.5-m/2) when nothing has been sampled yet.\n",
+ "    \"\"\"\n",
+ "    return athena_sample_sizes(\n",
+ "        risk_limit=risk_limit,\n",
+ "        p_w=0.5+m/2,\n",
+ "        p_r=0.5-m/2,\n",
+ "        sample_w=0,\n",
+ "        sample_r=0,\n",
+ "        p_completion=p_completion,\n",
+ "        ballots_cast=ballots_cast,\n",
+ "    )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "margins = [.4, .3, .2, .16, .1, 0.08, 0.06, 0.04, 0.02, 0.01]\n",
+ "pstops = [0.25, 0.5, 0.75, 0.9, 0.95, 0.99]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "With 1,000,000 ballots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab1m = {pstop: {m: ss(m, pstop, ballots_cast=1000000) for m in margins} for pstop in pstops}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0.25 | \n",
+ " 0.50 | \n",
+ " 0.75 | \n",
+ " 0.90 | \n",
+ " 0.95 | \n",
+ " 0.99 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0.40 | \n",
+ " 14 | \n",
+ " 18 | \n",
+ " 31 | \n",
+ " 42 | \n",
+ " 58 | \n",
+ " 183 | \n",
+ "
\n",
+ " \n",
+ " | 0.30 | \n",
+ " 22 | \n",
+ " 30 | \n",
+ " 52 | \n",
+ " 77 | \n",
+ " 114 | \n",
+ " 229 | \n",
+ "
\n",
+ " \n",
+ " | 0.20 | \n",
+ " 44 | \n",
+ " 75 | \n",
+ " 122 | \n",
+ " 184 | \n",
+ " 274 | \n",
+ " 732 | \n",
+ "
\n",
+ " \n",
+ " | 0.16 | \n",
+ " 72 | \n",
+ " 120 | \n",
+ " 190 | \n",
+ " 282 | \n",
+ " 427 | \n",
+ " 854 | \n",
+ "
\n",
+ " \n",
+ " | 0.10 | \n",
+ " 182 | \n",
+ " 290 | \n",
+ " 484 | \n",
+ " 710 | \n",
+ " 1098 | \n",
+ " 2930 | \n",
+ "
\n",
+ " \n",
+ " | 0.08 | \n",
+ " 278 | \n",
+ " 429 | \n",
+ " 720 | \n",
+ " 1111 | \n",
+ " 1830 | \n",
+ " 3418 | \n",
+ "
\n",
+ " \n",
+ " | 0.06 | \n",
+ " 493 | \n",
+ " 786 | \n",
+ " 1258 | \n",
+ " 1976 | \n",
+ " 3418 | \n",
+ " 6836 | \n",
+ "
\n",
+ " \n",
+ " | 0.04 | \n",
+ " 1088 | \n",
+ " 1719 | \n",
+ " 2832 | \n",
+ " 4450 | \n",
+ " 6836 | \n",
+ " 13672 | \n",
+ "
\n",
+ " \n",
+ " | 0.02 | \n",
+ " 4248 | \n",
+ " 6880 | \n",
+ " 11331 | \n",
+ " 17807 | \n",
+ " 27344 | \n",
+ " 54688 | \n",
+ "
\n",
+ " \n",
+ " | 0.01 | \n",
+ " 16796 | \n",
+ " 27522 | \n",
+ " 45330 | \n",
+ " 71234 | \n",
+ " 109375 | \n",
+ " 218750 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0.25 0.50 0.75 0.90 0.95 0.99\n",
+ "0.40 14 18 31 42 58 183\n",
+ "0.30 22 30 52 77 114 229\n",
+ "0.20 44 75 122 184 274 732\n",
+ "0.16 72 120 190 282 427 854\n",
+ "0.10 182 290 484 710 1098 2930\n",
+ "0.08 278 429 720 1111 1830 3418\n",
+ "0.06 493 786 1258 1976 3418 6836\n",
+ "0.04 1088 1719 2832 4450 6836 13672\n",
+ "0.02 4248 6880 11331 17807 27344 54688\n",
+ "0.01 16796 27522 45330 71234 109375 218750"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.DataFrame(tab1m)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
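+ "Note that the round sizes depend on the total number of ballots cast, for reasons not yet investigated: the 0.25 through 0.90 columns are the same for 1,000,000 and 100,000 ballots, but the 0.95 and 0.99 columns differ, as the next two cells illustrate."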
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "27344"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ss(0.02, p_completion=0.95, ballots_cast=1000000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "28125"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ss(0.02, p_completion=0.95, ballots_cast=100000)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "With 100,000 ballots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:root:FULL RECOUNT is suggested!\n",
+ "WARNING:root:Probability of stopping at: [100000] is 0.9427219856932207\n",
+ "WARNING:root:FULL RECOUNT is suggested!\n",
+ "WARNING:root:Probability of stopping at: [100000] is 0.9427219856932207\n"
+ ]
+ }
+ ],
+ "source": [
+ "tab100k = {pstop: {m: ss(m, pstop, ballots_cast=100000) for m in margins} for pstop in pstops}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0.25 | \n",
+ " 0.50 | \n",
+ " 0.75 | \n",
+ " 0.90 | \n",
+ " 0.95 | \n",
+ " 0.99 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0.40 | \n",
+ " 14 | \n",
+ " 18 | \n",
+ " 31 | \n",
+ " 42 | \n",
+ " 72 | \n",
+ " 146 | \n",
+ "
\n",
+ " \n",
+ " | 0.30 | \n",
+ " 22 | \n",
+ " 30 | \n",
+ " 52 | \n",
+ " 77 | \n",
+ " 122 | \n",
+ " 292 | \n",
+ "
\n",
+ " \n",
+ " | 0.20 | \n",
+ " 44 | \n",
+ " 75 | \n",
+ " 122 | \n",
+ " 184 | \n",
+ " 292 | \n",
+ " 586 | \n",
+ "
\n",
+ " \n",
+ " | 0.16 | \n",
+ " 72 | \n",
+ " 120 | \n",
+ " 190 | \n",
+ " 282 | \n",
+ " 439 | \n",
+ " 1172 | \n",
+ "
\n",
+ " \n",
+ " | 0.10 | \n",
+ " 182 | \n",
+ " 290 | \n",
+ " 484 | \n",
+ " 710 | \n",
+ " 1172 | \n",
+ " 2344 | \n",
+ "
\n",
+ " \n",
+ " | 0.08 | \n",
+ " 278 | \n",
+ " 429 | \n",
+ " 720 | \n",
+ " 1111 | \n",
+ " 1758 | \n",
+ " 4688 | \n",
+ "
\n",
+ " \n",
+ " | 0.06 | \n",
+ " 493 | \n",
+ " 786 | \n",
+ " 1258 | \n",
+ " 1976 | \n",
+ " 3028 | \n",
+ " 6055 | \n",
+ "
\n",
+ " \n",
+ " | 0.04 | \n",
+ " 1088 | \n",
+ " 1719 | \n",
+ " 2832 | \n",
+ " 4450 | \n",
+ " 7031 | \n",
+ " 18750 | \n",
+ "
\n",
+ " \n",
+ " | 0.02 | \n",
+ " 4248 | \n",
+ " 6880 | \n",
+ " 11331 | \n",
+ " 17807 | \n",
+ " 28125 | \n",
+ " 75000 | \n",
+ "
\n",
+ " \n",
+ " | 0.01 | \n",
+ " 16796 | \n",
+ " 27522 | \n",
+ " 45330 | \n",
+ " 71234 | \n",
+ " 100000 | \n",
+ " 100000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0.25 0.50 0.75 0.90 0.95 0.99\n",
+ "0.40 14 18 31 42 72 146\n",
+ "0.30 22 30 52 77 122 292\n",
+ "0.20 44 75 122 184 292 586\n",
+ "0.16 72 120 190 282 439 1172\n",
+ "0.10 182 290 484 710 1172 2344\n",
+ "0.08 278 429 720 1111 1758 4688\n",
+ "0.06 493 786 1258 1976 3028 6055\n",
+ "0.04 1088 1719 2832 4450 7031 18750\n",
+ "0.02 4248 6880 11331 17807 28125 75000\n",
+ "0.01 16796 27522 45330 71234 100000 100000"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.DataFrame(tab100k)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For testing: run the doctests embedded in the docstrings above."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TestResults(failed=0, attempted=1)"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import doctest\n",
+ "doctest.testmod()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "#logging.basicConfig(level=\"DEBUG\")\n",
+ "# logging.basicConfig(level=\"WARNING\")\n",
+ "logging.getLogger().setLevel(\"WARNING\")\n",
+ "#logging.getLogger(__name__).setLevel(\"DEBUG\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "logging.getLogger()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "logging.getLogger(__name__)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ },
+ "toc": {
+ "colors": {
+ "hover_highlight": "#DAA520",
+ "running_highlight": "#FF0000",
+ "selected_highlight": "#FFD700"
+ },
+ "moveMenuLeft": true,
+ "nav_menu": {
+ "height": "117px",
+ "width": "252px"
+ },
+ "navigate_menu": true,
+ "number_sections": true,
+ "sideBar": true,
+ "threshold": 4,
+ "toc_cell": false,
+ "toc_section_display": "block",
+ "toc_window_display": false,
+ "widenNotebook": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}