|
85 | 85 | "outputs": [], |
86 | 86 | "source": [ |
87 | 87 | "# Defining date range for historical analysis\n", |
88 | | - "# NB: Vortexa data is currently available from 2016-01-01 with a maximum date range of 4 years per query \n", |
| 88 | + "# NB: Vortexa data is currently available from 2016-01-01 with a maximum date range of 4 years per query\n", |
89 | 89 | "START_DATE = datetime(2016, 1, 31)\n", |
90 | 90 | "END_DATE = datetime(2020, 1, 31)\n", |
91 | 91 | "DATE_RANGE = pd.date_range(START_DATE, END_DATE)\n", |
|
107 | 107 | "metadata": {}, |
108 | 108 | "outputs": [], |
109 | 109 | "source": [ |
110 | | - "def fetch_global_crude_floating_storage_timeseries(start_date, end_date, unit=\"t\"):\n", |
111 | | - " \n", |
| 110 | + "def fetch_global_crude_floating_storage_timeseries(\n", |
| 111 | + " start_date, end_date, unit=\"t\"\n", |
| 112 | + "):\n", |
112 | 113 | " # Find Crude/Condensates ID, ensuring its uniqueness\n", |
113 | | - " crude_and_condensates = [p.id for p in Products().search(\"crude\").to_list() if p.name==\"Crude/Condensates\"]\n", |
| 114 | + " crude_and_condensates = [\n", |
| 115 | + " p.id\n", |
| 116 | + " for p in Products().search(\"crude\").to_list()\n", |
| 117 | + " if p.name == \"Crude/Condensates\"\n", |
| 118 | + " ]\n", |
114 | 119 | " assert len(crude_and_condensates) == 1\n", |
115 | | - " \n", |
| 120 | + "\n", |
116 | 121 | " # make Vortexa API query\n", |
117 | 122 | " # NB: disable_geographic_exclusion_rules is set to True to include intra-movements\n", |
118 | | - " df_fs = CargoTimeSeries().search(timeseries_frequency=\"day\",\n", |
119 | | - " timeseries_unit=unit,\n", |
120 | | - " disable_geographic_exclusion_rules=True,\n", |
121 | | - " filter_products=crude_and_condensates,\n", |
122 | | - " filter_activity=\"storing_state\",\n", |
123 | | - " filter_time_min=start_date,\n", |
124 | | - " filter_time_max=end_date).to_df()\n", |
| 123 | + " df_fs = (\n", |
| 124 | + " CargoTimeSeries()\n", |
| 125 | + " .search(\n", |
| 126 | + " timeseries_frequency=\"day\",\n", |
| 127 | + " timeseries_unit=unit,\n", |
| 128 | + " disable_geographic_exclusion_rules=True,\n", |
| 129 | + " filter_products=crude_and_condensates,\n", |
| 130 | + " filter_activity=\"storing_state\",\n", |
| 131 | + " filter_time_min=start_date,\n", |
| 132 | + " filter_time_max=end_date,\n", |
| 133 | + " )\n", |
| 134 | + " .to_df()\n", |
| 135 | + " )\n", |
125 | 136 | "\n", |
126 | 137 | " # rename columns\n", |
127 | | - " df_fs = df_fs.rename(columns={\"key\": \"date\",\n", |
128 | | - " \"value\": unit,\n", |
129 | | - " \"count\": \"number_of_cargo_movements\"})\n", |
130 | | - " \n", |
| 138 | + " df_fs = df_fs.rename(\n", |
| 139 | + " columns={\n", |
| 140 | + " \"key\": \"date\",\n", |
| 141 | + " \"value\": unit,\n", |
| 142 | + " \"count\": \"number_of_cargo_movements\",\n", |
| 143 | + " }\n", |
| 144 | + " )\n", |
| 145 | + "\n", |
131 | 146 | " # remove time zone from timestamp\n", |
132 | 147 | " df_fs[\"date\"] = pd.to_datetime(df_fs[\"date\"]).dt.tz_localize(None)\n", |
133 | | - " \n", |
| 148 | + "\n", |
134 | 149 | " return df_fs" |
135 | 150 | ] |
136 | 151 | }, |
|
277 | 292 | } |
278 | 293 | ], |
279 | 294 | "source": [ |
280 | | - "df_fs = fetch_global_crude_floating_storage_timeseries(START_DATE, END_DATE, UNIT)\n", |
| 295 | + "df_fs = fetch_global_crude_floating_storage_timeseries(\n", |
| 296 | + " START_DATE, END_DATE, UNIT\n", |
| 297 | + ")\n", |
281 | 298 | "df_fs" |
282 | 299 | ] |
283 | 300 | }, |
|
461 | 478 | } |
462 | 479 | ], |
463 | 480 | "source": [ |
464 | | - "spot_prices = pd.read_excel(\"https://www.eia.gov/dnav/pet/xls/PET_PRI_SPT_S1_D.xls\", sheet_name=\"Data 1\", skiprows=[0,1])\n", |
465 | | - "spot_prices = spot_prices.set_index(\"Date\").fillna(method=\"ffill\").reindex(DATE_RANGE, method=\"ffill\")\n", |
| 481 | + "spot_prices = pd.read_excel(\n", |
| 482 | + " \"https://www.eia.gov/dnav/pet/xls/PET_PRI_SPT_S1_D.xls\",\n", |
| 483 | + " sheet_name=\"Data 1\",\n", |
| 484 | + " skiprows=[0, 1],\n", |
| 485 | + ")\n", |
| 486 | + "spot_prices = (\n", |
| 487 | + " spot_prices.set_index(\"Date\")\n", |
| 488 | + " .fillna(method=\"ffill\")\n", |
| 489 | + " .reindex(DATE_RANGE, method=\"ffill\")\n", |
| 490 | + ")\n", |
466 | 491 | "spot_prices" |
467 | 492 | ] |
468 | 493 | }, |
|
697 | 722 | } |
698 | 723 | ], |
699 | 724 | "source": [ |
700 | | - "future_prices = pd.read_excel(\"https://www.eia.gov/dnav/pet/xls/PET_PRI_FUT_S1_D.xls\", sheet_name=\"Data 1\", skiprows=[0,1])\n", |
701 | | - "future_prices = future_prices.set_index(\"Date\").fillna(method=\"ffill\").reindex(DATE_RANGE, method=\"ffill\")\n", |
| 725 | + "future_prices = pd.read_excel(\n", |
| 726 | + " \"https://www.eia.gov/dnav/pet/xls/PET_PRI_FUT_S1_D.xls\",\n", |
| 727 | + " sheet_name=\"Data 1\",\n", |
| 728 | + " skiprows=[0, 1],\n", |
| 729 | + ")\n", |
| 730 | + "future_prices = (\n", |
| 731 | + " future_prices.set_index(\"Date\")\n", |
| 732 | + " .fillna(method=\"ffill\")\n", |
| 733 | + " .reindex(DATE_RANGE, method=\"ffill\")\n", |
| 734 | + ")\n", |
702 | 735 | "future_prices" |
703 | 736 | ] |
704 | 737 | }, |
|
947 | 980 | } |
948 | 981 | ], |
949 | 982 | "source": [ |
950 | | - "calendar_spread.plot(title=\"Spread between Future and Spot crude oil prices\", grid=True)\n", |
| 983 | + "calendar_spread.plot(\n", |
| 984 | + " title=\"Spread between Future and Spot crude oil prices\", grid=True\n", |
| 985 | + ")\n", |
951 | 986 | "plt.xlabel(\"date\")\n", |
952 | 987 | "plt.ylabel(\"USD\");" |
953 | 988 | ] |
|
1000 | 1035 | "outputs": [], |
1001 | 1036 | "source": [ |
1002 | 1037 | "def crosscorr(series_x, series_y, lags):\n", |
1003 | | - " return pd.Series([series_y.corr(series_x.shift(lag)) for lag in lags], index=lags)" |
| 1038 | + " return pd.Series(\n", |
| 1039 | + " [series_y.corr(series_x.shift(lag)) for lag in lags], index=lags\n", |
| 1040 | + " )" |
1004 | 1041 | ] |
1005 | 1042 | }, |
1006 | 1043 | { |
|
1048 | 1085 | "outputs": [], |
1049 | 1086 | "source": [ |
1050 | 1087 | "def plot_crosscorr(series_x, series_y, maxlag, label_x, label_y):\n", |
1051 | | - " lags = np.arange(0, maxlag+1)\n", |
| 1088 | + " lags = np.arange(0, maxlag + 1)\n", |
1052 | 1089 | "\n", |
1053 | 1090 | " plt.subplot(\"211\")\n", |
1054 | 1091 | " xcorr_x_y = crosscorr(series_x, series_y, lags)\n", |
|
1088 | 1125 | } |
1089 | 1126 | ], |
1090 | 1127 | "source": [ |
1091 | | - "plot_crosscorr(calendar_spread, floating_storage, maxlag=MAXLAG, label_x=\"calendar spread\", label_y=\"floating storage\")" |
| 1128 | + "plot_crosscorr(\n", |
| 1129 | + " calendar_spread,\n", |
| 1130 | + " floating_storage,\n", |
| 1131 | + " maxlag=MAXLAG,\n", |
| 1132 | + " label_x=\"calendar spread\",\n", |
| 1133 | + " label_y=\"floating storage\",\n", |
| 1134 | + ")" |
1092 | 1135 | ] |
1093 | 1136 | }, |
1094 | 1137 | { |
|
1119 | 1162 | } |
1120 | 1163 | ], |
1121 | 1164 | "source": [ |
1122 | | - "plot_crosscorr(spot_prices, floating_storage, maxlag=MAXLAG, label_x=\"spot prices\", label_y=\"floating storage\")" |
| 1165 | + "plot_crosscorr(\n", |
| 1166 | + " spot_prices,\n", |
| 1167 | + " floating_storage,\n", |
| 1168 | + " maxlag=MAXLAG,\n", |
| 1169 | + " label_x=\"spot prices\",\n", |
| 1170 | + " label_y=\"floating storage\",\n", |
| 1171 | + ")" |
1123 | 1172 | ] |
1124 | 1173 | }, |
1125 | 1174 | { |
|
1152 | 1201 | } |
1153 | 1202 | ], |
1154 | 1203 | "source": [ |
1155 | | - "lags = np.arange(0, MAXLAG+1)\n", |
| 1204 | + "lags = np.arange(0, MAXLAG + 1)\n", |
1156 | 1205 | "\n", |
1157 | 1206 | "crosscorr(floating_storage, floating_storage, lags).plot()\n", |
1158 | 1207 | "crosscorr(calendar_spread, calendar_spread, lags).plot()\n", |
|
1214 | 1263 | } |
1215 | 1264 | ], |
1216 | 1265 | "source": [ |
1217 | | - "gct = grangercausalitytests(pd.concat([floating_storage, calendar_spread], axis=1), maxlag=2, verbose=True)" |
| 1266 | + "gct = grangercausalitytests(\n", |
| 1267 | + " pd.concat([floating_storage, calendar_spread], axis=1),\n", |
| 1268 | + " maxlag=2,\n", |
| 1269 | + " verbose=True,\n", |
| 1270 | + ")" |
1218 | 1271 | ] |
1219 | 1272 | }, |
1220 | 1273 | { |
|
1233 | 1286 | "metadata": {}, |
1234 | 1287 | "outputs": [], |
1235 | 1288 | "source": [ |
1236 | | - "def plot_granger_pvalues(series_x, series_y, maxlag, label_x, label_y, test=\"ssr_ftest\", confidence_level=0.05):\n", |
1237 | | - " lags = np.arange(1, maxlag+1)\n", |
| 1289 | + "def plot_granger_pvalues(\n", |
| 1290 | + " series_x,\n", |
| 1291 | + " series_y,\n", |
| 1292 | + " maxlag,\n", |
| 1293 | + " label_x,\n", |
| 1294 | + " label_y,\n", |
| 1295 | + " test=\"ssr_ftest\",\n", |
| 1296 | + " confidence_level=0.05,\n", |
| 1297 | + "):\n", |
| 1298 | + " lags = np.arange(1, maxlag + 1)\n", |
1238 | 1299 | "\n", |
1239 | 1300 | " plt.subplot(\"211\")\n", |
1240 | | - " gct_x_y = grangercausalitytests(pd.concat([series_y, series_x], axis=1), maxlag=maxlag, verbose=False)\n", |
1241 | | - " pvalue_x_y = pd.Series([gct_x_y[lag][0][test][1] for lag in lags], index=lags)\n", |
| 1301 | + " gct_x_y = grangercausalitytests(\n", |
| 1302 | + " pd.concat([series_y, series_x], axis=1), maxlag=maxlag, verbose=False\n", |
| 1303 | + " )\n", |
| 1304 | + " pvalue_x_y = pd.Series(\n", |
| 1305 | + " [gct_x_y[lag][0][test][1] for lag in lags], index=lags\n", |
| 1306 | + " )\n", |
1242 | 1307 | " pvalue_x_y.plot(title=f\"{label_x} -> {label_y}\", grid=True)\n", |
1243 | | - " plt.plot((0, maxlag),(confidence_level, confidence_level),\"--r\")\n", |
| 1308 | + " plt.plot((0, maxlag), (confidence_level, confidence_level), \"--r\")\n", |
1244 | 1309 | " plt.ylabel(\"p-value\")\n", |
1245 | 1310 | "\n", |
1246 | 1311 | " plt.subplot(\"210\")\n", |
1247 | | - " gct_y_x = grangercausalitytests(pd.concat([series_x, series_y], axis=1), maxlag=maxlag, verbose=False)\n", |
1248 | | - " pvalue_y_x = pd.Series([gct_y_x[lag][0][test][1] for lag in lags], index=lags)\n", |
| 1312 | + " gct_y_x = grangercausalitytests(\n", |
| 1313 | + " pd.concat([series_x, series_y], axis=1), maxlag=maxlag, verbose=False\n", |
| 1314 | + " )\n", |
| 1315 | + " pvalue_y_x = pd.Series(\n", |
| 1316 | + " [gct_y_x[lag][0][test][1] for lag in lags], index=lags\n", |
| 1317 | + " )\n", |
1249 | 1318 | " pvalue_y_x.plot(title=f\"{label_y} -> {label_x}\", grid=True)\n", |
1250 | | - " plt.plot((0, maxlag),(confidence_level, confidence_level),\"--r\")\n", |
| 1319 | + " plt.plot((0, maxlag), (confidence_level, confidence_level), \"--r\")\n", |
1251 | 1320 | " plt.xlabel(\"lag [days]\")\n", |
1252 | 1321 | " plt.ylabel(\"p-value\")" |
1253 | 1322 | ] |
|
1278 | 1347 | } |
1279 | 1348 | ], |
1280 | 1349 | "source": [ |
1281 | | - "plot_granger_pvalues(calendar_spread, floating_storage, maxlag=MAXLAG, label_x=\"calendar spread\", label_y=\"floating storage\")" |
| 1350 | + "plot_granger_pvalues(\n", |
| 1351 | + " calendar_spread,\n", |
| 1352 | + " floating_storage,\n", |
| 1353 | + " maxlag=MAXLAG,\n", |
| 1354 | + " label_x=\"calendar spread\",\n", |
| 1355 | + " label_y=\"floating storage\",\n", |
| 1356 | + ")" |
1282 | 1357 | ] |
1283 | 1358 | }, |
1284 | 1359 | { |
|
1311 | 1386 | } |
1312 | 1387 | ], |
1313 | 1388 | "source": [ |
1314 | | - "plot_granger_pvalues(spot_prices, floating_storage, maxlag=MAXLAG, label_x=\"spot prices\", label_y=\"floating storage\")" |
| 1389 | + "plot_granger_pvalues(\n", |
| 1390 | + " spot_prices,\n", |
| 1391 | + " floating_storage,\n", |
| 1392 | + " maxlag=MAXLAG,\n", |
| 1393 | + " label_x=\"spot prices\",\n", |
| 1394 | + " label_y=\"floating storage\",\n", |
| 1395 | + ")" |
1315 | 1396 | ] |
1316 | 1397 | }, |
1317 | 1398 | { |
|
1342 | 1423 | } |
1343 | 1424 | ], |
1344 | 1425 | "source": [ |
1345 | | - "plot_granger_pvalues(spot_prices, calendar_spread, maxlag=MAXLAG, label_x=\"spot prices\", label_y=\"calendar spread\")" |
| 1426 | + "plot_granger_pvalues(\n", |
| 1427 | + " spot_prices,\n", |
| 1428 | + " calendar_spread,\n", |
| 1429 | + " maxlag=MAXLAG,\n", |
| 1430 | + " label_x=\"spot prices\",\n", |
| 1431 | + " label_y=\"calendar spread\",\n", |
| 1432 | + ")" |
1346 | 1433 | ] |
1347 | 1434 | }, |
1348 | 1435 | { |
|
1399 | 1486 | "end_ts = pd.Timestamp(\"now\")\n", |
1400 | 1487 | "start_ts = end_ts - pd.Timedelta(\"100 d\")\n", |
1401 | 1488 | "\n", |
1402 | | - "df_fs_now = fetch_global_crude_floating_storage_timeseries(start_ts.date(), end_ts.date(), UNIT)\n", |
| 1489 | + "df_fs_now = fetch_global_crude_floating_storage_timeseries(\n", |
| 1490 | + " start_ts.date(), end_ts.date(), UNIT\n", |
| 1491 | + ")\n", |
1403 | 1492 | "\n", |
1404 | 1493 | "floating_storage_now = df_fs_now.set_index(\"date\")[UNIT] / 1000\n", |
1405 | | - "floating_storage_now.plot(title=\"Global crude oil floating storage - last 100 days\", grid=True)\n", |
| 1494 | + "floating_storage_now.plot(\n", |
| 1495 | + " title=\"Global crude oil floating storage - last 100 days\", grid=True\n", |
| 1496 | + ")\n", |
1406 | 1497 | "plt.xlabel(\"date\")\n", |
1407 | 1498 | "plt.ylabel(\"k\" + UNIT);" |
1408 | 1499 | ] |
|
0 commit comments