diff --git a/data/acc_caracs_grav-2005.pkl b/data/acc_caracs_grav-2005.pkl new file mode 120000 index 00000000..c76f6382 --- /dev/null +++ b/data/acc_caracs_grav-2005.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2005.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2006.pkl b/data/acc_caracs_grav-2006.pkl new file mode 120000 index 00000000..1b8d8873 --- /dev/null +++ b/data/acc_caracs_grav-2006.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2006.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2007.pkl b/data/acc_caracs_grav-2007.pkl new file mode 120000 index 00000000..7f9e49b8 --- /dev/null +++ b/data/acc_caracs_grav-2007.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2007.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2008.pkl b/data/acc_caracs_grav-2008.pkl new file mode 120000 index 00000000..5df0a3eb --- /dev/null +++ b/data/acc_caracs_grav-2008.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2008.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2010.pkl b/data/acc_caracs_grav-2010.pkl new file mode 120000 index 00000000..402ccfa4 --- /dev/null +++ b/data/acc_caracs_grav-2010.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2010.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2011.pkl b/data/acc_caracs_grav-2011.pkl new file mode 120000 index 00000000..ec2792c9 --- /dev/null +++ b/data/acc_caracs_grav-2011.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2011.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2012.pkl b/data/acc_caracs_grav-2012.pkl new file mode 120000 index 00000000..81fff3c3 --- /dev/null +++ b/data/acc_caracs_grav-2012.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2012.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2013.pkl b/data/acc_caracs_grav-2013.pkl new file mode 120000 index 00000000..001076e1 --- /dev/null +++ 
b/data/acc_caracs_grav-2013.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2013.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2014.pkl b/data/acc_caracs_grav-2014.pkl new file mode 120000 index 00000000..73b868bf --- /dev/null +++ b/data/acc_caracs_grav-2014.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2014.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2015.pkl b/data/acc_caracs_grav-2015.pkl new file mode 120000 index 00000000..9c86344b --- /dev/null +++ b/data/acc_caracs_grav-2015.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2015.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2016.pkl b/data/acc_caracs_grav-2016.pkl new file mode 120000 index 00000000..614f8ebd --- /dev/null +++ b/data/acc_caracs_grav-2016.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2016.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2017.pkl b/data/acc_caracs_grav-2017.pkl new file mode 120000 index 00000000..a0c25893 --- /dev/null +++ b/data/acc_caracs_grav-2017.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2017.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2018.pkl b/data/acc_caracs_grav-2018.pkl new file mode 120000 index 00000000..6fe4d35c --- /dev/null +++ b/data/acc_caracs_grav-2018.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2018.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2019.pkl b/data/acc_caracs_grav-2019.pkl new file mode 120000 index 00000000..682ed9dc --- /dev/null +++ b/data/acc_caracs_grav-2019.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2019.pkl \ No newline at end of file diff --git a/data/acc_caracs_grav-2020.pkl b/data/acc_caracs_grav-2020.pkl new file mode 120000 index 00000000..b55dcacc --- /dev/null +++ b/data/acc_caracs_grav-2020.pkl @@ -0,0 +1 @@ +../pgab_accidents/data/acc_caracs_grav-2020.pkl \ No newline at end of file diff --git a/delta.py b/delta.py index 
5fe72539..ad55b7ee 100644 --- a/delta.py +++ b/delta.py @@ -60,6 +60,7 @@ from formations import formations as formations_lib from APAAL_criminalite_education import criminalite_education from ADHD_Movies import movies +from pgab_accidents import accidents as pgab_accidents from ab_wg_apb_parcoursup import apb_parcoursup #@profile @@ -120,6 +121,7 @@ def init(): formations = formations_lib.Formations(app) crim_edu = criminalite_education.Criminalite_Education(app) mvs = movies.MoviesStats(app) + pgab_acc = pgab_accidents.Accidents(app) apb = apb_parcoursup.APB_PARCOURSUP(app) # external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css'] @@ -136,6 +138,7 @@ def init(): dcc.Link(html.Button('Décès journaliers', style={'width':"100%"}), href='/deces'), dcc.Link(html.Button('MDMR_NYPDCallsMeteoNY', style={'width':"100%"}), href='/MDMR_NYPDCallsMeteoNY'), dcc.Link(html.Button('Accident Routiers', style={'width':"100%", 'margin':0, 'padding': 0}), href='/accidents_routiers'), + dcc.Link(html.Button('Accidents de la route', style={'width':"100%"}), href='/pgab_accidents'), dcc.Link(html.Button('Médailles Olympique', style={'width': "100%"}), href='/olympics'), dcc.Link(html.Button("Génération d'énergie UE", style={'width':"100%"}), href='/Energy_generation'), dcc.Link(html.Button('Utilisation Vélibs', style={'width':"100%"}), href='/EVHB_velib'), @@ -228,6 +231,8 @@ def display_page(pathname): return dec.main_layout elif pathname == '/accidents_routiers': return pm.main_layout + elif pathname == '/pgab_accidents': + return pgab_acc.main_layout elif pathname == '/olympics': return oly.main_layout elif pathname == '/Energy_generation': diff --git a/pgab_accidents/.gitignore b/pgab_accidents/.gitignore new file mode 100644 index 00000000..b7c9ba7c --- /dev/null +++ b/pgab_accidents/.gitignore @@ -0,0 +1,3 @@ +data/*.csv +data/20*/ +map.html diff --git a/pgab_accidents/accidents.py b/pgab_accidents/accidents.py new file mode 100644 index 00000000..c273715a --- 
class Accidents:
    """Dash page showing French road accidents (2005-2020) on a map and as histograms.

    The page contains a map (heatmap or exact locations) for the year chosen
    with a slider, a per-month histogram for that year, a per-year histogram
    for the whole data set, and a Start/Stop button that steps the slider
    automatically through the years.

    Parameters
    ----------
    application : dash.Dash, optional
        Existing Dash application on which the callbacks are registered.
        When omitted, a standalone application is created and this page
        becomes its layout.

    Raises
    ------
    FileNotFoundError
        If no ``data/acc_caracs_grav-*`` pickle is found relative to the
        current working directory.
    """

    START = 'Start'
    STOP = 'Stop'

    # Name under which the "MISSING DATA" watermark template is registered.
    MISSING_TEMPLATE = 'missing'
    # Template Plotly Express falls back to when no default template is set.
    DEFAULT_TEMPLATE = 'plotly'

    def __init__(self, application = None):
        self.df = self._load_data()
        self.zoom = 4
        self.center = dict(lat=46.7111, lon=1.7191)

        self._register_missing_template()
        self.main_layout = self._build_layout()

        if application:
            self.app = application
        else:
            self.app = dash.Dash(__name__)
            self.app.layout = self.main_layout

        self._register_callbacks()

    def _load_data(self):
        """Concatenate the yearly pickles into one frame with display column names."""
        # NOTE(review): read_pickle must only be used on trusted files; these
        # are expected to be the pickles shipped with the project.
        paths = sorted(glob.glob('data/acc_caracs_grav-*'))
        if not paths:
            raise FileNotFoundError(
                "No accident data found: expected files matching 'data/acc_caracs_grav-*'")
        df = pd.concat([pd.read_pickle(path) for path in paths]).sort_values(
            ['year', 'grav'], ascending = [True, False])
        df.rename(columns = {'grav':'Gravité', 'year':'Année', 'mois':'Mois'}, inplace = True)
        return df

    def _register_missing_template(self):
        """Register the watermark template used for years without any data."""
        pio.templates[self.MISSING_TEMPLATE] = go.layout.Template(
            layout_annotations=[
                dict(
                    name="missing data watermark",
                    text="MISSING DATA",
                    textangle=-20,
                    opacity=.4,
                    font=dict(color="black", size=100),
                    xref="paper",
                    yref="paper",
                    x=0.5,
                    y=0.5,
                    showarrow=False,
                )
            ]
        )

    def _build_layout(self):
        """Build the page layout (map, controls, histograms and explanatory text)."""
        bar_fig = px.histogram(self.df, x="Année", color="Gravité")

        return html.Div(children=[
            html.H3(children='Répartition des accidents de la route en France métropolitaine entre 2005 et 2020'),
            html.Div([
                html.Div([ dcc.Graph(id='pgab-acc-main-graph'), ], style={'width':'100%', }),
                html.Div([
                    dcc.RadioItems(id='pgab-acc-type',
                                   options=[{'label':'Heatmap', 'value':0},
                                            {'label':'Emplacements exacts', 'value':1}],
                                   value=0,
                                   labelStyle={'display':'block'}),
                    html.Div(html.Button(self.START, id='pgab-button-start-stop', style={'display':'inline-block'}),
                             style={'margin-right':'15px', 'width': '7em', 'float':'right'}),
                ]),
            ]),
            html.Div([
                html.Br(),
                html.Div(
                    dcc.Slider(
                        id='pgab-year-slider',
                        min=2005,
                        max=2020,
                        step = 1,
                        value=2005,
                        marks={str(year): str(year) for year in range(2005, 2021)},
                    ),
                    style={'display':'inline-block', 'width':"90%"}
                ),
                dcc.Interval(
                    id='pgab-auto-stepper',
                    interval=1500,
                    max_intervals = -1,
                    n_intervals = 0
                ),
            ], style={
                'padding': '0px 50px',
                'width':'100%'
            }),
            html.Div([ dcc.Graph(id='pgab-acc-month-bar-graph'), ], style={'width':'100%', }),
            dcc.Markdown("""
            Carte interactive des accidents de la route recensés entre 2005 et 2020.

            Utilisez le slider afin de sélectionner l'année.

            L'année 2009 reste manquante (fichier invalide), et avant 2015 la plupart des régions ne donnent pas de coordonnées GPS, les données mises à disposition sont incomplètes.

            La carte comporte plusieurs calques:
            * **Heatmap** des accidents
            * **Emplacements exacts** des accidents

            __Information complémentaire__, la gravité d'un accident correspond à:
            * **1** | Indemne
            * **2** | Blessures légères
            * **3** | Blessures graves
            * **4** | Blessures mortelles
            """, style={'margin-top': '3rem'}),

            html.Div([ dcc.Graph(id='acc-bar-graph', figure=bar_fig), ], style={'width':'100%', }),
            dcc.Markdown("""
            Histogramme des accidents de la route recensés entre 2005 et 2020.

            L'année 2009 reste manquante, les données mises à disposition sont incomplètes.

            #### À propos
            * Données: [data.gouv.fr](https://www.data.gouv.fr/fr/datasets/bases-de-donnees-annuelles-des-accidents-corporels-de-la-circulation-routiere-annees-de-2005-a-2020/)

            © 2022 Paul Galand & Ancelin Bouchet
            """, style={'margin-top': '3rem'}),
        ], style={
            'backgroundColor': 'white',
            'padding': '10px 50px 10px 50px',
            'display': 'flex',
            'flex-direction': 'column'
        })

    def _register_callbacks(self):
        """Wire the slider, the radio items, the button and the timer together."""
        Input = dash.dependencies.Input
        Output = dash.dependencies.Output
        State = dash.dependencies.State

        self.app.callback(
            Output('pgab-acc-month-bar-graph', 'figure'),
            Input('pgab-year-slider', 'value'))(self.update_month_bar_graph)
        self.app.callback(
            Output('pgab-acc-main-graph', 'figure'),
            [Input('pgab-acc-type', 'value'),
             Input('pgab-year-slider', 'value')])(self.update_main_graph)
        self.app.callback(
            Output('pgab-button-start-stop', 'children'),
            Input('pgab-button-start-stop', 'n_clicks'),
            State('pgab-button-start-stop', 'children'))(self.button_on_click)
        # The property must be spelled 'max_intervals' (as in the dcc.Interval
        # declared in the layout); 'max_interval' is not a property of the
        # component, so the button could never pause the timer.
        self.app.callback(
            Output('pgab-auto-stepper', 'max_intervals'),
            [Input('pgab-button-start-stop', 'children')])(self.run_movie)
        self.app.callback(
            Output('pgab-year-slider', 'value'),
            Input('pgab-auto-stepper', 'n_intervals'),
            [State('pgab-year-slider', 'value'),
             State('pgab-button-start-stop', 'children')])(self.on_interval)

    def _rows_for_year(self, year):
        """Return the rows of ``self.df`` whose 'Année' equals ``str(year)``."""
        return self.df[self.df['Année'] == str(year)]

    def _template_for(self, dfg):
        """Pick the figure template: the watermark one when ``dfg`` has no rows.

        The template is passed explicitly to each figure instead of rewriting
        ``pio.templates.default``: that default is global to the process, so
        changing it per request would affect every other page of the
        application and could be overwritten by a concurrent callback.
        """
        return self.MISSING_TEMPLATE if dfg.empty else self.DEFAULT_TEMPLATE

    def update_month_bar_graph(self, year):
        """Return the per-month histogram (coloured by severity) for ``year``."""
        dfg = self._rows_for_year(year)
        return px.histogram(dfg, x="Mois", color="Gravité",
                            template=self._template_for(dfg))

    def update_main_graph(self, acc_type, year):
        """Return the map for ``year``.

        ``acc_type`` is the radio-item value: falsy for the density heatmap,
        truthy for one marker per accident.
        """
        dfg = self._rows_for_year(year)
        template = self._template_for(dfg)
        if acc_type:
            fig = px.scatter_mapbox(dfg, lat='lat', lon='long',
                                    color='Gravité', mapbox_style='carto-positron',
                                    zoom=self.zoom, center=self.center,
                                    color_continuous_scale=px.colors.sequential.Bluered,
                                    custom_data=['jour', 'Mois', 'Année', 'Gravité'],
                                    template=template)
            fig.update_layout(height=800)
        else:
            fig = px.density_mapbox(dfg, z=None, lat='lat', lon='long',
                                    radius=5, opacity=.6, mapbox_style='carto-positron',
                                    zoom=self.zoom, center=self.center,
                                    color_continuous_scale=px.colors.diverging.Picnic,
                                    custom_data=['jour', 'Mois', 'Année', 'Gravité'],
                                    template=template)
            fig.update_layout(coloraxis_showscale=False, height=800)
        fig.update_traces(hovertemplate="Date: %{customdata[0]}/%{customdata[1]}/%{customdata[2]} | Gravité: %{customdata[3]}")
        # A constant uirevision keeps the user's pan/zoom when the figure is replaced.
        fig.update_layout(uirevision='constant')
        return fig

    def button_on_click(self, n_clicks, text):
        """Toggle the button label between START and STOP.

        NOTE(review): Dash also runs callbacks once when the page loads
        (``n_clicks`` is then None), which flips the initial START label to
        STOP and starts the animation - confirm that autoplay is intended.
        """
        return self.STOP if text == self.START else self.START

    def run_movie(self, text):
        """Return the timer's ``max_intervals``: 0 (stopped) when the label is START, else -1 (unlimited)."""
        return 0 if text == self.START else -1

    def on_interval(self, n_intervals, year, text):
        """Return the next slider year on a timer tick.

        The year only advances while the button shows STOP (animation
        running). It wraps from 2020 back to 2005 and jumps from 2008 to
        2010 because there is no data for 2009.
        """
        if text != self.STOP:
            return year
        if year == 2020:
            return 2005
        if year == 2008:
            return 2010
        return year + 1

    def run(self, debug=False, port=8050):
        """Start the Dash development server on all interfaces."""
        self.app.run_server(host="0.0.0.0", debug=debug, port=port)


if __name__ == '__main__':
    acc = Accidents()
    acc.run(port=8065)
00000000..5ade8109 Binary files /dev/null and b/pgab_accidents/data/acc_caracs_grav-2007.pkl differ diff --git a/pgab_accidents/data/acc_caracs_grav-2008.pkl b/pgab_accidents/data/acc_caracs_grav-2008.pkl new file mode 100644 index 00000000..5d403ac2 Binary files /dev/null and b/pgab_accidents/data/acc_caracs_grav-2008.pkl differ diff --git a/pgab_accidents/data/acc_caracs_grav-2010.pkl b/pgab_accidents/data/acc_caracs_grav-2010.pkl new file mode 100644 index 00000000..0382d71a Binary files /dev/null and b/pgab_accidents/data/acc_caracs_grav-2010.pkl differ diff --git a/pgab_accidents/data/acc_caracs_grav-2011.pkl b/pgab_accidents/data/acc_caracs_grav-2011.pkl new file mode 100644 index 00000000..4dc74f0f Binary files /dev/null and b/pgab_accidents/data/acc_caracs_grav-2011.pkl differ diff --git a/pgab_accidents/data/acc_caracs_grav-2012.pkl b/pgab_accidents/data/acc_caracs_grav-2012.pkl new file mode 100644 index 00000000..da301231 Binary files /dev/null and b/pgab_accidents/data/acc_caracs_grav-2012.pkl differ diff --git a/pgab_accidents/data/acc_caracs_grav-2013.pkl b/pgab_accidents/data/acc_caracs_grav-2013.pkl new file mode 100644 index 00000000..ca9a7a28 Binary files /dev/null and b/pgab_accidents/data/acc_caracs_grav-2013.pkl differ diff --git a/pgab_accidents/data/acc_caracs_grav-2014.pkl b/pgab_accidents/data/acc_caracs_grav-2014.pkl new file mode 100644 index 00000000..628355c3 Binary files /dev/null and b/pgab_accidents/data/acc_caracs_grav-2014.pkl differ diff --git a/pgab_accidents/data/acc_caracs_grav-2015.pkl b/pgab_accidents/data/acc_caracs_grav-2015.pkl new file mode 100644 index 00000000..f13d7456 Binary files /dev/null and b/pgab_accidents/data/acc_caracs_grav-2015.pkl differ diff --git a/pgab_accidents/data/acc_caracs_grav-2016.pkl b/pgab_accidents/data/acc_caracs_grav-2016.pkl new file mode 100644 index 00000000..cb2a7189 Binary files /dev/null and b/pgab_accidents/data/acc_caracs_grav-2016.pkl differ diff --git 
#!/usr/bin/env python3
"""Download the yearly road-accident files from data.gouv.fr and pickle them.

For every year listed in ``sources`` the script reads the "usagers" (people
involved) and "caracteristiques" (accident description) CSV files, reduces
the people to one severity value per accident, joins both tables and writes
``acc_caracs_grav-<year>.pkl`` in the current working directory.
"""
import pandas as pd
import os

# CSV options of the 2019+ files (semicolon separated, double-quoted).
default_kwargs = {
    'sep': ';',
    'quotechar': '"',
}

BASE_URL = 'https://www.data.gouv.fr/fr/datasets/r/'

# The raw ``grav`` code of a person is categorical, not ordered:
# 1 = unharmed, 2 = killed, 3 = hospitalised, 4 = lightly injured.
# It is mapped to an increasing severity rank (1 unharmed, 2 light injury,
# 3 serious injury, 4 fatal) so that "max" really selects the worst outcome.
SEVERITY_RANK = {1: 1, 4: 2, 3: 3, 2: 4}
# Rank given to codes outside the table (e.g. "not filled in").
UNKNOWN_SEVERITY = -1

# Columns removed from the merged table before pickling.
DROPPED_COLUMNS = ['hrmn', 'gps', 'adr', 'com', 'atm', 'int', 'an']


def process_caracs_pre_2019(caracs: pd.DataFrame) -> pd.DataFrame:
    """Scale ``lat``/``long`` by 1/100000 in place and return the same frame."""
    caracs[['lat','long']] /= 100_000
    return caracs


def _modern_source(usagers_id: str, caracs_id: str) -> dict:
    """Describe a 2019+ year: ';'-separated files, decimal comma in the accident file."""
    return {
        'usagers': {
            'url': BASE_URL + usagers_id,
            'opts': {**default_kwargs},
        },
        'caracteristiques': {
            'url': BASE_URL + caracs_id,
            'opts': {**default_kwargs, 'decimal': ','},
        },
    }


def _legacy_source(usagers_id: str, caracs_id: str) -> dict:
    """Describe a pre-2019 year: latin-1 accident file whose coordinates need rescaling."""
    return {
        'usagers': {
            'url': BASE_URL + usagers_id,
            'opts': {},
        },
        'caracteristiques': {
            'url': BASE_URL + caracs_id,
            'opts': {'encoding': 'iso-8859-1'},
            'process': process_caracs_pre_2019,
        },
    }


# (year, usagers resource id, caracteristiques resource id), newest first.
# 2009 is absent on purpose: the dashboard reports that year's file as invalid.
_MODERN_IDS = [
    ('2020', '78c45763-d170-4d51-a881-e3147802d7ee', '07a88205-83c1-4123-a993-cba5331e8ae0'),
    ('2019', '36b1b7b3-84b4-4901-9163-59ae8a9e3028', 'e22ba475-45a3-46ac-a0f7-9ca9ed1e283a'),
]
_LEGACY_IDS = [
    ('2018', '72b251e1-d5e1-4c46-a1c2-c65f1b26549a', '6eee0852-cbd7-447e-bd70-37c433029405'),
    ('2017', '07bfe612-0ad9-48ef-92d3-f5466f8465fe', '9a7d408b-dd72-4959-ae7d-c854ec505354'),
    ('2016', 'e4c6f4fe-7c68-4a1d-9bb6-b0f1f5d45526', '96aadc9f-0b55-4e9a-a70e-c627ed97e6f7'),
    ('2015', 'b43a4237-9359-4217-b833-8d3dc29a6c24', '185fbdc7-d4c5-4522-888e-ac9550718f71'),
    ('2014', '457c10ff-ea6c-4238-9af1-d8dc62b896d4', '85dfe8c6-589f-4e76-8a07-9f59e49ec10d'),
    ('2013', 'af4349c5-0293-4639-8694-b8b628bfc6c3', '18b1a57a-57bf-4bf1-b9ee-dfa5a3154225'),
    ('2012', 'a19e060e-1c18-4272-ac4e-d4745ab8fade', 'b2518ec1-6529-47bc-9d55-40e2effeb0e7'),
    ('2011', 'bd946492-31b3-428e-8494-a1e203bdc9cc', '37991267-8a15-4a9d-9b1c-ff3e6bea3625'),
    ('2010', 'c5e5664d-1483-41da-a4c6-5f1727d7a353', 'decdfe8c-38ff-4a06-b7fc-615785f2914d'),
    ('2008', '433e26cf-d4c8-4dd9-b3f2-ecbc8a8f0509', '722ebb99-c8b2-4635-bf8d-125dd280ee42'),
    ('2007', 'c5c30fc2-9bfd-4bcd-b45b-f01a31f1d087', '6fc7b169-4dfe-442c-8c28-8bd773aeddf8'),
    ('2006', 'ebb4c37e-1616-497d-b5ed-f8113bed2ae7', 'fafa33cf-50cb-4092-a819-d5209f684089'),
    ('2005', 'cecdbd46-11f2-41fa-b0bd-e6e223de6b3c', 'a47866f7-ece1-4de8-8d31-3a1b4f477e08'),
]

# year -> {'usagers': {...}, 'caracteristiques': {...}}; each entry holds the
# download 'url', the pandas.read_csv 'opts' and an optional 'process' hook.
sources = {
    **{year: _modern_source(u_id, c_id) for year, u_id, c_id in _MODERN_IDS},
    **{year: _legacy_source(u_id, c_id) for year, u_id, c_id in _LEGACY_IDS},
}


def worst_severity(usagers: pd.DataFrame) -> pd.DataFrame:
    """Return one row per ``Num_Acc`` holding the worst severity rank of its people.

    The returned frame is indexed by ``Num_Acc`` and has a single integer
    ``grav`` column on the 1-4 scale of ``SEVERITY_RANK`` (``UNKNOWN_SEVERITY``
    when no person of the accident has a known code). The input is not
    modified.
    """
    ranked = usagers[['Num_Acc', 'grav']].copy()
    ranked['grav'] = (ranked['grav']
                      .map(SEVERITY_RANK)
                      .fillna(UNKNOWN_SEVERITY)
                      .astype(int))
    return ranked.groupby('Num_Acc').agg('max')


def _read_table(spec: dict) -> pd.DataFrame:
    """Download one CSV described by ``spec`` and apply its optional 'process' hook."""
    table = pd.read_csv(spec['url'], **spec['opts'])
    if 'process' in spec:
        table = spec['process'](table)
    return table


def build_year(year: str, src: dict) -> pd.DataFrame:
    """Download both files of ``year`` and return the merged, trimmed table."""
    print("Reading user data...")
    usagers = _read_table(src['usagers'])
    print("Reading accident caracteristics data...")
    caracs = _read_table(src['caracteristiques'])

    acc_caracs_grav = pd.merge(caracs, worst_severity(usagers), how='inner', on='Num_Acc')
    acc_caracs_grav['year'] = year
    # errors='ignore': not every year provides all of these columns.
    return acc_caracs_grav.drop(columns=DROPPED_COLUMNS, errors='ignore')


def main() -> None:
    """Build and pickle the merged table of every year in ``sources``."""
    for year, src in sources.items():
        print(f"Collecting data for {year}")
        build_year(year, src).to_pickle(f"acc_caracs_grav-{year}.pkl")

    print()
    print("Done.")


if __name__ == '__main__':
    main()