SilentCities/postprocess.py at master · npaj/SilentCities · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os,sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from analysis import subset_probas
import plotly.graph_objects as go

fewlabels = ['Applause',
'Bird vocalization, bird call, bird song',
'Caw',
'Bee, wasp, etc.',
'Wind noise (microphone)','Rain','Vehicle','Silence']


def average_proba_over_freq(Df,freq_str='D',subset_labels=fewlabels):
    """
    Calculates probability density estimates over a configurable frequency

    arguments :
    Df : DataFrame, output of tag_silentcities
    freq_str : Frequency over which to calculate probability density estimate (default : days)
    subset_labels : Subset of labels from the Audioset Ontology to be used for the estimate.
    default labels are :
    'Applause','Bird vocalization, bird call, bird song','Chirp, tweet','Pigeon, dove',
    'Caw','Bee, wasp, etc.','Wind noise (microphone)','Rain','Vehicle','Emergency vehicle','Rail transport',
    'Aircraft','Silence'

    outputs :

    probas_agg : Probability Density estimates of the subset of labels calculate according to the frequency specified

    """
    #Let's use the datetime (Timestamp) as a new index for the dataframe
    ts = pd.DatetimeIndex(Df.datetime)
    Df.index = ts

    # Let's add the Labels from the shortlist as entries in the Df. Will be easier to manipulate them afterwards

    prob = subset_probas(Df,subset_labels)

    for f,curlabel in enumerate(subset_labels):
        Df[curlabel] = prob[:,f]

    # Now let's create a period range to easily compute statistics over days (frequency can be changed by changing the freq_str argument)
    prng = pd.period_range(start=ts[0],end=ts[-1], freq=freq_str).astype('datetime64[ns]')

    # And now create the final DataFrame that averages probabilities (of labels subset_labels) with the frequency defined in freq_str

    allser = dict()

    for lab in fewlabels:

        curser = pd.Series([Df[prng[i]:prng[i+1]][lab].mean() for i in range(len(prng)-1)],index=prng[:-1])

        allser[lab] = curser

    probas_agg = pd.DataFrame(allser)

    return probas_agg


def make_interactive_pdf(Df,list_resolutions = ['0.25H','H','3H','6H','12H','D'],active_beg = 4,subset_labels=fewlabels):

    fig = go.Figure()

     ## which resolution is active when starting

    ### loop on resolution

    for cur_res in list_resolutions:

        probas_agg = average_proba_over_freq(Df,freq_str=cur_res,subset_labels=fewlabels)

        datelabel = probas_agg.index

        # Create figure

        for curcol in probas_agg.columns:

            fig.add_trace(go.Scatter(x=datelabel, y=probas_agg[curcol],name=curcol,visible=False))


    nbcol = len(probas_agg.columns)

    # Make one resolution visible trace visible
    for curcol,_ in enumerate(probas_agg.columns):

        fig.data[active_beg*nbcol+curcol].visible = True


    # Create and add slider
    steps = []
    for i in range(len(list_resolutions)):
        step = dict(
            method="restyle",
            label=list_resolutions[i],
            args=["visible", [False] * len(fig.data)],
        )

        for curcol,_ in enumerate(probas_agg.columns):
            step["args"][1][i*nbcol+curcol] = True  # Toggle trace to "visible"
            steps.append(step)

    sliders = [dict(
        active=active_beg*nbcol,
        pad={"t": len(list_resolutions)},
        #currentvalue={"prefix": "Resolution: "},
        steps=steps
    )]

    fig.update_layout(
        sliders=sliders
    )
    return fig


if __name__ == "__main__":

    Df = pd.read_pickle('0001.xz')
    fig = make_interactive_pdf(Df)
    fig.write_html("myfig.html")
    fig.show()