-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtabla2paper.py
145 lines (110 loc) · 4.55 KB
/
tabla2paper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""
En esta rutina armamos la tabla 2 del paper donde se comparan los
datos de viento de la boya contra los datos de CCMPv2 y ERA- interim
Dani Risaro
Noviembre 2019
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def reject_outliers(data, m=2):
    """
    Drop the values lying more than ``m`` standard deviations from the mean.

    Parameters
    ----------
    data : pandas.Series
        Values to filter (in this file, the differences between two
        wind-speed series).
    m : int or float, optional
        Width of the acceptance band around the mean, expressed in
        standard deviations. Defaults to 2.

    Returns
    -------
    Same type as ``data``
        The entries of ``data`` whose distance to the mean does not exceed
        ``m`` standard deviations, keeping their original index labels.
        NaN entries never satisfy the comparison, so they are dropped.
    """
    deviation = abs(data - np.mean(data))
    # ``<=`` rather than ``<``: a sample is an outlier only when it exceeds
    # the threshold. With a strict comparison, constant (or single-element)
    # input has a standard deviation of 0 and every sample would be rejected.
    return data[deviation <= m * np.std(data)]
def remove_outliers_diference(data_x, data_y, m=2):
    """
    Remove from a pair of series the samples whose difference is an outlier.

    The difference ``data_x - data_y`` is filtered with ``reject_outliers``
    and only the index labels that survive are kept in both inputs.

    Parameters
    ----------
    data_x : pandas.Series
        First series (A).
    data_y : pandas.Series
        Second series (B). The subtraction is aligned on the index, so both
        series are expected to carry the same index labels.
    m : int or float, optional
        Threshold, in standard deviations of the difference, passed on to
        ``reject_outliers``. Defaults to 2, the value that used to be
        hard-coded here.

    Returns
    -------
    tuple
        ``(data_x_without_out, data_y_without_out)``: the two inputs
        restricted to the index labels whose difference was not rejected.
    """
    dif = data_x - data_y
    kept_labels = reject_outliers(dif, m=m).index
    # ``.loc`` makes the label-based selection explicit; plain ``series[...]``
    # may be interpreted positionally depending on the index type.
    data_x_without_out = data_x.loc[kept_labels]
    data_y_without_out = data_y.loc[kept_labels]
    return data_x_without_out, data_y_without_out
# Table to fill: one row per buoy site, one column per reported parameter.
sites = ['A', 'B', 'C1', 'C2', 'D']
parameters = ['Latitude (S)',
              'Longitude (W)',
              'Date',
              'Period of data (days)',
              'Mean wsp buoy (m/s)',
              'Std wsp buoy (m/s)',
              'R (CCMPv2, buoy)',
              'R (ERA-Interim, buoy)',
              'R (CCMPv2, buoy) without outliers',
              'R (ERA-Interim, buoy) without outliers']
df = pd.DataFrame(index=sites, columns=parameters)

# Input files, listed in the same order as ``sites``.
directory = '/media/daniu/Seagate Expansion Drive/Documentos_DELL_home/datos_boya/'
buoyfiles = ['boya_2015/datos_boya_1hora.csv',
             'boya_2006/datos_boya_1hora.csv',
             'boya_2006_corto/datos_boya_1hora.csv',
             'boya_2005/datos_boya_1hora.csv',
             'boya_2016/datos_boya_1hora.csv']
ccmpfiles = ['boya_2015/datos_ccmp_6horas.csv',
             'boya_2006/datos_ccmp_6horas.csv',
             'boya_2006_corto/datos_ccmp_6horas.csv',
             'boya_2005/datos_ccmp_6horas.csv',
             'boya_2016/datos_ccmp_6horas.csv']
eraifiles = ['boya_2015/datos_era_interim_6horas.csv',
             'boya_2006/datos_era_interim_6horas.csv',
             'boya_2006_corto/datos_era_interim_6horas.csv',
             'boya_2005/datos_era_interim_6horas.csv',
             'boya_2016/datos_era_interim_6horas.csv']


def _correlations_with_buoy(buoy_speed, product_speed):
    """Return (R, R without outliers) between buoy and product wind speed."""
    # Build a new Series carrying the product's index instead of overwriting
    # ``.index`` on a column taken from the buoy DataFrame, so no object
    # shared with that DataFrame is mutated. The buoy samples are matched to
    # the product samples by position; a length mismatch raises ValueError.
    aligned_buoy = pd.Series(buoy_speed.values, index=product_speed.index)
    buoy_clean, product_clean = remove_outliers_diference(aligned_buoy,
                                                          product_speed)
    return aligned_buoy.corr(product_speed), buoy_clean.corr(product_clean)


def _round2(value):
    """Round a scalar to 2 decimals, accepting plain Python floats too."""
    # ``Series.corr`` returns a plain ``float`` NaN when no samples are left,
    # and plain floats have no ``.round`` method; ``np.round`` handles both.
    return np.round(value, 2)


for site, buoyfile, ccmpfile, eraifile in zip(sites, buoyfiles,
                                              ccmpfiles, eraifiles):
    ccmpv2 = pd.read_csv(directory + ccmpfile, index_col=0)
    erai = pd.read_csv(directory + eraifile, index_col=0)
    buoy_hor = pd.read_csv(directory + buoyfile, header=[0], index_col=0,
                           delimiter='\t')

    # Keep one buoy record out of every six. The last site ('D') starts at
    # the fourth record instead of the first.
    # NOTE(review): presumably this lines the hourly buoy record up with the
    # 6-hourly product times -- confirm against the data files.
    first_record = 3 if site == 'D' else 0
    boya = buoy_hor.iloc[first_record::6]
    x_boya = boya['int']

    # comparison with CCMP
    R_with_out_ccmp, R_without_out_ccmp = _correlations_with_buoy(
        x_boya, ccmpv2['speed'])

    # comparison with ERA-Interim
    R_with_out_erai, R_without_out_erai = _correlations_with_buoy(
        x_boya, erai['speed'])

    # lat - lon position and record length in days (24 hourly rows per day)
    pos_lat = boya.lat.values[0]
    pos_lon = boya.lon.values[0]
    ndays = len(buoy_hor) / 24

    # date range, built from the first and last index labels of the buoy file
    date = buoy_hor.index[0] + ' to ' + buoy_hor.index[-1]

    # fill the table row of this site
    row = {
        'Latitude (S)': _round2(pos_lat),
        'Longitude (W)': _round2(pos_lon),
        'Date': date,
        'Period of data (days)': int(ndays),
        'Mean wsp buoy (m/s)': _round2(x_boya.mean()),
        'Std wsp buoy (m/s)': _round2(x_boya.std()),
        'R (CCMPv2, buoy)': _round2(R_with_out_ccmp),
        'R (ERA-Interim, buoy)': _round2(R_with_out_erai),
        'R (CCMPv2, buoy) without outliers': _round2(R_without_out_ccmp),
        'R (ERA-Interim, buoy) without outliers': _round2(R_without_out_erai),
    }
    for column, value in row.items():
        df.loc[site, column] = value

print(df)
df.to_csv('/home/daniu/Documentos/tablas/tabla_buoy_observations.csv', sep=',')