import numpy as np
import numpy.ma as ma
+import h5py
+from collections import OrderedDict

#class to extract a mesa data file.
-class Mesa_Data:
-    def __init__(self, history_file, only_read_header = False, read_data = True, read_data_cols = [], clean_data = True):
+class mesa_data:
+    def __init__(self, history_file, only_read_header = False, read_data = True, read_data_cols = [],
+            clean_data = True, sample_every_n = 1, is_hdf5 = False):
        self.filename = history_file
+        self.is_hdf5 = is_hdf5
        #header is a dictionary with the general info from the second and third lines of the file
        self.header = {}
+        self.header_num = {}
        #columns is a dictionary which gives the column number (minus 1) corresponding to the key
        self.columns = {}
-        file = open(self.filename, "r")
-        #first line is not used
-        file.readline()
-        #following two lines have header data
-        header_names = file.readline().split()
-        header_vals = file.readline().split()
-        for i, header_name in enumerate(header_names):
-            self.header[header_name] = float(header_vals[i])
-        if only_read_header:
+        columns = []
+        if not is_hdf5:
+            file = open(self.filename, "r")
+            #first line is not used
+            file.readline()
+            #following two lines have header data
+            header_names = file.readline().split()
+            header_vals = file.readline().split()
+            #separate counter keeps header_num contiguous when entries are skipped
+            num_header = 0
+            for i, header_name in enumerate(header_names):
+                #skip the non-numeric header entries added by newer MESA versions
+                if header_name in ["compiler", "build", "MESA_SDK_version", "date"]:
+                    continue
+                self.header[header_name] = float(header_vals[i])
+                self.header_num[header_name] = num_header
+                num_header += 1
+            if only_read_header:
+                file.close()
+                return
+            #next line is empty
+            file.readline()
+            #following two lines have column data
+            nums = file.readline().split()
+            names = file.readline().split()
+            for i, name in enumerate(names):
+                self.columns[name] = int(nums[i])-1
+                columns.append(name)
+            file.close()
+        else:
+            file = h5py.File(self.filename, "r")
+            header_names = file['header_names'][:]
+            header_vals = file['header_vals'][:]
+            for i in range(len(header_names)):
+                key = header_names[i].decode('utf-8')
+                self.header[key] = header_vals[i]
+                self.header_num[key] = i
+            columns = file['data_names'][:].tolist()
+            for i, col in enumerate(columns):
+                self.columns[col.decode('utf-8')] = i
+                columns[i] = col.decode('utf-8')
            file.close()
-            return
-        #next line is empty
-        file.readline()
-        #following two lines have column data
-        nums = file.readline().split()
-        names = file.readline().split()
-        for i, name in enumerate(names):
-            self.columns[name] = int(nums[i])-1
-        file.close()

        if not read_data:
            return

        if len(read_data_cols) == 0:
-            read_data_cols = self.columns.keys()
+            read_data_cols = columns
-        self.read_data(read_data_cols, clean_data = clean_data)
+        self.read_data(read_data_cols, clean_data = clean_data, sample_every_n = sample_every_n)

-    def read_data(self, column_names, clean_data = True):
+    def read_data(self, column_names, clean_data = True, sample_every_n = 1):
        #always include model_number if it's part of the data
        if "model_number" not in column_names and "model_number" in self.columns:
            column_names.append("model_number")

-        #read data
-        data = np.loadtxt(self.filename, skiprows = 6, \
-            usecols = tuple([self.columns[k] for k in column_names]), unpack = True)
+        #be sure there are no repeated column names
+        #(could use set but that breaks the order of the columns, which is needed to save the file)
+        column_names = list(OrderedDict.fromkeys(column_names))
+
+        self.read_columns = column_names
+
+        if not self.is_hdf5:
+            #read data
+            data = np.loadtxt(self.filename, skiprows = 6, \
+                usecols = tuple([self.columns[k] for k in column_names]), unpack = True)
+        else:
+            file = h5py.File(self.filename, "r")
+            #h5py requires the selected column indices to be in increasing order
+            data = file['data_vals'][:,sorted([self.columns[k] for k in column_names])]
+            data = data.transpose()
+            file.close()

        self.data = {}
        #Be careful in case only one column is required
@@ -63,6 +102,7 @@ def read_data(self, column_names, clean_data = True):
            #last entry is valid, start from there and remove repeats
            for i in range(len(model_number)-2,-1,-1):
                if model_number[i] >= max_model_number:
+                    #exclude this point
                    mask[i] = 1
                else:
                    max_model_number = model_number[i]
@@ -71,14 +111,87 @@ def read_data(self, column_names, clean_data = True):
                for column in column_names:
                    self.data[column] = ma.masked_array(self.data[column], mask = mask).compressed()

+        #subsample points
+        if sample_every_n > 1 and "model_number" in self.columns and len(self.data["model_number"]) > 2:
+            #keep first and last entry and mask the rest, e.g. with sample_every_n = 3
+            #and 8 points only the entries at indices 0, 2, 5 and 7 are kept
+            model_number = self.data["model_number"]
+            mask = np.zeros(len(model_number))
+            for i in range(1,len(model_number)-1):
+                if (i+1)%sample_every_n != 0:
+                    #exclude this point
+                    mask[i] = 1
+
+            if sum(mask) > 0:
+                for column in column_names:
+                    self.data[column] = ma.masked_array(self.data[column], mask = mask).compressed()
+
+        #count number of points using the first entry in the dict
+        self.num_points = len(self.data[self.read_columns[0]])
+
    def get(self,key):
        return self.data[key]

+    def save_as_hdf5(self, filename, header_str_dtype = "S28", data_str_dtype = "S40", compression_opts = 4):
+        f = h5py.File(filename, "w")
+        dset_header_names = f.create_dataset("header_names", (len(self.header),), dtype = header_str_dtype)
+        dset_header_vals = f.create_dataset("header_vals", (len(self.header),), dtype = "d")
+        for key in self.header:
+            dset_header_names[self.header_num[key]] = np.string_(key)
+            dset_header_vals[self.header_num[key]] = self.header[key]
+        dset_column_names = f.create_dataset("data_names", (len(self.read_columns),), dtype = data_str_dtype)
+        dset_column_vals = f.create_dataset("data_vals", (self.num_points,len(self.read_columns)), dtype = "d",
+            compression = 'gzip', compression_opts = compression_opts)
+        for k, key in enumerate(self.read_columns):
+            dset_column_names[k] = np.string_(key)
+            dset_column_vals[:,k] = self.data[key]
+        f.close()
+
+    #creates a mesa look-alike output file
+    #prints all integers as doubles
+    #not the most efficient code but I don't care
+    def save_as_ascii(self, filename, header_str_format = "{0:>28}", header_double_format = "{0:>28.16e}",
+            data_str_format = "{0:>40}", data_double_format = "{0:>40.16e}"):
+        f = open(filename, "w")
+        for i in range(len(self.header)):
+            f.write(header_str_format.format(i+1))
+        f.write("\n")
+        #create an ordered list of keys
+        header_keys = []
+        for i in range(len(self.header)):
+            for key in self.header:
+                if self.header_num[key] == i:
+                    header_keys.append(key)
+                    break
+        for i, key in enumerate(header_keys):
+            f.write(header_str_format.format(key))
+        f.write("\n")
+        for i, key in enumerate(header_keys):
+            f.write(header_double_format.format(self.header[key]))
+        f.write("\n")
+        f.write("\n")
+
+        for i in range(len(self.read_columns)):
+            f.write(data_str_format.format(i+1))
+        f.write("\n")
+
+        for i, key in enumerate(self.read_columns):
+            f.write(data_str_format.format(key))
+        for k in range(self.num_points):
+            f.write("\n")
+            for i, key in enumerate(self.read_columns):
+                f.write(data_double_format.format(self.data[key][k]))
+
+        f.close()
+
+
+
#reads the profiles.index files in the folders specified by the logs_dirs array and returns
#an array containing paths to the individual profile files, after cleaning up redos and backups
def get_profile_paths(logs_dirs = ["LOGS"]):
    profile_paths = []
    for log_dir in logs_dirs:
+        print(log_dir, logs_dirs)
        model_number, paths = np.loadtxt(log_dir + "/profiles.index", skiprows = 1, usecols = (0,2), unpack = True)
        mask = np.zeros(len(paths))
        max_model_number = model_number[-1]
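For reference, a minimal usage sketch of the class as extended by this commit. The names here are illustrative rather than part of the commit: it assumes the module is saved as mesa.py, that LOGS/history.data is a standard MESA history file, and that star_age and log_L are among its columns.

    import mesa

    #read two history columns, keeping every 20th model plus the first and last points
    #(model_number is always added to the requested columns)
    h = mesa.mesa_data("LOGS/history.data",
            read_data_cols = ["star_age", "log_L"], sample_every_n = 20)

    #round-trip through the compressed HDF5 format introduced in this commit
    h.save_as_hdf5("history.h5")
    h2 = mesa.mesa_data("history.h5", is_hdf5 = True)
    print(h2.get("log_L")[-1])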