Skip to content

Commit 4c50ddc

Browse files
authored
Merge pull request #1 from godzilla-but-nice/dev
python loader with some functions
2 parents 6f350b0 + e4de87e commit 4c50ddc

File tree

505 files changed

+13934
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

505 files changed

+13934
-0
lines changed

pyMCDS.py

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
import xml.etree.ElementTree as TE
2+
import numpy as np
3+
import pandas as pd
4+
import scipy.io as sio
5+
from pathlib import Path
6+
7+
class pyMCDS:
8+
'''
9+
This class contains a dictionary of dictionaries that contains all of the
10+
output from a single time step of a PhysiCell Model. This class assumes that
11+
all output files are stored in the same directory. Data is loaded by reading
12+
the .xml file for a particular timestep.
13+
14+
Parameters
15+
----------
16+
xml_name : string
17+
String containing the name of the xml file without the path
18+
output_path : string (default: '.')
19+
String containing the path (relative or absolute) to the directory
20+
where PhysiCell output files are stored
21+
22+
Attributes
23+
----------
24+
data : dictionary
25+
Hierarchical container for all of the data retrieved by parsing the xml
26+
file and the files referenced therein.
27+
'''
28+
def __init__(self, xml_file, output_path='.'):
29+
self.data = self._read_xml(xml_file, output_path)
30+
31+
def get_cells_df(self):
32+
'''
33+
Builds DataFrame from data['discrete_cells']
34+
35+
Returns
36+
-------
37+
cells_df : pd.Dataframe [n_cells, n_variables]
38+
Dataframe containing the cell data for all cells at this time step
39+
'''
40+
cells_df = pd.DataFrame(self.data['discrete_cells'])
41+
return cells_df
42+
43+
def get_menv_species_list(self):
44+
'''
45+
Returns list of chemical species in microenvironment
46+
47+
Returns
48+
-------
49+
species_list : array-like (str) [n_species,]
50+
Contains names of chemical species in microenvironment
51+
'''
52+
species_list = []
53+
for name in self.data['continuum_variables']:
54+
species_list.append(name)
55+
56+
return species_list
57+
58+
def get_concentrations(self, species):
59+
'''
60+
Returns the concentration arrays for the specified chemical species
61+
in the microenvironment.
62+
63+
Returns
64+
-------
65+
conc_arr : array-like (np.float) [x_voxels, y_voxels, z_voxels]
66+
Contains the concentration of the specified chemical in each voxel.
67+
The array spatially maps to a meshgrid of the voxel centers.
68+
'''
69+
conc_arr = self.data['continuum_variables'][species]['data']
70+
return conc_arr
71+
72+
def get_time(self):
73+
return self.data['metadata']['current_time']
74+
75+
76+
def _read_xml(self, xml_file, output_path='.'):
77+
'''
78+
Does the actual work of initializing MultiCellDS by parsing the xml
79+
'''
80+
81+
output_path = Path(output_path)
82+
xml_file = output_path / xml_file
83+
try:
84+
tree = TE.parse(xml_file)
85+
except:
86+
print('Data File:', xml_file, 'not found!')
87+
exit(0)
88+
89+
root = tree.getroot()
90+
MCDS = {}
91+
92+
# Get current simulated time
93+
metadata_node = root.find('metadata')
94+
time_node = metadata_node.find('current_time')
95+
MCDS['metadata'] = {}
96+
MCDS['metadata']['current_time'] = float(time_node.text)
97+
MCDS['metadata']['time_units'] = time_node.get('units')
98+
99+
# Get current runtime
100+
time_node = metadata_node.find('current_runtime')
101+
MCDS['metadata']['current_runtime'] = float(time_node.text)
102+
MCDS['metadata']['runtime_units'] = time_node.get('units')
103+
104+
# find the microenvironment node
105+
me_node = root.find('microenvironment')
106+
me_node = me_node.find('domain')
107+
108+
# find the mesh node
109+
mesh_node = me_node.find('mesh')
110+
MCDS['metadata']['spatial_units'] = mesh_node.get('units')
111+
MCDS['mesh'] = {}
112+
113+
# check for cartesian mesh
114+
cartesian = False
115+
mesh_type = mesh_node.get('type')
116+
if mesh_type == 'Cartesian':
117+
cartesian = True
118+
119+
# while we're at it, find the mesh
120+
coord_str = mesh_node.find('x_coordinates').text
121+
delimiter = mesh_node.find('x_coordinates').get('delimiter')
122+
x_coords = np.array(coord_str.split(delimiter), dtype=np.float)
123+
124+
coord_str = mesh_node.find('y_coordinates').text
125+
delimiter = mesh_node.find('y_coordinates').get('delimiter')
126+
y_coords = np.array(coord_str.split(delimiter), dtype=np.float)
127+
128+
coord_str = mesh_node.find('z_coordinates').text
129+
delimiter = mesh_node.find('z_coordinates').get('delimiter')
130+
z_coords = np.array(coord_str.split(delimiter), dtype=np.float)
131+
132+
# reshape into a mesh grid
133+
xx, yy, zz = np.meshgrid(x_coords, y_coords, z_coords)
134+
135+
MCDS['mesh']['x_coordinates'] = xx
136+
MCDS['mesh']['y_coordinates'] = yy
137+
MCDS['mesh']['z_coordinates'] = zz
138+
139+
# Voxel data must be loaded from .mat file
140+
voxel_file = mesh_node.find('voxels').find('filename').text
141+
voxel_path = output_path / voxel_file
142+
try:
143+
initial_mesh = sio.loadmat(voxel_path)['mesh']
144+
except:
145+
print('Data file', voxel_path, 'missing!')
146+
print('Referenced in', xml_file)
147+
exit(0)
148+
149+
# center of voxel specified by first three rows [ x, y, z ]
150+
# volume specified by fourth row
151+
MCDS['mesh']['voxels'] = {}
152+
MCDS['mesh']['voxels']['centers'] = initial_mesh[:3, :]
153+
MCDS['mesh']['voxels']['volumes'] = initial_mesh[3, :]
154+
155+
# Continuum_variables, unlike in the matlab version the individual chemical
156+
# species will be primarily accessed through their names e.g.
157+
# MCDS['continuum_variables']['oxygen']['units']
158+
# MCDS['continuum_variables']['glucose']['data']
159+
MCDS['continuum_variables'] = {}
160+
variables_node = me_node.find('variables')
161+
file_node = me_node.find('data').find('filename')
162+
163+
# micro environment data is shape [4+n, len(voxels)] where n is the number
164+
# of species being tracked. the first 3 rows represent (x, y, z) of voxel
165+
# centers. The fourth row contains the voxel volume. The 5th row and up will
166+
# contain values for that species in that voxel.
167+
me_file = file_node.text
168+
me_path = output_path / me_file
169+
try:
170+
me_data = sio.loadmat(me_path)['multiscale_microenvironment']
171+
except:
172+
print('Data file', me_path, 'missing!')
173+
print('Referenced in', xml_file)
174+
exit(0)
175+
176+
var_children = variables_node.findall('variable')
177+
178+
for i, species in enumerate(var_children):
179+
species_name = species.get('name')
180+
MCDS['continuum_variables'][species_name] = {}
181+
MCDS['continuum_variables'][species_name]['units'] = species.get(
182+
'units')
183+
184+
# travel down one level on tree
185+
species = species.find('physical_parameter_set')
186+
187+
# diffusion data for each species
188+
MCDS['continuum_variables'][species_name]['diffusion_coefficient'] = {}
189+
MCDS['continuum_variables'][species_name]['diffusion_coefficient']['value'] \
190+
= float(species.find('diffusion_coefficient').text)
191+
MCDS['continuum_variables'][species_name]['diffusion_coefficient']['units'] \
192+
= species.find('diffusion_coefficient').get('units')
193+
194+
# decay data for each species
195+
MCDS['continuum_variables'][species_name]['decay_rate'] = {}
196+
MCDS['continuum_variables'][species_name]['decay_rate']['value'] \
197+
= float(species.find('decay_rate').text)
198+
MCDS['continuum_variables'][species_name]['decay_rate']['units'] \
199+
= species.find('decay_rate').get('units')
200+
201+
# store data from microenvironment file as numpy array
202+
MCDS['continuum_variables'][species_name]['data'] \
203+
= me_data[4+i, :].reshape(xx.shape)
204+
205+
# in order to get to the good stuff we have to pass through a few different
206+
# hierarchal levels
207+
cell_node = root.find('cellular_information')
208+
cell_node = cell_node.find('cell_populations')
209+
cell_node = cell_node.find('cell_population')
210+
cell_node = cell_node.find('custom')
211+
# we want the PhysiCell data, there is more of it
212+
for child in cell_node.findall('simplified_data'):
213+
if child.get('source') == 'PhysiCell':
214+
cell_node = child
215+
break
216+
217+
MCDS['discrete_cells'] = {}
218+
data_labels = []
219+
# iterate over 'label's which are children of 'labels' these will be used to
220+
# label data arrays
221+
for label in cell_node.find('labels').findall('label'):
222+
# I don't like spaces in my dictionary keys
223+
fixed_label = label.text.replace(' ', '_')
224+
if int(label.get('size')) > 1:
225+
# tags to differentiate repeated labels (usually space related)
226+
dir_label = ['_x', '_y', '_z']
227+
for i in range(int(label.get('size'))):
228+
data_labels.append(fixed_label + dir_label[i])
229+
else:
230+
data_labels.append(fixed_label)
231+
232+
# load the file
233+
cell_file = cell_node.find('filename').text
234+
cell_path = output_path / cell_file
235+
try:
236+
cell_data = sio.loadmat(cell_path)['cells']
237+
except:
238+
print('Data file', cell_path, 'missing!')
239+
print('Referenced in', xml_file)
240+
exit(0)
241+
242+
for col in range(len(data_labels)):
243+
MCDS['discrete_cells'][data_labels[col]] = cell_data[col, :]
244+
245+
return MCDS

pyMCDS_timeseries.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
from pathlib import Path
4+
from pyMCDS import pyMCDS
5+
6+
class pyMCDS_timeseries:
7+
'''
8+
This class contains a np.array of pyMCDS objects as well as functions for
9+
extracting information from that list.
10+
11+
Parameters
12+
----------
13+
output_path : string
14+
String containing the path (relative or absolute) to the directory
15+
containing the PhysiCell output files
16+
17+
Attributes
18+
----------
19+
timeseries : array-like (pyMCDS) [n_timesteps,]
20+
Numpy array of pyMCDS objects sorted by time.
21+
'''
22+
def __init__(self, output_path='.'):
23+
# get a generator of output xml files sorted alphanumerically
24+
sorted_files = sorted(Path(output_path).glob('output*.xml'))
25+
26+
# make a big array to keep them all, create a pyMCDS object with each
27+
# self.timeseries = np.empty(len(sorted_files), dtype=pyMCDS)
28+
# first we'll do a list, if its slow we'll figure out the array
29+
ts_list = []
30+
for f in sorted_files:
31+
ts_list.append(pyMCDS(f.name, output_path))
32+
33+
self.timeseries = np.array(ts_list)
34+
35+
def get_times(self):
36+
'''
37+
Helper function to easily return the cumulative time at each step.
38+
39+
Returns
40+
-------
41+
time_array : ndaray (np.float) [n_timesteps,]
42+
Contains the 'current time' at each recorded point.
43+
'''
44+
time_array = np.zeros(self.timeseries.shape[0])
45+
for i in range(len(self.timeseries)):
46+
time_array[i] = self.timeseries[i].get_time()
47+
48+
return time_array
49+
50+
def plot_menv_total(self, species):
51+
'''
52+
Plot total concentration of chemicals in the system over time.
53+
54+
Parameters
55+
----------
56+
species : array-like (string) or string
57+
Name os names of chemical species to plot
58+
'''
59+
# set up arrays, quicker than lists
60+
times = self.get_times()
61+
total_conc = np.zeros((times.shape[0], len(species)))
62+
# iterate over the times to fill array
63+
for time in range(times.shape[0]):
64+
# iterate over the chemical species to fill array
65+
for i in range(len(species)):
66+
conc_arr = self.timeseries[time].get_concentrations(species[i])
67+
total_conc[time, i] = np.sum(conc_arr)
68+
# establish plots
69+
fig, ax = plt.subplots(figsize=(10, 8))
70+
# add plot elements and show plot
71+
for i, chem in enumerate(species):
72+
ax.plot(times, total_conc[:, i], label=chem)
73+
ax.set_xlabel('Time (min)')
74+
ax.set_ylabel('Total concentration')
75+
ax.legend()
76+
plt.show()
77+
78+
def plot_cell_type_counts(self):
79+
'''
80+
Plot the absolute counts of different types of cells over time.
81+
'''
82+
times = self.get_times()
83+
unique_types = np.unique(self.timeseries[0].data['discrete_cells']['cell_type'])
84+
cell_counts = np.zeros((times.shape[0], unique_types.shape[0]))
85+
for time_i in range(times.shape[0]):
86+
df = self.timeseries[time_i].get_cells_df()
87+
counts = df['cell_type'].value_counts()
88+
cell_counts[time_i, :] = counts
89+
fig, ax = plt.subplots(figsize=(10, 8))
90+
for i in range(cell_counts.shape[1]):
91+
ax.scatter(times, cell_counts[:, i], label=i, s=10)
92+
ax.set_xlabel('Time (min)')
93+
ax.set_ylabel('# of cells in system')
94+
ax.legend()
95+
plt.show()
96+
97+

0 commit comments

Comments
 (0)