surface_morphometrics/run_pycurv.py at main · GrotjahnLab/surface_morphometrics · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#! /usr/bin/env python
"""Step two of the pipeline for surface morphometrics
Take a VTP style mesh generated through the screen and perform curvature analysis using AVV.

Citation: Barad BA, Medina M et al. Quantifying organellar ultrastructure in cryo-electron tomography using a surface morphometrics pipeline. J Cell Biol 2023.
Pycurv Citation: Salfer M et al. Reliable estimation of membrane curvature for cryo-electron tomography. PLOS Computational Biology 2020.

Two usage options:
1. Run pycurv on a single mesh (Recommended!):
  run_pycurv.py config.yml mesh.vtp
2. Run pycurv on all meshes in the working directory:
  run_pycurv.py config.yml

Because pycurv is quite resource intensive, it is recommended to use option 1 with a cluster submission script in parallel, rather than sequentially running all vtp files.
"""

__author__ = "Benjamin Barad"
__email__ = "benjamin.barad@gmail.com"
__license__ = "GPLv3"

# Set OMP_NUM_THREADS=1 before any imports to prevent OpenMP + fork deadlocks
# when graph-tool is used with multiprocessing.
# NOTE: keep this assignment above the remaining imports; the variable has to
# be in the environment before those modules are loaded for it to take effect.
# (Presumably `curvature` is what pulls in graph-tool - confirm before moving.)
import os
os.environ["OMP_NUM_THREADS"] = "1"

from sys import argv
import glob

import yaml

import curvature

# Check for the optional -f (force) flag, which skips the interactive
# confirmation prompts. Every occurrence is stripped in place so that the
# positional arguments keep their expected indices even when the flag is
# repeated (list.remove would only drop the first one).
force = "-f" in argv
if force:
    argv[:] = [arg for arg in argv if arg != "-f"]

# Check for a config file (mandatory); the mesh filename is optional.
if len(argv) < 2:
    print("Usage: run_pycurv.py [-f] config.yml [filename.surface.vtp]")
    # Exit with a non-zero status so shells and cluster submission scripts can
    # detect the misuse. SystemExit is raised directly because the bare exit()
    # helper is injected by the site module and is meant for interactive use.
    raise SystemExit(1)

# Load the configuration and check for a data dir and a work dir.
# The work dir is used for both input and output; when it is not set, the
# data dir is used in its place.
with open(argv[1]) as file:
    # An empty YAML document loads as None; treat it as an empty mapping so
    # the checks below print the intended message instead of raising.
    config = yaml.safe_load(file) or {}

# .get() treats a missing key the same as an empty value, so a config that
# simply omits work_dir/data_dir gets the messages below, not a KeyError.
if not config.get("work_dir"):
    if not config.get("data_dir"):
        print("No working directory is specified in the config file. Please specify a working directory or a data directory.")
        # Non-zero status: nothing can be run without a directory.
        raise SystemExit(1)
    print("No working directory is specified in the config file. The data directory will be used for input and output.")
    config["work_dir"] = config["data_dir"]

# Warn if configured cores exceed logical cores
cores = config["cores"]
# os.cpu_count() returns None when the count cannot be determined; in that
# case there is nothing to compare against, so the warning is skipped rather
# than failing on an int/None comparison.
logical_cores = os.cpu_count()
if logical_cores is not None and cores > logical_cores:
    print(f"WARNING: Configured cores ({cores}) exceeds the number of logical cores ({logical_cores}).")
    print("This may cause performance degradation due to oversubscription.")
    # The -f flag skips the confirmation prompt.
    if not force:
        answer = input("Continue anyway? [y/n] ")
        # Accept "y" regardless of case or surrounding whitespace; anything
        # else aborts the run (exit status 0, as the user chose to stop).
        if answer.strip().lower() != "y":
            raise SystemExit

# Figure out what files will be run
if len(argv) == 2:
    # No mesh was named on the command line: run on every mesh in work_dir.
    print("No input file specified - will run on all VTP files in the working directory")
    print("This may take a very long time - pycurv can take over an hour to run on a single mesh")
    print("It is recommended to run in parallel with a cluster submission script for individual files")
    print("Recommended usage: run_pycurv.py config.yml <meshname.surface.vtp>")
    # The -f flag skips the confirmation prompt.
    if not force:
        answer = input("Continue? [y/n]")
        # Accept "y" regardless of case or surrounding whitespace.
        if answer.strip().lower() != "y":
            raise SystemExit
    # os.path.join inserts the path separator when work_dir does not end with
    # one; plain string concatenation would build a pattern that matches
    # nothing in that case.
    mesh_pattern = os.path.join(config["work_dir"], "*.surface.vtp")
    # Only base names are kept (the directory is passed separately later on);
    # sorting makes the processing order reproducible between runs.
    mesh_files = sorted(os.path.basename(path) for path in glob.glob(mesh_pattern))
    if not mesh_files:
        print("WARNING: No files matching {} were found.".format(mesh_pattern))
else:
    print("Input file specified - will run on this file only")
    mesh_files = [argv[2]]

# Check that work dir exists for outputs.
# makedirs also creates any missing parent directories, and exist_ok=True
# makes the call a no-op when the directory is already there (including when
# another process creates it at the same moment).
os.makedirs(config["work_dir"], exist_ok=True)

# Run pycurv on each selected mesh in turn. An error on one mesh is reported
# and the mesh is recorded as failed; processing then moves on to the next.
failed_surfaces = []
total = len(mesh_files)
for position, surface in enumerate(mesh_files, start=1):
    print("Processing {} ({}/{})".format(surface, position, total))
    try:
        # Config lookups stay inside the try so that a missing key is handled
        # like any other per-mesh failure.
        work_dir = config["work_dir"]
        measurements = config["curvature_measurements"]
        curvature.run_pycurv(
            surface,
            work_dir,
            scale=1.0,
            radius_hit=measurements["radius_hit"],
            min_component=measurements["min_component"],
            exclude_borders=measurements["exclude_borders"],
            cores=config["cores"],
        )
        print("Completed {}\n".format(surface))
    except Exception as err:
        print("WARNING: Skipping {} due to error: {}\n".format(surface, err))
        failed_surfaces.append(surface)

# Summarize everything that was skipped so failures are not lost in the log.
if failed_surfaces:
    print("The following surfaces failed and were skipped:")
    for skipped in failed_surfaces:
        print("  - {}".format(skipped))


# Closing banner: a separator, next-step guidance and the two citations,
# printed one per line in this order.
closing_lines = (
    "-------------------------------------------------------",
    "Pycurv complete. It is highly recommended to check the AVV vtp file with paraview to confirm good results.",
    "If you are happy with the results, you can move on to `distances_and_orientations.py`.",
    "Pycurv Citation: Salfer M, Collado JF, Baumeister W, Fernández-Busnadiego R, Martínez-Sánchez A. Reliable estimation of membrane curvature for cryo-electron tomography. PLOS Comp Biol 2020.",
    "Pipeline Citation: Barad BA*, Medina M*, Fuentes D, Wiseman RL, Grotjahn DA. Quantifying organellar ultrastructure in cryo-electron tomography using a surface morphometrics pipeline. J Cell Biol 2023.",
)
for closing_line in closing_lines:
    print(closing_line)