Skip to content

Commit be2ee69

Browse files
committed
Add a singlification option.
This option allows threads to run on a single physical resource. The singlification can be done early (for negative values of the MCA parameter), in which case the threads are packed on the resources, or late (for positive values of the MCA parameter), in which case the threads are spread across the resources. Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent 74d2115 commit be2ee69

File tree

4 files changed

+51
-38
lines changed

4 files changed

+51
-38
lines changed

parsec/parsec.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ int parsec_report_binding_issues = 128;
100100
int parsec_report_bindings = 0; /* dont show the bindings by default */
101101
int parsec_runtime_ignore_bindings = 0; /* ignore the bindings provided by the process manager */
102102
int parsec_runtime_allow_ht = 0; /* bind to cores by default */
103+
int parsec_runtime_singlify_bindings = 0;
103104

104105
int parsec_want_rusage = 0;
105106
#if defined(PARSEC_HAVE_GETRUSAGE) && !defined(__bgp__)
@@ -489,6 +490,12 @@ parsec_context_t* parsec_init( int nb_cores, int* pargc, char** pargv[] )
489490
parsec_mca_param_reg_int_name("runtime", "allow_pu",
490491
"Allow threads to bind to PU (processing unit) instead of cores.",
491492
false, false, parsec_runtime_allow_ht, &parsec_runtime_allow_ht);
493+
parsec_mca_param_reg_int_name("runtime", "singlify_bindings",
494+
"Force threads to be bound on a single physical resource (negative indicated "
495+
"a early singlificationm 0 no singlification, and any positive value a late "
496+
"singlification). With a negative value threads will be packed, while with a "
497+
"positive value they will be spread across the available resources",
498+
false, false, parsec_runtime_singlify_bindings, &parsec_runtime_singlify_bindings);
492499
if( parsec_cmd_line_is_taken(cmd_line, "ht") ) {
493500
parsec_warning("Option ht (hyper-threading) is now deprecated as a command line argument. Use the MCA "
494501
"parsec_runtime_allow_pu instead.\n");

parsec/parsec_hwloc.c

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
#include <windows.h>
2323
#endif /* defined(__WINDOWS__) */
2424
#if defined(PARSEC_HAVE_HWLOC)
25-
static hwloc_topology_t topology;
25+
static hwloc_topology_t parsec_hwloc_loaded_topology;
26+
static hwloc_topology_t parsec_hwloc_restricted_topology;
2627
static int parsec_hwloc_first_init = 1;
2728
#endif /* defined(PARSEC_HAVE_HWLOC) */
2829
static int hyperth_per_core = 1;
@@ -104,18 +105,18 @@ int parsec_hwloc_init(void)
104105
parsec_fatal("Compile headers and runtime hwloc libraries are not compatible (headers %x ; lib %x)", HWLOC_API_VERSION, hwloc_get_api_version());
105106
}
106107
#endif
107-
hwloc_topology_init(&topology);
108-
hwloc_topology_load(topology);
108+
hwloc_topology_init(&parsec_hwloc_loaded_topology);
109+
hwloc_topology_load(parsec_hwloc_loaded_topology);
109110

110111
int binding_unit = (parsec_runtime_allow_ht ? HWLOC_OBJ_PU : HWLOC_OBJ_CORE);
111112

112113
parsec_cpuset_original = HWLOC_ALLOC();
113114
parsec_cpuset_restricted = HWLOC_ALLOC();
114115
/* save the original process binding */
115-
int rc = hwloc_get_cpubind(topology, parsec_cpuset_original, HWLOC_CPUBIND_PROCESS );
116+
int rc = hwloc_get_cpubind(parsec_hwloc_loaded_topology, parsec_cpuset_original, HWLOC_CPUBIND_PROCESS );
116117
if( 0 != rc ) {
117118
/* We are on a system without support for process/thread binding. */
118-
parsec_available_binding_resources = hwloc_get_nbobjs_by_type(topology, binding_unit);
119+
parsec_available_binding_resources = hwloc_get_nbobjs_by_type(parsec_hwloc_loaded_topology, binding_unit);
119120
hwloc_bitmap_set_range(parsec_cpuset_original, 0, parsec_available_binding_resources-1);
120121
parsec_runtime_ignore_bindings = 1; /* ignore all bindings provided by the user */
121122
}
@@ -126,7 +127,7 @@ int parsec_hwloc_init(void)
126127
* loaded topology might not be the real hardware topology, but restricted to what
127128
* this process can access.
128129
*/
129-
parsec_available_binding_resources = hwloc_get_nbobjs_by_type(topology, binding_unit);
130+
parsec_available_binding_resources = hwloc_get_nbobjs_by_type(parsec_hwloc_loaded_topology, binding_unit);
130131
hwloc_bitmap_set_range(parsec_cpuset_restricted, 0, parsec_available_binding_resources-1);
131132
} else {
132133
hwloc_bitmap_copy(parsec_cpuset_restricted, parsec_cpuset_original);
@@ -145,8 +146,8 @@ int parsec_hwloc_init(void)
145146
}
146147

147148
if( parsec_runtime_allow_ht > 1 ) {
148-
int hyperth_per_core = (hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU) /
149-
hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE));
149+
int hyperth_per_core = (hwloc_get_nbobjs_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_PU) /
150+
hwloc_get_nbobjs_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE));
150151
if( parsec_runtime_allow_ht > hyperth_per_core) {
151152
parsec_warning("HyperThreading:: There not enough logical processors to consider %i HyperThreads "
152153
"per core (set up to %i)", parsec_runtime_allow_ht, hyperth_per_core);
@@ -164,7 +165,7 @@ int parsec_hwloc_init(void)
164165
int parsec_hwloc_fini(void)
165166
{
166167
#if defined(PARSEC_HAVE_HWLOC)
167-
hwloc_topology_destroy(topology);
168+
hwloc_topology_destroy(parsec_hwloc_loaded_topology);
168169
parsec_hwloc_first_init = 1;
169170
#endif /* defined(PARSEC_HAVE_HWLOC) */
170171
return PARSEC_SUCCESS;
@@ -175,9 +176,9 @@ int parsec_hwloc_export_topology(int *buflen, char **xmlbuffer)
175176
#if defined(PARSEC_HAVE_HWLOC)
176177
if( parsec_hwloc_first_init == 0 ) {
177178
#if HWLOC_API_VERSION >= 0x20000
178-
return hwloc_topology_export_xmlbuffer(topology, xmlbuffer, buflen, 0 /*HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1*/);
179+
return hwloc_topology_export_xmlbuffer(parsec_hwloc_loaded_topology, xmlbuffer, buflen, 0 /*HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1*/);
179180
#else
180-
return hwloc_topology_export_xmlbuffer(topology, xmlbuffer, buflen);
181+
return hwloc_topology_export_xmlbuffer(parsec_hwloc_loaded_topology, xmlbuffer, buflen);
181182
#endif
182183
}
183184
#endif
@@ -193,7 +194,7 @@ void parsec_hwloc_free_xml_buffer(char *xmlbuffer)
193194

194195
#if defined(PARSEC_HAVE_HWLOC)
195196
if( parsec_hwloc_first_init == 0 ) {
196-
hwloc_free_xmlbuffer(topology, xmlbuffer);
197+
hwloc_free_xmlbuffer(parsec_hwloc_loaded_topology, xmlbuffer);
197198
}
198199
#endif
199200
}
@@ -203,8 +204,8 @@ int parsec_hwloc_distance( int id1, int id2 )
203204
#if defined(PARSEC_HAVE_HWLOC)
204205
int count = 0;
205206

206-
hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, id1);
207-
hwloc_obj_t obj2 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, id2);
207+
hwloc_obj_t obj = hwloc_get_obj_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE, id1);
208+
hwloc_obj_t obj2 = hwloc_get_obj_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE, id2);
208209

209210
while( obj && obj2) {
210211
if(obj == obj2 ) {
@@ -228,13 +229,13 @@ int parsec_hwloc_master_id( int level, int processor_id )
228229
unsigned int i;
229230
int ncores;
230231

231-
ncores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
232+
ncores = hwloc_get_nbobjs_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE);
232233

233234
/* If we are using hyper-threads */
234235
processor_id = processor_id % ncores;
235236

236-
for(i = 0; i < hwloc_get_nbobjs_by_depth(topology, level); i++) {
237-
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, level, i);
237+
for(i = 0; i < hwloc_get_nbobjs_by_depth(parsec_hwloc_loaded_topology, level); i++) {
238+
hwloc_obj_t obj = hwloc_get_obj_by_depth(parsec_hwloc_loaded_topology, level, i);
238239

239240
if(HWLOC_ISSET(obj->cpuset, processor_id)) {
240241
return HWLOC_FIRST(obj->cpuset);
@@ -253,8 +254,8 @@ unsigned int parsec_hwloc_nb_cores( int level, int master_id )
253254
#if defined(PARSEC_HAVE_HWLOC)
254255
unsigned int i;
255256

256-
for(i = 0; i < hwloc_get_nbobjs_by_depth(topology, level); i++){
257-
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, level, i);
257+
for(i = 0; i < hwloc_get_nbobjs_by_depth(parsec_hwloc_loaded_topology, level); i++){
258+
hwloc_obj_t obj = hwloc_get_obj_by_depth(parsec_hwloc_loaded_topology, level, i);
258259
if(HWLOC_ISSET(obj->cpuset, master_id)){
259260
return HWLOC_WEIGHT(obj->cpuset);
260261
}
@@ -269,14 +270,14 @@ size_t parsec_hwloc_cache_size( unsigned int level, int master_id )
269270
{
270271
#if defined(PARSEC_HAVE_HWLOC)
271272
#if defined(PARSEC_HAVE_HWLOC_OBJ_PU) || 1
272-
hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, master_id);
273+
hwloc_obj_t obj = hwloc_get_obj_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_PU, master_id);
273274
#else
274-
hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PROC, master_id);
275+
hwloc_obj_t obj = hwloc_get_obj_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_PROC, master_id);
275276
#endif /* defined(PARSEC_HAVE_HWLOC_OBJ_PU) */
276277

277278
while (obj) {
278279
#if HWLOC_API_VERSION >= 0x00020000
279-
if((int)level == hwloc_get_type_depth(topology, obj->type)) {
280+
if((int)level == hwloc_get_type_depth(parsec_hwloc_loaded_topology, obj->type)) {
280281
if(hwloc_obj_type_is_cache(obj->type)) {
281282
#else
282283
if(obj->depth == level){
@@ -319,9 +320,9 @@ int parsec_hwloc_nb_real_cores(void)
319320
int parsec_hwloc_core_first_hrwd_ancestor_depth(void)
320321
{
321322
#if defined(PARSEC_HAVE_HWLOC)
322-
int level = MAX( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE),
323-
hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) );
324-
assert(level < hwloc_get_type_depth(topology, HWLOC_OBJ_CORE));
323+
int level = MAX( hwloc_get_type_depth(parsec_hwloc_loaded_topology, HWLOC_OBJ_NODE),
324+
hwloc_get_type_depth(parsec_hwloc_loaded_topology, HWLOC_OBJ_SOCKET) );
325+
assert(level < hwloc_get_type_depth(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE));
325326
return level;
326327
#else
327328
return PARSEC_ERR_NOT_IMPLEMENTED;
@@ -331,7 +332,7 @@ int parsec_hwloc_core_first_hrwd_ancestor_depth(void)
331332
int parsec_hwloc_get_nb_objects(int level)
332333
{
333334
#if defined(PARSEC_HAVE_HWLOC)
334-
return hwloc_get_nbobjs_by_depth(topology, level);
335+
return hwloc_get_nbobjs_by_depth(parsec_hwloc_loaded_topology, level);
335336
#else
336337
(void)level;
337338
return PARSEC_ERR_NOT_IMPLEMENTED;
@@ -342,10 +343,10 @@ int parsec_hwloc_get_nb_objects(int level)
342343
int parsec_hwloc_socket_id(int core_id )
343344
{
344345
#if defined(PARSEC_HAVE_HWLOC)
345-
hwloc_obj_t core = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, core_id);
346+
hwloc_obj_t core = hwloc_get_obj_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE, core_id);
346347
hwloc_obj_t socket = NULL;
347348
if( NULL == core ) return PARSEC_ERR_NOT_FOUND; /* protect against NULL objects */
348-
if( NULL != (socket = hwloc_get_ancestor_obj_by_type(topology,
349+
if( NULL != (socket = hwloc_get_ancestor_obj_by_type(parsec_hwloc_loaded_topology,
349350
HWLOC_OBJ_SOCKET, core)) ) {
350351
return socket->logical_index;
351352
}
@@ -358,10 +359,10 @@ int parsec_hwloc_socket_id(int core_id )
358359
int parsec_hwloc_numa_id(int core_id )
359360
{
360361
#if defined(PARSEC_HAVE_HWLOC)
361-
hwloc_obj_t core = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, core_id);
362+
hwloc_obj_t core = hwloc_get_obj_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE, core_id);
362363
hwloc_obj_t node = NULL;
363364
if( NULL == core ) return PARSEC_ERR_NOT_FOUND; /* protect against NULL objects */
364-
if( NULL != (node = hwloc_get_ancestor_obj_by_type(topology , HWLOC_OBJ_NODE, core)) ) {
365+
if( NULL != (node = hwloc_get_ancestor_obj_by_type(parsec_hwloc_loaded_topology , HWLOC_OBJ_NODE, core)) ) {
365366
return node->logical_index;
366367
}
367368
#else
@@ -373,9 +374,9 @@ int parsec_hwloc_numa_id(int core_id )
373374
unsigned int parsec_hwloc_nb_cores_per_obj( int level, int index )
374375
{
375376
#if defined(PARSEC_HAVE_HWLOC)
376-
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, level, index);
377+
hwloc_obj_t obj = hwloc_get_obj_by_depth(parsec_hwloc_loaded_topology, level, index);
377378
if(NULL == obj) return PARSEC_ERR_NOT_FOUND;
378-
return hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->cpuset, HWLOC_OBJ_CORE);
379+
return hwloc_get_nbobjs_inside_cpuset_by_type(parsec_hwloc_loaded_topology, obj->cpuset, HWLOC_OBJ_CORE);
379380
#else
380381
(void)level; (void)index;
381382
return PARSEC_ERR_NOT_IMPLEMENTED;
@@ -384,15 +385,15 @@ unsigned int parsec_hwloc_nb_cores_per_obj( int level, int index )
384385

385386
hwloc_cpuset_t parsec_hwloc_cpuset_per_obj(int level, int index)
386387
{
387-
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, level, index);
388+
hwloc_obj_t obj = hwloc_get_obj_by_depth(parsec_hwloc_loaded_topology, level, index);
388389
if(NULL == obj) return NULL;
389390
return HWLOC_DUP(obj->cpuset);
390391
}
391392

392393
int parsec_hwloc_nb_levels(void)
393394
{
394395
#if defined(PARSEC_HAVE_HWLOC)
395-
return hwloc_get_type_depth(topology, HWLOC_OBJ_CORE);
396+
return hwloc_get_type_depth(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE);
396397
#else
397398
return PARSEC_ERR_NOT_IMPLEMENTED;
398399
#endif /* defined(PARSEC_HAVE_HWLOC) */
@@ -413,11 +414,11 @@ char *parsec_hwloc_get_binding(hwloc_cpuset_t* cpuset, int flag)
413414

414415
/** No need to check for return code: the set will be unchanged (0x0)
415416
* if get_cpubind fails */
416-
hwloc_get_cpubind(topology, stack_cpuset, flag );
417+
hwloc_get_cpubind(parsec_hwloc_loaded_topology, stack_cpuset, flag );
417418
HWLOC_ASPRINTF(&binding, stack_cpuset);
418419
HWLOC_FREE(stack_cpuset);
419420
} else {
420-
hwloc_get_cpubind(topology, *cpuset, flag );
421+
hwloc_get_cpubind(parsec_hwloc_loaded_topology, *cpuset, flag );
421422
HWLOC_ASPRINTF(&binding, *cpuset);
422423
}
423424
return binding;
@@ -441,7 +442,7 @@ int parsec_hwloc_bind_on_core_index(int cpu_index, int local_ht_index)
441442
}
442443

443444
/* Get the core of index cpu_index */
444-
obj = core = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, cpu_index);
445+
obj = core = hwloc_get_obj_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE, cpu_index);
445446
if (!core) {
446447
parsec_warning("parsec_hwloc: unable to get the core of index %i (nb physical cores = %i )",
447448
cpu_index, parsec_hwloc_nb_real_cores());
@@ -497,7 +498,7 @@ static hwloc_cpuset_t parsec_hwloc_cpuset_convert_to_system(hwloc_cpuset_t cpuse
497498
/* For each index in the mask, get the associated cpu object and use its cpuset to add it to the binding mask */
498499
hwloc_bitmap_foreach_begin(cpu_index, cpuset) {
499500
/* Get the core of index cpu */
500-
obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, cpu_index);
501+
obj = hwloc_get_obj_by_type(parsec_hwloc_loaded_topology, HWLOC_OBJ_CORE, cpu_index);
501502
if (!obj) {
502503
PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "parsec_hwloc_bind_on_mask_index: unable to get the core of index %i", cpu_index);
503504
} else {

parsec/parsec_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,7 @@ PARSEC_DECLSPEC extern int parsec_runtime_allow_ht;
472472
/* Control the display of the thread bindings */
473473
PARSEC_DECLSPEC extern int parsec_report_bindings;
474474
PARSEC_DECLSPEC extern int parsec_report_binding_issues;
475+
PARSEC_DECLSPEC extern int parsec_runtime_singlify_bindings;
475476

476477
/**
477478
* Global configuration variable controlling what tasks are given to the

parsec/vpmap.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ int parsec_vpmap_init(char* optarg, int nb_cores )
132132
for( int i = 0; i < parsec_nbvp; i++ ) {
133133
parsec_vpmap[i].cpuset = HWLOC_ALLOC();
134134
for( int j = 0; j < parsec_vpmap[i].nbthreads; j++ ) {
135+
if( parsec_runtime_singlify_bindings > 0 ) /* late singlify */
136+
hwloc_bitmap_singlify(parsec_vpmap[i].threads[j].cpuset);
135137
if( HWLOC_INTERSECTS(parsec_vpmap[i].cpuset, parsec_vpmap[i].threads[j].cpuset) ) {
136138
/* overlap detected, show it to the user */
137139
if(parsec_report_binding_issues) {
@@ -381,6 +383,8 @@ int parsec_vpmap_init_from_flat(int nbthreads)
381383
parsec_vpmap[0].threads = (vpmap_thread_t*)calloc(parsec_vpmap[0].nbthreads, sizeof(vpmap_thread_t));
382384

383385
int step = nbcores / nbthreads;
386+
if( -1 == parsec_runtime_singlify_bindings ) /* early singlify */
387+
step = 1;
384388
for( int id = 0; id < parsec_vpmap[0].nbthreads; id++ ) {
385389
parsec_vpmap[0].threads[id].nbcores = step;
386390
parsec_vpmap[0].threads[id].cpuset = HWLOC_ALLOC();

0 commit comments

Comments
 (0)