-
Notifications
You must be signed in to change notification settings - Fork 8
Open
Description
def _wrapFunc(func, funcFlopCompute):
    """Replace *func* with a wrapper that records FLOPs, MACs and duration.

    The original callable is stashed in the module-level ``old_functions``
    registry so it can be restored later.  On every call, the wrapper:
      * asks ``funcFlopCompute(*args, **kwds)`` for the (flops, macs) cost,
      * appends those counts to the innermost entries of the module-level
        ``module_flop_count`` / ``module_mac_count`` stacks (when non-empty),
      * times the real call — with CUDA events when the global ``cuda_sync``
        is truthy, wall-clock time otherwise — and records a
        ``profileEntry`` on ``module_profile_lists``.

    NOTE(review): ``func.__str__`` here is the *bound method object*, not a
    string; it is used as the registry/counter key. Presumably the unwrap
    code elsewhere looks entries up by the same key — verify before changing.
    """
    global cuda_sync
    original = func
    key = func.__str__  # bound-method object doubles as the registry key
    old_functions[key] = original
    display_name = func.__name__

    def wrapped(*args, **kwds):
        # Cost estimate first; only recorded if a counting scope is active.
        flops, macs = funcFlopCompute(*args, **kwds)
        if module_flop_count:
            module_flop_count[-1].append((key, flops))
        if module_mac_count and macs:
            module_mac_count[-1].append((key, macs))
        # cuda_sync is a mutable global and is deliberately re-read below,
        # mirroring the original's two separate checks.
        if cuda_sync:
            start_evt = torch.cuda.Event(enable_timing=True)
            end_evt = torch.cuda.Event(enable_timing=True)
            start_evt.record()
        else:
            t0 = time.time()
        result = original(*args, **kwds)
        if cuda_sync:
            end_evt.record()
            torch.cuda.synchronize()  # elapsed_time requires completed events
            elapsed = start_evt.elapsed_time(end_evt) / 1000  # ms -> s
        else:
            elapsed = time.time() - t0
        if module_profile_lists:
            module_profile_lists[-1].append(
                (display_name, profileEntry(flops, macs, elapsed)))
        return result

    # NOTE(review): assigning __str__ on the function instance does not
    # change str(wrapped) (dunders resolve on the type); kept as-is because
    # other code may read this attribute directly.
    wrapped.__str__ = func.__str__
    return wrapped
For each layer, the funcFlopCompute function runs twice. The first run happens inside F.linear (or the other wrapped F function); the second happens at "ret = oldFunc(*args, **kwds)". That makes the reported flops twice the true flops.
Why does funcFlopCompute run twice for each layer?
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels