@@ -31,6 +31,7 @@ def plot_video(rows, cols, frame_list, plot_width, plot_height):
 2. Minimal demo without sparse temporal sampling for single continuous frame clips, without image transforms
 3. Demo with image transforms
 4. Demo 3 continued with PyTorch dataloader
+5. Demo of using a dataset where samples have multiple separate class labels

 """
 videos_root = os.path.join(os.getcwd(), 'demo_dataset')
@@ -145,7 +146,63 @@ def denormalize(video_tensor):
         """
         Insert Training Code Here
         """
-        print("Video Batch Tensor Size:", video_batch.size())
-        print("Labels Size:", labels.size())
+        print(labels)  # a (BATCH,)-sized tensor of integer class ids
+        print("\nVideo Batch Tensor Size:", video_batch.size())
+        print("Batch Labels Size:", labels.size())
         break
     break
+
+
+""" DEMO 5: SAMPLES WITH MULTIPLE LABELS """
+"""
+Apart from supporting a single label per sample, VideoFrameDataset also supports multi-label samples,
+where a sample is associated with more than one label. EPIC-KITCHENS, for example, associates a
+noun, verb, and action with each video clip. To support this, instead of each row in annotations.txt
+being (VIDEO_PATH, START_FRAME, END_FRAME, LABEL_ID), each row can also be
+(VIDEO_PATH, START_FRAME, END_FRAME, LABEL_1_ID, ..., LABEL_N_ID). An example of this can be seen in the
+directory `demo_dataset_multilabel`.
+
+Each sample returned by VideoFrameDataset is then ((FRAMESxCHANNELSxHEIGHTxWIDTH), (LABEL_1, ..., LABEL_N)).
+When paired with `torch.utils.data.DataLoader`, instead of each batch being yielded as
+((BATCHxFRAMESxCHANNELSxHEIGHTxWIDTH), (BATCH)), where the second tuple item holds the labels of the batch,
+the DataLoader returns each batch as ((BATCHxFRAMESxCHANNELSxHEIGHTxWIDTH), ((BATCH), ..., (BATCH))),
+where the second tuple item is itself a tuple of N BATCH-sized label tensors, N being the
+number of labels assigned to each sample.
+"""
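+# Illustrative annotations.txt rows for three labels per sample. These values
+# are a hypothetical sketch, not necessarily the actual contents of
+# demo_dataset_multilabel/annotations.txt:
+#
+#   jumping/0001 1 80 0 2 5
+#   running/0001 1 60 1 0 3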
+videos_root = os.path.join(os.getcwd(), 'demo_dataset_multilabel')
+annotation_file = os.path.join(videos_root, 'annotations.txt')
+
+dataset = VideoFrameDataset(
+    root_path=videos_root,
+    annotationfile_path=annotation_file,
+    num_segments=5,
+    frames_per_segment=1,
+    imagefile_template='img_{:05d}.jpg',
+    transform=preprocess,
+    random_shift=True,
+    test_mode=False
+)
+
+dataloader = torch.utils.data.DataLoader(
+    dataset=dataset,
+    batch_size=3,
+    shuffle=True,
+    num_workers=2,
+    pin_memory=True
+)
+
+print("\nMulti-Label Example")
+for epoch in range(10):
+    for batch in dataloader:
+        """
+        Insert Training Code Here
+        """
+        video_batch, (labels1, labels2, labels3) = batch
+
+        print("Video Batch Tensor Size:", video_batch.size())
+        print("Labels1 Size:", labels1.size())  # == batch_size
+        print("Labels2 Size:", labels2.size())  # == batch_size
+        print("Labels3 Size:", labels3.size())  # == batch_size
+
+        break
+    break
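+
+# With N labels per sample, the label tuple can also be unpacked generically
+# instead of naming each tensor. A minimal sketch (assumes every sample
+# carries the same number of labels):
+#
+#   video_batch, labels = batch          # labels: tuple of N (BATCH,)-sized tensors
+#   labels = torch.stack(labels, dim=1)  # -> one (BATCH x N) label tensor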