Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify video_domain_adapter #292

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
7ccd345
update .gitignore
xianyuanliu Jan 20, 2022
d955f73
update .gitignore
xianyuanliu Jan 20, 2022
1cecdf2
change root dir
xianyuanliu Jan 22, 2022
f9d0577
add EPIC100DatasetAccess
xianyuanliu Jan 22, 2022
046ef98
change transform_kind to transform
xianyuanliu Jan 22, 2022
77f1b0f
add NUM_SEGMENTS
xianyuanliu Jan 22, 2022
8a8581b
add INPUT_TYPE
xianyuanliu Jan 22, 2022
23b0e8e
add functions in VideoDatasetAccess for feature vector input
xianyuanliu Jan 22, 2022
f993f8d
add get_class_type
xianyuanliu Jan 22, 2022
60951d4
add CLASS_TYPE
xianyuanliu Jan 22, 2022
76f3e72
change num_classes to dict_num_classes
xianyuanliu Jan 22, 2022
feaf72a
update ClassNetVideo for dual-class task
xianyuanliu Jan 22, 2022
f5bc2b7
update test
xianyuanliu Jan 22, 2022
63c5be9
Merge branch 'main' into add_feature_vector_dataloader
xianyuanliu Jan 22, 2022
f89d8fc
change output folder to tb_logs
xianyuanliu Jan 22, 2022
b845a88
add get_class_type test
xianyuanliu Jan 22, 2022
ef74b72
update test_video_access
xianyuanliu Jan 22, 2022
b43802c
update config
xianyuanliu Jan 22, 2022
ba6f5c5
test bug fixes
xianyuanliu Jan 23, 2022
bdf9cbb
add VideoFeatureRecord in Videos.py & improve doc
xianyuanliu Jan 23, 2022
3ea4678
add epic100 test & bug fixes
xianyuanliu Jan 23, 2022
1540051
test bug fixes
xianyuanliu Jan 23, 2022
de0e6cd
test bug fixes
xianyuanliu Jan 23, 2022
cf1638b
add BaseAdaptTrainerVideo
xianyuanliu Jan 23, 2022
a2b3ce8
bug fixes
xianyuanliu Jan 23, 2022
4470413
add CLASS_TYPE
xianyuanliu Jan 23, 2022
37aeaac
add conditional function for class type
xianyuanliu Jan 23, 2022
a95a185
rename to num_classes
xianyuanliu Feb 7, 2022
ab23896
change root dir
xianyuanliu Feb 7, 2022
40861fc
Update doc
xianyuanliu Feb 7, 2022
dc4b990
Merge branch 'add_feature_vector_dataloader' into simplify_video_doma…
xianyuanliu Feb 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add VideoFeatureRecord in Videos.py & improve doc
  • Loading branch information
xianyuanliu committed Jan 23, 2022
commit bdf9cbbc75e2a16797acd9d8d122eb81fe48a263
111 changes: 79 additions & 32 deletions kale/loaddata/videos.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from PIL import Image

Expand All @@ -16,14 +17,13 @@ class VideoRecord(object):
represents a video sample's metadata.

Args:
root_datapath: the system path to the root folder
of the videos.
row: A list with four or more elements where 1) The first
element is the path to the video sample's frames excluding
the root_datapath prefix 2) The second element is the starting frame id of the video
3) The third element is the inclusive ending frame id of the video
4) The fourth element is the label index.
5) any following elements are labels in the case of multi-label classification
root_datapath (Path, optional): the system path to the root folder of the videos.
row (tuple, optional): A list with four or more elements where
1) The first element is the path to the video sample's frames excluding the root_datapath prefix.
2) The second element is the starting frame id of the video.
3) The third element is the inclusive ending frame id of the video.
4) The fourth element is the label index.
5) Any following elements are labels in the case of multi-label classification.
"""

def __init__(self, row, root_datapath):
Expand Down Expand Up @@ -56,6 +56,35 @@ def label(self):
return [int(label_id) for label_id in self._data[3:]]


class VideoFeatureRecord(object):
    """Represents one video sample stored as a pre-extracted feature vector.

    Helper record consumed by ``VideoFrameDataset`` when ``input_type="feature"``.
    (NOTE(review): the original docstring referred to ``VideoFeatureDataset``,
    which does not appear in this file — confirm the intended class name.)

    Args:
        index (int): index of the video feature vector within the annotation file.
        row (pandas.Series): one row of the annotation dataframe; may carry
            ``verb_class`` and/or ``noun_class`` label columns.
        num_segments (int): number of segments the video is split into.
    """

    def __init__(self, index, row, num_segments):
        self._data = row
        self._index = index
        self._n_seg = num_segments

    @property
    def num_frames(self):
        # For feature-vector input each segment contributes one "frame",
        # so the frame count equals the number of segments.
        return int(self._n_seg)

    @property
    def label(self):
        # Dual-label (verb, noun) task: both class columns present.
        if "verb_class" in self._data and "noun_class" in self._data:
            return int(self._data.verb_class), int(self._data.noun_class)
        # Verb-only task. The original's extra "noun_class not in" check was
        # redundant after the branch above and has been dropped.
        # NOTE(review): this branch returns a *list* while the others return
        # tuples — preserved for backward compatibility, but callers should
        # not rely on a uniform label type.
        if "verb_class" in self._data:
            return [int(self._data.verb_class)]
        # No label columns (e.g. unlabeled test data): dummy (0, 0).
        return 0, 0


class VideoFrameDataset(torch.utils.data.Dataset):
r"""
A highly efficient and adaptable dataset class for videos.
Expand Down Expand Up @@ -97,44 +126,47 @@ class VideoFrameDataset(torch.utils.data.Dataset):
might be ``jumping\0052\`` or ``sample1\`` or ``00053\``.

Args:
root_path: The root path in which video folders lie.
root_path (str, Path): root path in which video folders lie.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is Path a Python variable type?

Copy link
Member Author

@xianyuanliu xianyuanliu Feb 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed to pathlib.Path.

this is ROOT_DATA from the description above.
annotationfile_path: The .txt annotation file containing
annotationfile_path (str, Path): .txt annotation file containing
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

one row per video sample as described above.
image_modality: Image modality (RGB or Optical Flow).
num_segments: The number of segments the video should
be divided into to sample frames from.
frames_per_segment: The number of frames that should
image_modality (str): image modality (RGB or Optical Flow).
num_segments (int): number of segments the video should be divided into to sample frames from.
Default is 1 in image mode and 5 in feature vector mode.
frames_per_segment (int): number of frames that should
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The lines do not seem to exceed 120 characters. Please check for the same problem in the others.

be loaded per segment. For each segment's
frame-range, a random start index or the
center is chosen, from which frames_per_segment
consecutive frames are loaded.
imagefile_template: The image filename template that video frame files
imagefile_template (str): image filename template that video frame files
have inside of their video folders as described above.
transform: Transform pipeline that receives a list of PIL images/frames.
random_shift: Whether the frames from each segment should be taken
transform (Compose, optional): transform pipeline that receives a list of PIL images/frames.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

torchvision.transforms.Compose

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

random_shift (bool): whether the frames from each segment should be taken
consecutively starting from the center of the segment, or
consecutively starting from a random location inside the
segment range.
test_mode: Whether this is a test dataset. If so, chooses
test_mode (bool): whether this is a test dataset. If so, chooses
frames from segments with random_shift=False.
input_type (str): type of input. (options: 'image' or 'feature')
num_data_load (int): number of the data to load. (only used in feature vector mode)
total_segments (int): total number of segments a video is divided into. (only used in feature vector mode)

"""

def __init__(
self,
root_path: str,
annotationfile_path: str,
image_modality: str = "rgb",
num_segments: int = 3,
frames_per_segment: int = 1,
imagefile_template: str = "img_{:05d}.jpg",
root_path,
annotationfile_path,
image_modality="rgb",
num_segments=1,
frames_per_segment=1,
imagefile_template="img_{:05d}.jpg",
transform=None,
random_shift: bool = True,
test_mode: bool = False,
input_type: str = "image",
num_data_load: int = None,
total_segments: int = 25,
random_shift=True,
test_mode=False,
input_type="image",
num_data_load=None,
total_segments=25,
):
super(VideoFrameDataset, self).__init__()

Expand Down Expand Up @@ -196,7 +228,19 @@ def _read_feature_vector(self):
self._data = dict(zip(data_narrations, data_features))

def _parse_list(self):
    """Build ``self.video_list`` from the annotation file.

    - ``input_type == "image"``: each line of the .txt annotation file becomes
      a ``VideoRecord`` (space-separated path, frame range and label ids).
    - ``input_type == "feature"``: the pickled dataframe is read and each row
      becomes a ``VideoFeatureRecord``; the list is then repeated and sliced
      so its length equals ``self.num_data_load`` (used especially to pad the
      target-domain data up to the source-domain size).
    """
    if self.input_type == "image":
        # Context manager closes the annotation file; the original left the
        # handle from open() unclosed.
        with open(self.annotationfile_path) as anno_file:
            self.video_list = [
                VideoRecord(line.strip().split(" "), self.root_path) for line in anno_file
            ]
    elif self.input_type == "feature":
        label_file = pd.read_pickle(self.annotationfile_path).reset_index()
        # iterrows() yields (index, Series) pairs; unpack the Series directly
        # instead of the original's row[1] indexing on the wrapped tuple.
        self.video_list = [
            VideoFeatureRecord(i, row, self.total_segments)
            for i, (_, row) in enumerate(label_file.iterrows())
        ]
        # Repeat the list if its length is less than num_data_load
        # (especially for target data).
        # NOTE(review): assumes num_data_load is an int in feature mode —
        # the constructor default appears to be None; confirm callers set it.
        n_repeat = self.num_data_load // len(self.video_list)
        n_left = self.num_data_load % len(self.video_list)
        self.video_list = self.video_list * n_repeat + self.video_list[:n_left]

def _get_random_indices(self, record):
"""
Expand Down Expand Up @@ -311,8 +355,11 @@ def _get(self, record, indices):
seg_img = self._load_feature_vector(frame_index, record.segment_id)
images.extend(seg_img)
image_indices.append(frame_index)
if frame_index < record.end_frame:
frame_index += 1

if self.input_type == "image":
frame_index = frame_index + 1 if frame_index < record.end_frame else frame_index
else: # feature vector does not have record.end_frame.
frame_index = frame_index + 1 if frame_index < record.num_frames else frame_index

if self.input_type == "image" and self.transform is not None:
images = self.transform(images)
Expand Down