Looking for some solutions? You are welcome.

SOLVED: Why is CPU utilization so high and GPU utilization so low when using Keras?

zheyuan xu:

Why is CPU utilization so high and GPU utilization so low when using Keras? As shown in this picture (enter image description here), in every 5-second interval the GPU utilization is about 80% for 1 second and 0% for the rest of the time. I use tensorflow.keras.utils.Sequence to load the data and model.fit_generator to train. I don't know how to deal with this problem.

train.py

def create_callbacks(opt, steps_per_epoch, model=None):
    """Build the list of Keras callbacks used for a training run.

    The log and result directories under ``opt.root_path`` are created when
    missing (including any missing parent directories).

    Args:
        opt: Parsed options. Reads ``root_path``, ``log_dir``, ``result_path``,
            ``lr``, ``lr_decay``, ``cycle_length``, ``multi_factor`` and
            ``warm_up_epoch``.
        steps_per_epoch: Number of batches per epoch, forwarded to the
            learning-rate scheduler.
        model: Optional model handed to ``ParallelModelCheckpoint``. When
            ``None`` a plain ``ModelCheckpoint`` is used instead.

    Returns:
        list: ``[learning_rate_scheduler, print_lr, checkpoint, early_stopping]``.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # "if not exists: mkdir" and also works when a parent directory is missing.
    log_dir = os.path.join(opt.root_path, opt.log_dir)
    os.makedirs(log_dir, exist_ok=True)
    # The log directory is prepared for TensorBoard, which is currently disabled:
    #tensorboard = TensorBoard(log_dir=log_dir, write_graph=True)

    result_path = os.path.join(opt.root_path, opt.result_path)
    os.makedirs(result_path, exist_ok=True)

    # Both checkpoint flavours share the same file pattern and settings.
    checkpoint_file = os.path.join(result_path, 'ep{epoch:03d}-val_acc{val_acc:.2f}.h5')
    checkpoint_kwargs = dict(monitor='val_acc', save_weights_only=True,
                             save_best_only=True, period=1)
    if model is not None:
        checkpoint = ParallelModelCheckpoint(model, checkpoint_file, **checkpoint_kwargs)
    else:
        checkpoint = ModelCheckpoint(checkpoint_file, **checkpoint_kwargs)

    early_stopping = EarlyStopping(monitor='val_acc', min_delta=0, patience=10)
    learning_rate_scheduler = SGDRScheduler_with_WarmUp(0, opt.lr, steps_per_epoch, lr_decay=opt.lr_decay,
                                                        cycle_length=opt.cycle_length, multi_factor=opt.multi_factor,
                                                        warm_up_epoch=opt.warm_up_epoch)

    print_lr = PrintLearningRate()

    return [learning_rate_scheduler, print_lr, checkpoint, early_stopping]



def train(opt):
    """Build the network, train it with ``fit_generator`` and save the weights.

    Steps, in order: clear the Keras session, build the model selected by
    ``opt.network``, optionally load ``opt.pretrained_weights``, create the
    train/val ``DataGenerator`` instances and the callbacks, then compile and
    fit either the model itself or a ``multi_gpu_model`` wrapper around it
    (when more than one GPU is listed in ``opt.gpus``). The final weights of
    the base model are written to ``trained_weights_final.h5`` inside the
    result directory.

    Args:
        opt: Parsed command-line options.
    """
    K.clear_session()
    video_input = Input(shape=(None, None, None, 3))
    build_network = nets.network[opt.network]
    model = build_network(video_input, num_classes=opt.num_classes)
    print("Create {} model with {} classes".format(opt.network, opt.num_classes))

    if opt.pretrained_weights is not None:
        model.load_weights(opt.pretrained_weights)
        print("Loading weights from {}".format(opt.pretrained_weights))

    optimizer = get_optimizer(opt)

    # Trailing positional arguments shared by both generators.
    clip_args = (opt.short_side, opt.crop_size, opt.clip_len, opt.n_samples_for_each_video)
    train_data_generator = DataGenerator(opt.data_name, opt.video_path, opt.train_list, opt.name_path,
                                         'train', opt.batch_size, opt.num_classes, True, *clip_args)
    val_data_generator = DataGenerator(opt.data_name, opt.video_path, opt.val_list, opt.name_path,
                                       'val', opt.batch_size, opt.num_classes, False, *clip_args)

    # Always run at least one step, even for a generator that reports zero batches.
    train_steps = max(1, len(train_data_generator))
    callbacks = create_callbacks(opt, train_steps, model)

    n_gpus = len(opt.gpus)
    if n_gpus > 1:
        print('Using multi gpus')
        fit_model = multi_gpu_model(model, gpus=n_gpus)
    else:
        fit_model = model

    fit_model.compile(optimizer=optimizer, loss=categorical_crossentropy, metrics=['accuracy'])
    fit_model.fit_generator(train_data_generator,
                            steps_per_epoch=train_steps,
                            epochs=opt.epochs,
                            validation_data=val_data_generator,
                            validation_steps=max(1, len(val_data_generator)),
                            workers=opt.workers,
                            callbacks=callbacks,
                            use_multiprocessing=True)

    # Weights are saved from the base model, not from the multi-GPU wrapper.
    final_weights = os.path.join(opt.root_path, opt.result_path, 'trained_weights_final.h5')
    model.save_weights(final_weights)


if __name__ == "__main__":
    # Script entry point: parse the options, expose only the GPUs listed in
    # opt.gpus through CUDA_VISIBLE_DEVICES, then start training.
    opt = parse_opts()
    print(opt)
    visible_devices = ",".join(str(gpu) for gpu in opt.gpus)
    os.environ['CUDA_VISIBLE_DEVICES'] = visible_devices
    train(opt)

Some of the parameters are set like this:

    --num_classes=60 \
--workers=4 \
--batch_size=64 \
--crop_size=160 \
--clip_len=32 \
--short_side 192 224 \
--gpus 8 9

Some code from my Keras data loader:

import os
import random
import math
import copy
import time
import numpy as np
from tensorflow.keras.utils import Sequence
from .spatial_transforms import RandomCrop, Scale, RandomHorizontalFlip, CenterCrop, Compose, Normalize, PreCenterCrop
from .tempora_transforms import TemporalRandomCrop, TemporalCenterCrop
from .utils import load_value_file, load_clip_video


def get_ntu(video_path, file_path, name_path, mode, num_classes):
    """Read an NTU-style list file and return video paths with their labels.

    Each non-blank line of ``file_path`` is one entry. The label is the
    three characters following the first ``'A'`` in the entry, converted to a
    zero-based integer (e.g. ``...A002`` -> ``1``).
    NOTE(review): this assumes no ``'A'`` appears earlier in the entry than
    the action code - confirm against the actual list files.

    Args:
        video_path: Directory that each entry is joined onto.
        file_path: Text file with one video entry per line.
        name_path: Text file whose line count must equal ``num_classes``.
        mode: Unused here; kept for a signature parallel to ``get_ucf101``.
        num_classes: Expected number of classes.

    Returns:
        tuple: ``(video_files, label_files)`` - two parallel lists.

    Raises:
        AssertionError: If ``name_path`` does not contain ``num_classes`` lines.
    """
    # Context managers make sure both files are closed again.
    with open(name_path, 'r') as name_file:
        class_lines = name_file.readlines()

    assert num_classes == len(class_lines), \
        'expected {} classes but {} lists {}'.format(num_classes, name_path, len(class_lines))

    video_files = []
    label_files = []

    with open(file_path, 'r') as list_file:
        for line in list_file:
            entry = line.strip()
            if not entry:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            label = int(entry.split('A')[1][:3]) - 1
            label_files.append(label)
            video_files.append(os.path.join(video_path, entry))

    return video_files, label_files

def get_ucf101(video_path, file_path, name_path, mode, num_classes):
    """Read a UCF101-style list file and return video paths with their labels.

    ``name_path`` holds one class per line with the class name as the second
    whitespace-separated field; classes are numbered from 0 in file order.
    Each entry of ``file_path`` is ``<class>/<video>[.ext]``; in ``'train'``
    mode it is followed by one extra whitespace-separated field, which is
    ignored. The label is looked up from the part before the first ``'/'``.

    Args:
        video_path: Directory that each entry (minus extension) is joined onto.
        file_path: Text file with one video entry per line.
        name_path: Text file listing the class names.
        mode: ``'train'`` or ``'val'``; selects the list-file line format.
        num_classes: Expected number of distinct classes.

    Returns:
        tuple: ``(video_files, label_files)`` - two parallel lists.

    Raises:
        AssertionError: If the number of class names differs from ``num_classes``.
        ValueError: If ``mode`` is neither ``'train'`` nor ``'val'``.
        KeyError: If an entry's class directory is not listed in ``name_path``.
    """
    # Context managers make sure both files are closed again.
    with open(name_path, 'r') as name_file:
        class_lines = [line for line in name_file if line.strip()]
    name2index = {line.split()[1]: index for index, line in enumerate(class_lines)}

    assert num_classes == len(name2index), \
        'expected {} classes but {} lists {}'.format(num_classes, name_path, len(name2index))

    # Validate once up front rather than per line, so an empty list file
    # cannot hide a bad mode.
    if mode not in ('train', 'val'):
        raise ValueError('mode must be train or val')

    video_files = []
    label_files = []
    with open(file_path, 'r') as list_file:
        for path_label in list_file:
            if not path_label.strip():
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            if mode == 'train':
                path, _ = path_label.split()
            else:
                # Strip so the trailing newline never ends up in the path.
                path = path_label.strip()
            pathname, _ = os.path.splitext(path)
            video_files.append(os.path.join(video_path, pathname))
            label = pathname.split('/')[0]
            label_files.append(name2index[label])
    return video_files, label_files


class DataGenerator(Sequence):
    """Keras ``Sequence`` serving batches of video clips with one-hot labels.

    On construction the video list is read (``get_ucf101`` or ``get_ntu``),
    expanded into a list of samples by ``makedataset`` and, if requested,
    shuffled. Each batch is built on demand in ``__getitem__`` by loading the
    frames of every sample and applying the temporal and spatial transforms.

    Args:
        data_name: ``'ucf101'`` or ``'ntu'``; selects the list-file parser.
        video_path: Root directory of the per-video frame folders.
        file_path: List file naming the videos of this split.
        name_path: File listing the class names.
        mode: ``'train'`` (random crop/flip, random temporal crop) or
            ``'val'`` (scale, centre temporal crop).
        batch_size: Number of samples per batch.
        num_classes: Number of classes; width of the one-hot labels.
        shuffle: Shuffle the samples at construction and after every epoch.
        short_side: Accepted for interface compatibility; not used by this
            class.
        crop_size: Size passed to ``RandomCrop`` / ``Scale``.
        clip_len: Clip length passed to the temporal transform and used as
            the window length when several samples are cut per video.
        n_samples_for_each_video: ``1`` keeps one sample spanning all frames
            of a video; other values cut several windows per video.

    Raises:
        ValueError: If ``data_name`` or ``mode`` is not one of the supported
            values.
    """

    def __init__(self, data_name, video_path, file_path,
                 name_path, mode, batch_size, num_classes,
                 shuffle, short_side=(256, 320), crop_size=224,
                 clip_len=64, n_samples_for_each_video=1):
        super().__init__()
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.shuffle = shuffle

        if data_name == 'ucf101':
            self.video_files, self.label_files = get_ucf101(video_path, file_path, name_path, mode, num_classes)
        elif data_name == 'ntu':
            self.video_files, self.label_files = get_ntu(video_path, file_path, name_path, mode, num_classes)
        else:
            # Fail here rather than with an AttributeError on
            # self.video_files further down.
            raise ValueError('data_name must be ucf101 or ntu')

        if mode == 'train':
            self.spatial_transforms = Compose([
                PreCenterCrop(),
                RandomCrop(crop_size),
                RandomHorizontalFlip(),
                Normalize()
            ])
            self.temporal_transforms = TemporalRandomCrop(clip_len)
        elif mode == 'val':
            self.spatial_transforms = Compose([
                PreCenterCrop(),
                Scale(crop_size),
                Normalize()
            ])
            self.temporal_transforms = TemporalCenterCrop(clip_len)
        else:
            raise ValueError('mode must be train or val')

        self.dataset = self.makedataset(n_samples_for_each_video, clip_len)
        print('Dataset loading Successful!!!')
        if self.shuffle:
            random.shuffle(self.dataset)

    def __len__(self):
        """Return the number of batches per epoch.

        Counted from ``self.dataset`` - the list ``__getitem__`` slices -
        rather than from ``self.video_files``: the two differ whenever videos
        are skipped or more than one sample is cut per video.
        """
        return math.ceil(len(self.dataset) / self.batch_size)

    def __getitem__(self, index):
        """Return batch ``index`` as ``(video_data, label_data)``."""
        start = index * self.batch_size
        batch_dataset = self.dataset[start:start + self.batch_size]
        return self.data_generator(batch_dataset)

    def on_epoch_end(self):
        """Reshuffle the samples between epochs when shuffling is enabled."""
        if self.shuffle:
            random.shuffle(self.dataset)

    def makedataset(self, n_samples_for_each_video, clip_len):
        """Expand the video list into a list of sample dicts.

        Videos whose path does not exist or whose ``n_frames`` value is not
        positive are skipped. Every sample has the keys ``'video_path'``,
        ``'label'`` and ``'frame_indices'`` (1-based frame numbers).

        Args:
            n_samples_for_each_video: ``1`` yields a single sample covering
                frames ``1..n_frames``. A value above 1 spreads window starts
                over the video with a step derived from the frame count; any
                other value uses ``clip_len`` as the step.
            clip_len: Maximum number of frames per window.

        Returns:
            list: The sample dicts.
        """
        dataset = []
        total = len(self.video_files)
        for i, (video_file, label) in enumerate(zip(self.video_files, self.label_files)):
            if i % 1000 == 0:
                print('dataset loading [{}/{}]'.format(i, total))

            if not os.path.exists(video_file):
                print('{} is not exist'.format(video_file))
                continue

            # The frame count is read from the 'n_frames' entry inside the
            # video folder.
            n_frame_path = os.path.join(video_file, 'n_frames')
            n_frames = int(load_value_file(n_frame_path))
            if n_frames <= 0:
                continue

            label = int(label)
            if n_samples_for_each_video == 1:
                dataset.append({
                    'video_path': video_file,
                    'label': label,
                    'frame_indices': list(range(1, n_frames + 1)),
                })
                continue

            if n_samples_for_each_video > 1:
                step = max(1, math.ceil((n_frames - 1 - clip_len) / (n_samples_for_each_video - 1)))
            else:
                step = clip_len
            for first_frame in range(1, n_frames, step):
                last_frame = min(n_frames + 1, first_frame + clip_len)
                dataset.append({
                    'video_path': video_file,
                    'label': label,
                    'frame_indices': list(range(first_frame, last_frame)),
                })

        return dataset

    def data_generator(self, batch_dataset):
        """Load and transform every sample of a batch.

        Args:
            batch_dataset: Sample dicts as produced by ``makedataset``.

        Returns:
            tuple: ``(video_data, label_data)`` where ``video_data`` is the
            array of stacked clips and ``label_data`` holds one one-hot row
            of width ``num_classes`` per sample.
        """
        video_data = []
        label_data = []

        for data in batch_dataset:
            frame_indices = data['frame_indices']
            if self.temporal_transforms is not None:
                frame_indices = self.temporal_transforms(frame_indices)

            clip = load_clip_video(data['video_path'], frame_indices)

            if self.spatial_transforms is not None:
                # Draw the random parameters once per clip; the transform is
                # then applied to each frame in turn.
                self.spatial_transforms.randomize_parameters()
                clip = [self.spatial_transforms(img) for img in clip]

            video_data.append(np.stack(clip, 0))
            label_data.append(data['label'])

        video_data = np.array(video_data)
        # Indexing the identity matrix by label yields the one-hot rows.
        label_data = np.eye(self.num_classes)[label_data]
        return video_data, label_data



Posted in S.E.F
via StackOverflow & StackExchange Atomic Web Robots
Share:

No comments:

Recent