diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..657c45603 --- /dev/null +++ b/.gitignore @@ -0,0 +1,148 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# jixunbo +"Icon\r" +experiment +.DS_Store + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# static files generated from Django application using `collectstatic` +media +static \ No newline at end of file diff --git "a/Icon\r" "b/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/README.md b/README.md new file mode 100755 index 000000000..98f8880d8 --- /dev/null +++ b/README.md @@ -0,0 +1,130 @@ +# Multi-Parts Multi-Channels Network for Person Re-identification + +This repo support +- [x] easy dataset preparation, including Market-1501, DukeMTMC-ReID, CUHK03, MOT17... 
+- [x] state-of-the-art deep neural networks and various options (tricks) for ReID
+- [x] easy combination of different kinds of loss functions
+- [x] end-to-end training and evaluation
+- [x] minimal package requirements
+
+
+List of functions
+- Warm-up learning rate
+- Random erasing augmentation
+- Cutout augmentation
+- Batch Drop Block
+- Label smoothing (Cross-Entropy loss)
+- Triplet loss
+- Multi-Similarity loss
+- Focal loss
+- Center loss
+- Ranked list loss
+- Different optimizers
+- Attention modules
+- BNNeck
+
+Implemented networks:
+- Multi-Parts Multi-Channels Network, which we proposed
+- PCB [[link]](https://arxiv.org/pdf/1711.09349.pdf)
+- MGN [[link]](https://arxiv.org/abs/1804.01438)
+- Bag of tricks [[link]](http://openaccess.thecvf.com/content_CVPRW_2019/papers/TRMTMCT/Luo_Bag_of_Tricks_and_a_Strong_Baseline_for_Deep_Person_CVPRW_2019_paper.pdf)
+- OSNet [[link]](https://arxiv.org/abs/1905.00953)
+
+
+## Get Started
+The architecture is concise and easy to follow: engine.py defines the training/testing procedure, main.py controls the overall training loop, and the folders model, loss, and optimizer contain the respective parts of the neural network.
+
+The user-friendly command-line module argparse lets us select datasets, networks, loss functions, and tricks as needed;
+the detailed options/configurations are described at the bottom of this page.
+
+If you don't have any dataset yet, run `git clone https://github.com/jixunbo/ReIDataset.git` to download Market-1501, DukeMTMC, and MOT17.
+
+To train the Multi-Parts Multi-Channels Network, run
+`python [path to repo]/main.py --datadir [path to datasets] --data_train DukeMTMC --data_test DukeMTMC --model MCMP_n --batchid 8 --batchimage 8 --batchtest 32 --test_every 10 --epochs 120 --save '' --decay_type step_50_80_110 --loss 0.5*CrossEntropy+0.5*MSLoss --margin 0.75 --nGPU 1 --lr 3.5e-4 --optimizer ADAM --random_erasing --warmup 'constant' --if_labelsmooth --feats 512`
+
+Alternatively, use the pre-defined config file:
+`python [path to repo]/main.py --config [path to repo]/mpmc_config.yaml --save ''`
+
+All logs, results and parameters will be saved in the folder 'experiment'.
+
+Note that the option '--datadir' is the dataset root, which contains the folders Market-1501, DukeMTMC-ReID, etc.
+'--data_train' and '--data_test' specify the names of the training/testing datasets; you can train on one dataset and test on another.
+'--batchid 6' and '--batchimage 8' indicate that each batch contains 6 persons with 8 different images each, 48 images in total (see the sampling sketch below).
+'--epochs' is the number of epochs to train, while '--test_every 10' means evaluation is executed every 10 epochs; the checkpoints of the network and optimizer are updated after every evaluation.
+
+For the MPMC model we provide two kinds of backbone: MPMC_r uses ResNet-50, while MPMC_n uses OSNet; OSNet has far fewer parameters but achieves slightly better performance than ResNet-50.
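The '--batchid'/'--batchimage' options follow the usual P x K identity sampling used by metric-learning losses. Below is a minimal sketch of the idea only; the real implementation is RandomIdentitySampler in data_v1/sampler.py, and the helper name and dictionary input here are hypothetical.

```python
import random

# Illustrative P x K sampling: pick `batchid` persons, then `batchimage` images
# per person, so one batch holds batchid * batchimage images.
# `id_to_paths` (person id -> list of image paths) is a hypothetical input;
# the repo's RandomIdentitySampler works on dataset indices instead.
def sample_pk_batch(id_to_paths, batchid=6, batchimage=8):
    pids = random.sample(list(id_to_paths), batchid)
    batch = []
    for pid in pids:
        paths = id_to_paths[pid]
        if len(paths) < batchimage:
            paths = paths * batchimage  # replicate when a person has too few images
        batch.extend(random.sample(paths, batchimage))
    return batch  # batchid * batchimage image paths
```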
+
+If you would like to re-implement Bag of Tricks, run
+`python [path to repo]/main.py --datadir [path to datasets] --data_train Market1501 --data_test Market1501 --model ResNet50 --batchid 16 --batchimage 4 --batchtest 32 --test_every 10 --epochs 120 --save '' --decay_type step_40_70 --loss 0.5*CrossEntropy+0.5*Triplet --margin 0.3 --nGPU 1 --lr 3.5e-4 --optimizer ADAM --random_erasing --warmup 'linear' --if_labelsmooth`
+or
+`python [path to repo]/main.py --config [path to repo]/bag_of_tricks_config.yaml --save`
+
+If you would like to re-implement PCB, run
+`python [path to repo]/main.py --datadir [path to datasets] --data_train Market1501 --data_test Market1501 --model PCB --batchid 8 --batchimage 8 --batchtest 32 --test_every 10 --epochs 120 --save '' --decay_type step_50_80_110 --loss 0.5*CrossEntropy+0.5*MSLoss --margin 0.75 --nGPU 1 --lr 5e-3 --optimizer ADAM --random_erasing --warmup 'constant' --if_labelsmooth --bnneck --parts 3`
+
+Note that the option '--parts' sets the number of stripes the feature map is divided into; the original paper uses 6.
+
+Similarly, for the MGN model run
+`python [path to repo]/main.py --datadir [path to datasets] --data_train Market1501 --data_test Market1501 --model MGN --batchid 16 --batchimage 4 --batchtest 32 --test_every 10 --epochs 120 --save '' --decay_type step_50_80_110 --loss 0.5*CrossEntropy+0.5*Triplet --margin 1.2 --nGPU 1 --lr 2e-4 --optimizer ADAM --random_erasing --warmup 'constant' --if_labelsmooth`
+
+If you have a pretrained model and a config file, run
+`python [path to repo]/main.py --test_only --config [path to repo]/mpmc_config.yaml --pre_train [path to pretrained model]` to see the performance of the model.
+
+If you want to resume a training process, assuming you have the checkpoint file 'model.pth.tar-latest', run
+`python [path to repo]/main.py --config [path to repo]/mpmc_config.yaml --load [path to checkpoint]`
+
+Of course, you can also set options individually on the argparse command line without a config file.
+
+## Easy Implementation
+Our code can be run easily without installing any package or requirement thanks to Google Colab; all the packages we need are standard pre-installed Colab packages.
+Open this [notebook](https://colab.research.google.com/drive/14aRebdOqJSfNlwXiI5USOQBgweckUwLS), follow the steps there, and you can see the training process and results.
+Please be sure that you are using Google's powerful GPU (Tesla P100 or T4).
+The whole training process (120 epochs) takes ~9 hours.
+
+If you are a hard-core player ^ ^ and you'd like to try different models or options, see Get Started above.
+
+
+### Option Description
+'--nThread': type=int, default=4, number of threads for data loading.
+'--cpu', action='store_true', if set, use CPU only.
+'--nGPU', type=int, default=1, number of GPUs.
+'--config', type=str, default="", config path; if you have a config file, use it to set options so you don't need to input any option again.
+'--datadir', type=str, the dataset root, which contains the folders Market-1501, DukeMTMC-ReID, etc.
+'--data_train' and '--data_test', type=str, specify the names of the training/testing datasets; you can train on one dataset and test on another. Supported options: Market1501, DukeMTMC, MOT17, CUHK03.
+'--batchid 6' and '--batchimage 8': type=int, indicate that each batch contains 6 persons with 8 different images each, 48 images in total.
+'--sampler', type=str, default='True', if 'True', sample batchid persons with batchimage images each per batch; otherwise, randomly select batchid\*batchimage images per batch.
+'--batchtest', type=int, default=32, total batch size for evaluation.
+'--test_only', action='store_true', if set, only run the evaluation.
+'--save', type=str, default='test', name of the folder to save output; if '', the name is created from the current time.
+'--load', type=str, default='', name of the output folder; if there is a checkpoint file in that folder, training will be resumed from it.
+'--pre_train', type=str, default='', path of the pre-trained model file.
+'--epochs', type=int, number of epochs to train, while '--test_every 10' means evaluation is executed every 10 epochs; the checkpoints of the network and optimizer are updated after every evaluation.
+'--model', default='MGN', name of the model, options: MPMC_n, MPMC_r, ResNet50, PCB, MGN.
+'--loss', type=str, default='0.5\*CrossEntropy+0.5\*Triplet', combines different loss functions with corresponding weights; you can use a single loss function or two and more, e.g. '1\*CrossEntropy', '0.5\*CrossEntropy+0.5\*MSLoss+0.0005\*CenterLoss', options: CrossEntropy, Triplet, MSLoss, CenterLoss, Focal, GroupLoss (see the parsing sketch after this list).
+'--margin', type=float, margin for Triplet and MSLoss.
+'--if_labelsmooth', action='store_true', if set, label smoothing is on.
+'--bnneck', action='store_true', if set, use BNNeck, only for ResNet and PCB.
+'--drop_block', action='store_true', if set, use Batch Drop Block.
+'--pool', type=str, default='avg', pooling method, options: avg, max.
+'--feats', type=int, default=256, dimension of the features used for evaluation.
+'--height', type=int, default=384, height of the input image.
+'--width', type=int, default=128, width of the input image.
+'--num_classes', type=int, default=751, number of classes of the training dataset; normally you don't need to set it, it is set automatically.
+'--lr', type=float, default=2e-4, initial learning rate.
+'--gamma', type=float, default=0.1, learning rate decay factor for step decay.
+'--warmup', type=str, default='none', learning rate warmup method, options: linear, constant, none.
+'--pcb_different_lr', type=str, default='True', if 'True', use a different lr only for PCB: if lr is 5e-3, the lr for the classifier is 5e-3 and the lr for the other parts is 5e-4.
+'--optimizer', default='ADAM', options: 'SGD', 'ADAM', 'NADAM', 'RMSprop'.
+'--momentum', type=float, default=0.9, SGD momentum.
+'--nesterov', action='store_true', if set, use SGD with Nesterov momentum.
+'--parts', type=int, default=6, sets the number of stripes the feature map is divided into; the original paper uses 6.
+'--re_rank', action='store_true', if set, use re-ranking.
+'--cutout', action='store_true', if set, use cutout augmentation.
+'--random_erasing', action='store_true', if set, use random erasing augmentation.
+'--probability', type=float, default=0.5, probability of random erasing.
+'--T', type=int, default=3, number of iterations for computing the group loss.
+'--num_anchors', type=int, default=1, number of anchors used for the group loss.
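The '--loss' string is a weighted sum of losses. The repo's loss module (not shown in this diff) does the real parsing and builds the loss objects; the snippet below is only a hypothetical sketch of how such a string decomposes into (weight, name) pairs, with the function name chosen for illustration.

```python
# Hypothetical sketch: decompose a '--loss' string such as
# '0.5*CrossEntropy+0.5*MSLoss+0.0005*CenterLoss' into (weight, name) pairs.
def parse_loss_option(loss_str):
    terms = []
    for term in loss_str.split('+'):
        weight, name = term.split('*')
        terms.append((float(weight), name))
    return terms

print(parse_loss_option('0.5*CrossEntropy+0.5*MSLoss+0.0005*CenterLoss'))
# -> [(0.5, 'CrossEntropy'), (0.5, 'MSLoss'), (0.0005, 'CenterLoss')]
```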
+ + + diff --git a/bag_of_tricks_config.yaml b/bag_of_tricks_config.yaml new file mode 100644 index 000000000..9d0f94050 --- /dev/null +++ b/bag_of_tricks_config.yaml @@ -0,0 +1,50 @@ +T: 3 +act: relu +amsgrad: false +batchid: 16 +batchimage: 4 +batchtest: 32 +beta1: 0.9 +beta2: 0.999 +bnneck: true +config: '' +cpu: false +cutout: false +dampening: 0 +data_test: DukeMTMC +data_train: DukeMTMC +datadir: /content/ReIDataset/ +decay_type: step_40_70 +drop_block: false +epochs: 120 +epsilon: 1.0e-08 +feat_inference: after +feats: 256 +gamma: 0.1 +h_ratio: 0.33 +height: 384 +if_labelsmooth: true +loss: 1*CrossEntropy+1*Triplet +lr: 0.00035 +lr_decay: 60 +margin: 0.3 +model: ResNet50 +momentum: 0.9 +nGPU: 1 +nThread: 4 +nesterov: true +num_anchors: 1 +num_classes: 702 +optimizer: ADAM +parts: 2 +pcb_different_lr: true +pool: avg +probability: 0.5 +random_erasing: true +reset: false +sampler: true +test_every: 10 +w_ratio: 1.0 +warmup: constant +weight_decay: 0.0005 +width: 128 diff --git "a/data_v1/Icon\r" "b/data_v1/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/data_v1/__init__.py b/data_v1/__init__.py new file mode 100755 index 000000000..2dd17451c --- /dev/null +++ b/data_v1/__init__.py @@ -0,0 +1,100 @@ +from importlib import import_module +from torchvision import transforms +from utils.random_erasing import RandomErasing, Cutout +from .sampler import RandomSampler, RandomIdentitySampler +from torch.utils.data import dataloader + + +class Data: + def __init__(self, args): + + # train_list = [ + # transforms.Resize((args.height, args.width), interpolation=3), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[ + # 0.229, 0.224, 0.225]) + # ] + + train_list = [ + transforms.Resize((args.height, args.width), interpolation=3), + transforms.Pad(10), + transforms.RandomCrop((args.height, args.width)), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ] + if args.random_erasing: + train_list.append(RandomErasing( + probability=args.probability, mean=[0.485, 0.456, 0.406])) + print('Using random_erasing augmentation.') + if args.cutout: + train_list.append(Cutout(mean=[0.485, 0.456, 0.406])) + print('Using cutout augmentation.') + + train_transform = transforms.Compose(train_list) + + test_transform = transforms.Compose([ + transforms.Resize((args.height, args.width), interpolation=3), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[ + 0.229, 0.224, 0.225]) + ]) + if not args.test_only and args.model == 'MGN': + module_train = import_module('data.' + args.data_train.lower()) + self.trainset = getattr(module_train, args.data_train)( + args, train_transform, 'train') + self.train_loader = dataloader.DataLoader(self.trainset, + sampler=RandomIdentitySampler( + self.trainset, args.batchid * args.batchimage, args.batchimage), + # shuffle=True, + batch_size=args.batchid * args.batchimage, + num_workers=args.nThread) + # elif not args.test_only and args.model in ['ResNet50','PCB'] and args.loss.split('*')[1]=='CrossEntropy': + # module_train = import_module('data.' 
+ args.data_train.lower()) + # self.trainset = getattr(module_train, args.data_train)( + # args, train_transform, 'train') + # self.train_loader = dataloader.DataLoader(self.trainset, + # shuffle=True, + # batch_size=args.batchid * args.batchimage, + # num_workers=args.nThread) + elif not args.test_only and args.model in ['ResNet50', 'PCB', 'PCB_v', 'PCB_conv', 'BB_2_db','BB', 'MGDB','MGDB_v2','MGDB_v3','BB_2_v3','BB_2', 'PCB_conv_modi_2', 'BB_2_conv','BB_2_cat', 'BB_4_cat','PCB_conv_modi', 'Pyramid','PLR'] and bool(args.sampler): + + module_train = import_module('data.' + args.data_train.lower()) + self.trainset = getattr(module_train, args.data_train)( + args, train_transform, 'train') + # self.train_loader = dataloader.DataLoader(self.trainset, + # sampler=RandomSampler( + # self.trainset, args.batchid, batch_image=args.batchimage), + # # shuffle=True, + # batch_size=args.batchid * args.batchimage, + # num_workers=args.nThread, + # drop_last=True) + self.train_loader = dataloader.DataLoader(self.trainset, + sampler=RandomIdentitySampler( + self.trainset, args.batchid * args.batchimage, args.batchimage), + # shuffle=True, + batch_size=args.batchid * args.batchimage, + num_workers=args.nThread) + + elif not args.test_only and args.model not in ['MGN', 'ResNet50', 'PCB','BB_2_db', 'PCB_v', 'PCB_conv','MGDB', 'PCB_conv_modi_2', 'PCB_conv_modi', 'BB', 'BB_2','BB_2_cat','BB_4_cat','PLR']: + raise Exception( + 'DataLoader for {} not designed'.format(args.model)) + else: + self.train_loader = None + + if args.data_test in ['Market1501', 'DukeMTMC', 'GTA']: + module = import_module('data.' + args.data_train.lower()) + self.galleryset = getattr(module, args.data_test)( + args, test_transform, 'test') + self.queryset = getattr(module, args.data_test)( + args, test_transform, 'query') + + else: + raise Exception() + # print(len(self.trainset)) + + self.test_loader = dataloader.DataLoader( + self.galleryset, batch_size=args.batchtest, num_workers=args.nThread) + self.query_loader = dataloader.DataLoader( + self.queryset, batch_size=args.batchtest, num_workers=args.nThread) diff --git a/data_v1/dukemtmc.py b/data_v1/dukemtmc.py new file mode 100644 index 000000000..a8be05ec9 --- /dev/null +++ b/data_v1/dukemtmc.py @@ -0,0 +1,83 @@ +from torch.utils.data import dataset +from torchvision.datasets.folder import default_loader +import os +import re + + +class DukeMTMC(dataset.Dataset): + def __init__(self, args, transform, dtype): + + self.transform = transform + self.loader = default_loader + + data_path = args.datadir + if dtype == 'train': + data_path += '/bounding_box_train' + elif dtype == 'test': + data_path += '/bounding_box_test' + else: + data_path += '/query' + + self.imgs = [path for path in self.list_pictures(data_path) if self.id(path) != -1] + + self._id2label = {_id: idx for idx, _id in enumerate(self.unique_ids)} + + def __getitem__(self, index): + path = self.imgs[index] + target = self._id2label[self.id(path)] + + img = self.loader(path) + if self.transform is not None: + img = self.transform(img) + + return img, target + + def __len__(self): + return len(self.imgs) + + @staticmethod + def id(file_path): + """ + :param file_path: unix style file path + :return: person id + """ + return int(file_path.split('/')[-1].split('_')[0]) + + @staticmethod + def camera(file_path): + """ + :param file_path: unix style file path + :return: camera id + """ + return int(file_path.split('/')[-1].split('_')[1][1]) + + @property + def ids(self): + """ + :return: person id list corresponding to dataset 
image paths + """ + return [self.id(path) for path in self.imgs] + + @property + def unique_ids(self): + """ + :return: unique person ids in ascending order + """ + return sorted(set(self.ids)) + + @property + def cameras(self): + """ + :return: camera id list corresponding to dataset image paths + """ + return [self.camera(path) for path in self.imgs] + + @staticmethod + def list_pictures(directory, ext='jpg|jpeg|bmp|png|ppm|npy'): + assert os.path.isdir( + directory), 'dataset is not exists!{}'.format(directory) + + return sorted([os.path.join(root, f) + for root, _, files in os.walk(directory) for f in files + if re.match(r'([\w]+\.(?:' + ext + '))', f)]) + diff --git a/data_v1/gta.py b/data_v1/gta.py new file mode 100644 index 000000000..e113d3f73 --- /dev/null +++ b/data_v1/gta.py @@ -0,0 +1,94 @@ +from torch.utils.data import dataset +from torchvision.datasets.folder import default_loader +import os +import re + + +class GTA(dataset.Dataset): + def __init__(self, args, transform, dtype): + + self.transform = transform + self.loader = default_loader + + data_path = args.datadir + if dtype == 'train': + data_path += '/train' + elif dtype == 'test': + data_path += '/gallery' + else: + data_path += '/query' + + self.imgs = [path for path in self.list_pictures(data_path) if self.id(path) != -1] + + self._id2label = {_id: idx for idx, _id in enumerate(self.unique_ids)} + print('{} classes.'.format(len(self.unique_ids))) + + def __getitem__(self, index): + path = self.imgs[index] + target = self._id2label[self.id(path)] + + img = self.loader(path) + if self.transform is not None: + img = self.transform(img) + + return img, target + + def __len__(self): + return len(self.imgs) + + @staticmethod + def id(file_path): + """ + :param file_path: unix style file path + :return: person id + """ + return int(file_path.split('/')[-1].split('_')[4]) + + @staticmethod + def camera(file_path): + """ + :param file_path: unix style file path + :return: camera id + """ + return int(file_path.split('/')[-1].split('_')[5][-1]) + + @property + def ids(self): + """ + :return: person id list corresponding to dataset image paths + """ + return [self.id(path) for path in self.imgs] + + @property + def unique_ids(self): + """ + :return: unique person ids in ascending order + """ + return sorted(set(self.ids)) + + + @property + def cameras(self): + """ + :return: camera id list corresponding to dataset image paths + """ + return [self.camera(path) for path in self.imgs] + + @staticmethod + def list_pictures(directory, ext='jpg|jpeg|bmp|png|ppm|npy'): + assert os.path.isdir( + directory), 'dataset is not exists!{}'.format(directory) + imgs=[] + + for d in os.listdir(directory): + if os.path.isdir(os.path.join(directory,d)): + for file in os.listdir(os.path.join(directory,d)): + if file.split('.')[-1] == 'jpeg': + imgs.append(os.path.join(directory,d,file)) + return imgs + + # return sorted([os.path.join(root, f) + # for root, _, files in os.walk(directory) for f in files + # if re.match(r'([\w]+\.(?:' + ext + '))', f)]) +if __name__ == '__main__': + dataset = GTA diff --git a/data_v1/market1501.py b/data_v1/market1501.py new file mode 100755 index 000000000..f95c0af9f --- /dev/null +++ b/data_v1/market1501.py @@ -0,0 +1,83 @@ +from torch.utils.data import dataset +from torchvision.datasets.folder import default_loader +import os +import re + + +class Market1501(dataset.Dataset): + def __init__(self, args, transform, dtype): + + self.transform = transform + self.loader = default_loader + + data_path = args.datadir + 
if dtype == 'train': + data_path += '/bounding_box_train' + elif dtype == 'test': + data_path += '/bounding_box_test' + else: + data_path += '/query' + + self.imgs = [path for path in self.list_pictures(data_path) if self.id(path) != -1] + + self._id2label = {_id: idx for idx, _id in enumerate(self.unique_ids)} + + def __getitem__(self, index): + path = self.imgs[index] + target = self._id2label[self.id(path)] + + img = self.loader(path) + if self.transform is not None: + img = self.transform(img) + + return img, target + + def __len__(self): + return len(self.imgs) + + @staticmethod + def id(file_path): + """ + :param file_path: unix style file path + :return: person id + """ + return int(file_path.split('/')[-1].split('_')[0]) + + @staticmethod + def camera(file_path): + """ + :param file_path: unix style file path + :return: camera id + """ + return int(file_path.split('/')[-1].split('_')[1][1]) + + @property + def ids(self): + """ + :return: person id list corresponding to dataset image paths + """ + return [self.id(path) for path in self.imgs] + + @property + def unique_ids(self): + """ + :return: unique person ids in ascending order + """ + return sorted(set(self.ids)) + + @property + def cameras(self): + """ + :return: camera id list corresponding to dataset image paths + """ + return [self.camera(path) for path in self.imgs] + + @staticmethod + def list_pictures(directory, ext='jpg|jpeg|bmp|png|ppm|npy'): + assert os.path.isdir( + directory), 'dataset is not exists!{}'.format(directory) + + return sorted([os.path.join(root, f) + for root, _, files in os.walk(directory) for f in files + if re.match(r'([\w]+\.(?:' + ext + '))', f)]) + diff --git a/data_v1/sampler.py b/data_v1/sampler.py new file mode 100755 index 000000000..a41f35445 --- /dev/null +++ b/data_v1/sampler.py @@ -0,0 +1,163 @@ +import random +import copy +import collections +import numpy as np +from torch.utils.data import sampler + + +class RandomSampler(sampler.Sampler): + def __init__(self, data_source, batch_id, batch_image): + super(RandomSampler, self).__init__(data_source) + + self.data_source = data_source + self.batch_image = batch_image + self.batch_id = batch_id + + self._id2index = collections.defaultdict(list) + for idx, path in enumerate(data_source.imgs): + _id = data_source.id(path) + self._id2index[_id].append(idx) + + def __iter__(self): + unique_ids = self.data_source.unique_ids + random.shuffle(unique_ids) + + imgs = [] + for _id in unique_ids: + imgs.extend(self._sample(self._id2index[_id], self.batch_image)) + return iter(imgs) + + def __len__(self): + return len(self._id2index) * self.batch_image + + @staticmethod + def _sample(population, k): + if len(population) < k: + population = population * k + return random.sample(population, k) + + +class RandomIdentitySampler(sampler.Sampler): + """ + Randomly sample N identities, then for each identity, + randomly sample K instances, therefore batch size is N*K. + Args: + - data_source (list): list of (img_path, pid, camid). + - num_instances (int): number of instances per identity in a batch. + - batch_size (int): number of examples in a batch. 
+ """ + + def __init__(self, data_source, batch_size, num_instances): + self.data_source = data_source + self.batch_size = batch_size + self.num_instances = num_instances + self.num_pids_per_batch = self.batch_size // self.num_instances + self.index_dic = collections.defaultdict(list) + for index, path in enumerate(self.data_source.imgs): + _id = data_source.id(path) + self.index_dic[_id].append(index) + self.pids = list(self.index_dic.keys()) + + # estimate number of examples in an epoch + self.length = 0 + for pid in self.pids: + idxs = self.index_dic[pid] + num = len(idxs) + if num < self.num_instances: + num = self.num_instances + self.length += num - num % self.num_instances + + def __iter__(self): + batch_idxs_dict = collections.defaultdict(list) + + for pid in self.pids: + idxs = copy.deepcopy(self.index_dic[pid]) + if len(idxs) < self.num_instances: + idxs = np.random.choice(idxs, size=self.num_instances, replace=True) + random.shuffle(idxs) + batch_idxs = [] + for idx in idxs: + batch_idxs.append(idx) + if len(batch_idxs) == self.num_instances: + batch_idxs_dict[pid].append(batch_idxs) + batch_idxs = [] + + avai_pids = copy.deepcopy(self.pids) + final_idxs = [] + + while len(avai_pids) >= self.num_pids_per_batch: + selected_pids = random.sample(avai_pids, self.num_pids_per_batch) + for pid in selected_pids: + batch_idxs = batch_idxs_dict[pid].pop(0) + final_idxs.extend(batch_idxs) + if len(batch_idxs_dict[pid]) == 0: + avai_pids.remove(pid) + + self.length = len(final_idxs) + return iter(final_idxs) + + def __len__(self): + return self.length + + +class a_RandomIdentitySampler(sampler.Sampler): + """ + Randomly sample N identities, then for each identity, + randomly sample K instances, therefore batch size is N*K. + Args: + - data_source (list): list of (img_path, pid, camid). + - num_instances (int): number of instances per identity in a batch. + - batch_size (int): number of examples in a batch. 
+ """ + + def __init__(self, data_source, batch_size, num_instances): + self.data_source = data_source + self.batch_size = batch_size + self.num_instances = num_instances + self.num_pids_per_batch = self.batch_size // self.num_instances + self.index_dic = collections.defaultdict(list) + for index, path in enumerate(self.data_source.imgs): + _id = path[1] + self.index_dic[_id].append(index) + self.pids = list(self.index_dic.keys()) + + # estimate number of examples in an epoch + self.length = 0 + for pid in self.pids: + idxs = self.index_dic[pid] + num = len(idxs) + if num < self.num_instances: + num = self.num_instances + self.length += num - num % self.num_instances + + def __iter__(self): + batch_idxs_dict = collections.defaultdict(list) + + for pid in self.pids: + idxs = copy.deepcopy(self.index_dic[pid]) + if len(idxs) < self.num_instances: + idxs = np.random.choice(idxs, size=self.num_instances, replace=True) + random.shuffle(idxs) + batch_idxs = [] + for idx in idxs: + batch_idxs.append(idx) + if len(batch_idxs) == self.num_instances: + batch_idxs_dict[pid].append(batch_idxs) + batch_idxs = [] + + avai_pids = copy.deepcopy(self.pids) + final_idxs = [] + + while len(avai_pids) >= self.num_pids_per_batch: + selected_pids = random.sample(avai_pids, self.num_pids_per_batch) + for pid in selected_pids: + batch_idxs = batch_idxs_dict[pid].pop(0) + final_idxs.extend(batch_idxs) + if len(batch_idxs_dict[pid]) == 0: + avai_pids.remove(pid) + + self.length = len(final_idxs) + return iter(final_idxs) + + def __len__(self): + return self.length diff --git "a/data_v2/Icon\r" "b/data_v2/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/data_v2/__init__.py b/data_v2/__init__.py new file mode 100755 index 000000000..26fe22faf --- /dev/null +++ b/data_v2/__init__.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import +from __future__ import print_function + +from .datasets import Dataset, ImageDataset, VideoDataset +from .datasets import register_image_dataset +from .datasets import register_video_dataset +from .datamanager import ImageDataManager, VideoDataManager \ No newline at end of file diff --git a/data_v2/datamanager.py b/data_v2/datamanager.py new file mode 100755 index 000000000..b6a008f34 --- /dev/null +++ b/data_v2/datamanager.py @@ -0,0 +1,415 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import torch + +from .sampler import build_train_sampler +from .transforms import build_transforms +from .datasets import init_image_dataset, init_video_dataset + + +class DataManager(object): + r"""Base data manager. + + Args: + sources (str or list): source dataset(s). + targets (str or list, optional): target dataset(s). If not given, + it equals to ``sources``. + height (int, optional): target image height. Default is 256. + width (int, optional): target image width. Default is 128. + transforms (str or list of str, optional): transformations applied to model training. + Default is 'random_flip'. + norm_mean (list or None, optional): data mean. Default is None (use imagenet mean). + norm_std (list or None, optional): data std. Default is None (use imagenet std). + use_gpu (bool, optional): use gpu. Default is True. 
+ """ + + def __init__(self, sources=None, targets=None, height=256, width=128, transforms='random_flip', + norm_mean=None, norm_std=None, use_gpu=False): + self.sources = sources + self.targets = targets + self.height = height + self.width = width + + if self.sources is None: + raise ValueError('sources must not be None') + + if isinstance(self.sources, str): + self.sources = [self.sources] + + if self.targets is None: + self.targets = self.sources + + if isinstance(self.targets, str): + self.targets = [self.targets] + + self.transform_tr, self.transform_te = build_transforms( + self.height, self.width, transforms=transforms, + norm_mean=norm_mean, norm_std=norm_std + ) + + self.use_gpu = (torch.cuda.is_available() and use_gpu) + + @property + def num_train_pids(self): + """Returns the number of training person identities.""" + return self._num_train_pids + + @property + def num_train_cams(self): + """Returns the number of training cameras.""" + return self._num_train_cams + + def return_dataloaders(self): + """Returns trainloader and testloader.""" + return self.trainloader, self.testloader + + def return_testdataset_by_name(self, name): + """Returns query and gallery of a test dataset, each containing + tuples of (img_path(s), pid, camid). + + Args: + name (str): dataset name. + """ + return self.testdataset[name]['query'], self.testdataset[name]['gallery'] + + +class ImageDataManager(DataManager): + r"""Image data manager. + + Args: + root (str): root path to datasets. + sources (str or list): source dataset(s). + targets (str or list, optional): target dataset(s). If not given, + it equals to ``sources``. + height (int, optional): target image height. Default is 256. + width (int, optional): target image width. Default is 128. + transforms (str or list of str, optional): transformations applied to model training. + Default is 'random_flip'. + norm_mean (list or None, optional): data mean. Default is None (use imagenet mean). + norm_std (list or None, optional): data std. Default is None (use imagenet std). + use_gpu (bool, optional): use gpu. Default is True. + split_id (int, optional): split id (*0-based*). Default is 0. + combineall (bool, optional): combine train, query and gallery in a dataset for + training. Default is False. + batch_size_train (int, optional): number of images in a training batch. Default is 32. + batch_size_test (int, optional): number of images in a test batch. Default is 32. + workers (int, optional): number of workers. Default is 4. + num_instances (int, optional): number of instances per identity in a batch. + Default is 4. + train_sampler (str, optional): sampler. Default is empty (``RandomSampler``). + cuhk03_labeled (bool, optional): use cuhk03 labeled images. + Default is False (defaul is to use detected images). + cuhk03_classic_split (bool, optional): use the classic split in cuhk03. + Default is False. + market1501_500k (bool, optional): add 500K distractors to the gallery + set in market1501. Default is False. 
+ + Examples:: + + datamanager = torchreid.data.ImageDataManager( + root='path/to/reid-data', + sources='market1501', + height=256, + width=128, + batch_size_train=32, + batch_size_test=100 + ) + """ + data_type = 'image' + + def __init__(self, args): + + root = args.datadir + sources = args.data_train.lower() + targets = args.data_test.lower() + height = args.height + width = args.width + transforms = ['random_flip'] + norm_mean = [0.485, 0.456, 0.406] + norm_std = [0.229, 0.224, 0.225] + use_gpu = not args.cpu + split_id = 0 + combineall = False + batch_size_train = args.batchid * args.batchimage + num_instances = args.batchimage + batch_size_test = args.batchtest + workers = args.nThread + train_sampler = 'random' + cuhk03_labeled = False + cuhk03_classic_split = False + market1501_500k = False + transforms = ['random_flip'] + + if args.random_erasing: + transforms.append('random_erase') + if args.cutout: + transforms.append('cutout') + if args.sampler: + train_sampler = 'RandomIdentitySampler' + + super(ImageDataManager, self).__init__(sources=sources, targets=targets, height=height, width=width, + transforms=transforms, norm_mean=norm_mean, norm_std=norm_std, + use_gpu=use_gpu) + print('=> Loading train (source) dataset') + trainset = [] + for name in self.sources: + trainset_ = init_image_dataset( + name, + transform=self.transform_tr, + mode='train', + combineall=combineall, + root=root, + split_id=split_id, + cuhk03_labeled=cuhk03_labeled, + cuhk03_classic_split=cuhk03_classic_split, + market1501_500k=market1501_500k + ) + trainset.append(trainset_) + trainset = sum(trainset) + + self._num_train_pids = trainset.num_train_pids + self._num_train_cams = trainset.num_train_cams + + train_sampler = build_train_sampler( + trainset.train, train_sampler, + batch_size=batch_size_train, + num_instances=num_instances + ) + + self.train_loader = torch.utils.data.DataLoader( + trainset, + sampler=train_sampler, + batch_size=batch_size_train, + shuffle=False, + num_workers=workers, + pin_memory=self.use_gpu, + drop_last=True + ) + + print('=> Loading test (target) dataset') + self.testloader = {name: {'query': None, 'gallery': None} + for name in self.targets} + self.testdataset = {name: {'query': None, 'gallery': None} + for name in self.targets} + + for name in self.targets: + # build query loader + queryset = init_image_dataset( + name, + transform=self.transform_te, + mode='query', + combineall=combineall, + root=root, + split_id=split_id, + cuhk03_labeled=cuhk03_labeled, + cuhk03_classic_split=cuhk03_classic_split, + market1501_500k=market1501_500k + ) + self.testloader[name]['query'] = torch.utils.data.DataLoader( + queryset, + batch_size=batch_size_test, + shuffle=False, + num_workers=workers, + pin_memory=self.use_gpu, + drop_last=False + ) + + # build gallery loader + galleryset = init_image_dataset( + name, + transform=self.transform_te, + mode='gallery', + combineall=combineall, + verbose=False, + root=root, + split_id=split_id, + cuhk03_labeled=cuhk03_labeled, + cuhk03_classic_split=cuhk03_classic_split, + market1501_500k=market1501_500k + ) + self.testloader[name]['gallery'] = torch.utils.data.DataLoader( + galleryset, + batch_size=batch_size_test, + shuffle=False, + num_workers=workers, + pin_memory=self.use_gpu, + drop_last=False + ) + self.query_loader = self.testloader[name]['query'] + self.test_loader = self.testloader[name]['gallery'] + self.galleryset = galleryset + self.queryset = queryset + self.testdataset[name]['query'] = queryset.query + self.testdataset[name]['gallery'] 
= galleryset.gallery + args.num_classes = self.num_train_pids + + print('\n') + print(' **************** Summary ****************') + print(' train : {}'.format(self.sources)) + print(' # train datasets : {}'.format(len(self.sources))) + print(' # train ids : {}'.format(self.num_train_pids)) + print(' # train images : {}'.format(len(trainset))) + print(' # train cameras : {}'.format(self.num_train_cams)) + print(' test : {}'.format(self.targets)) + print(' *****************************************') + print('\n') + + +class VideoDataManager(DataManager): + r"""Video data manager. + + Args: + root (str): root path to datasets. + sources (str or list): source dataset(s). + targets (str or list, optional): target dataset(s). If not given, + it equals to ``sources``. + height (int, optional): target image height. Default is 256. + width (int, optional): target image width. Default is 128. + transforms (str or list of str, optional): transformations applied to model training. + Default is 'random_flip'. + norm_mean (list or None, optional): data mean. Default is None (use imagenet mean). + norm_std (list or None, optional): data std. Default is None (use imagenet std). + use_gpu (bool, optional): use gpu. Default is True. + split_id (int, optional): split id (*0-based*). Default is 0. + combineall (bool, optional): combine train, query and gallery in a dataset for + training. Default is False. + batch_size_train (int, optional): number of tracklets in a training batch. Default is 3. + batch_size_test (int, optional): number of tracklets in a test batch. Default is 3. + workers (int, optional): number of workers. Default is 4. + num_instances (int, optional): number of instances per identity in a batch. + Default is 4. + train_sampler (str, optional): sampler. Default is empty (``RandomSampler``). + seq_len (int, optional): how many images to sample in a tracklet. Default is 15. + sample_method (str, optional): how to sample images in a tracklet. Default is "evenly". + Choices are ["evenly", "random", "all"]. "evenly" and "random" will sample ``seq_len`` + images in a tracklet while "all" samples all images in a tracklet, where the batch size + needs to be set to 1. + + Examples:: + + datamanager = torchreid.data.VideoDataManager( + root='path/to/reid-data', + sources='mars', + height=256, + width=128, + batch_size_train=3, + batch_size_test=3, + seq_len=15, + sample_method='evenly' + ) + + .. note:: + The current implementation only supports image-like training. Therefore, each image in a + sampled tracklet will undergo independent transformation functions. To achieve tracklet-aware + training, you need to modify the transformation functions for video reid such that each function + applies the same operation to all images in a tracklet to keep consistency. 
+ """ + data_type = 'video' + + def __init__(self, root='', sources=None, targets=None, height=256, width=128, transforms='random_flip', + norm_mean=None, norm_std=None, use_gpu=True, split_id=0, combineall=False, + batch_size_train=3, batch_size_test=3, workers=4, num_instances=4, train_sampler=None, + seq_len=15, sample_method='evenly'): + + super(VideoDataManager, self).__init__(sources=sources, targets=targets, height=height, width=width, + transforms=transforms, norm_mean=norm_mean, norm_std=norm_std, + use_gpu=use_gpu) + + print('=> Loading train (source) dataset') + trainset = [] + for name in self.sources: + trainset_ = init_video_dataset( + name, + transform=self.transform_tr, + mode='train', + combineall=combineall, + root=root, + split_id=split_id, + seq_len=seq_len, + sample_method=sample_method + ) + trainset.append(trainset_) + trainset = sum(trainset) + + self._num_train_pids = trainset.num_train_pids + self._num_train_cams = trainset.num_train_cams + + train_sampler = build_train_sampler( + trainset.train, train_sampler, + batch_size=batch_size_train, + num_instances=num_instances + ) + + self.trainloader = torch.utils.data.DataLoader( + trainset, + sampler=train_sampler, + batch_size=batch_size_train, + shuffle=False, + num_workers=workers, + pin_memory=self.use_gpu, + drop_last=True + ) + + print('=> Loading test (target) dataset') + self.testloader = {name: {'query': None, 'gallery': None} + for name in self.targets} + self.testdataset = {name: {'query': None, 'gallery': None} + for name in self.targets} + + for name in self.targets: + # build query loader + queryset = init_video_dataset( + name, + transform=self.transform_te, + mode='query', + combineall=combineall, + root=root, + split_id=split_id, + seq_len=seq_len, + sample_method=sample_method + ) + self.testloader[name]['query'] = torch.utils.data.DataLoader( + queryset, + batch_size=batch_size_test, + shuffle=False, + num_workers=workers, + pin_memory=self.use_gpu, + drop_last=False + ) + + # build gallery loader + galleryset = init_video_dataset( + name, + transform=self.transform_te, + mode='gallery', + combineall=combineall, + verbose=False, + root=root, + split_id=split_id, + seq_len=seq_len, + sample_method=sample_method + ) + self.testloader[name]['gallery'] = torch.utils.data.DataLoader( + galleryset, + batch_size=batch_size_test, + shuffle=False, + num_workers=workers, + pin_memory=self.use_gpu, + drop_last=False + ) + + self.testdataset[name]['query'] = queryset.query + self.testdataset[name]['gallery'] = galleryset.gallery + + print('\n') + print(' **************** Summary ****************') + print(' train : {}'.format(self.sources)) + print(' # train datasets : {}'.format(len(self.sources))) + print(' # train ids : {}'.format(self.num_train_pids)) + print(' # train tracklets : {}'.format(len(trainset))) + print(' # train cameras : {}'.format(self.num_train_cams)) + print(' test : {}'.format(self.targets)) + print(' *****************************************') + print('\n') diff --git "a/data_v2/datasets/Icon\r" "b/data_v2/datasets/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/data_v2/datasets/__init__.py b/data_v2/datasets/__init__.py new file mode 100755 index 000000000..65f0dbbc0 --- /dev/null +++ b/data_v2/datasets/__init__.py @@ -0,0 +1,112 @@ +from __future__ import absolute_import +from __future__ import print_function + +from .dataset import Dataset, ImageDataset, VideoDataset +from .image import * +from .video import * + + +__image_datasets = { + 'market1501': Market1501, 
+ 'cuhk03': CUHK03, + 'cuhk03_detected': CUHK03_Detected, + 'cuhk03_labeled': CUHK03_Labeled, + 'dukemtmc': DukeMTMCreID, + 'msmt17': MSMT17, + 'viper': VIPeR, + 'grid': GRID, + 'cuhk01': CUHK01, + 'ilids': iLIDS, + 'sensereid': SenseReID, + 'prid': PRID, + 'cuhk02': CUHK02, + 'mot17': MOT17 +} + + +__video_datasets = { + 'mars': Mars, + 'ilidsvid': iLIDSVID, + 'prid2011': PRID2011, + 'dukemtmcvidreid': DukeMTMCVidReID +} + + +def init_image_dataset(name, **kwargs): + """Initializes an image dataset.""" + avai_datasets = list(__image_datasets.keys()) + if name not in avai_datasets: + raise ValueError('Invalid dataset name. Received "{}", ' + 'but expected to be one of {}'.format(name, avai_datasets)) + return __image_datasets[name](**kwargs) + + +def init_video_dataset(name, **kwargs): + """Initializes a video dataset.""" + avai_datasets = list(__video_datasets.keys()) + if name not in avai_datasets: + raise ValueError('Invalid dataset name. Received "{}", ' + 'but expected to be one of {}'.format(name, avai_datasets)) + return __video_datasets[name](**kwargs) + + +def register_image_dataset(name, dataset): + """Registers a new image dataset. + + Args: + name (str): key corresponding to the new dataset. + dataset (Dataset): the new dataset class. + + Examples:: + + import torchreid + import NewDataset + torchreid.data.register_image_dataset('new_dataset', NewDataset) + # single dataset case + datamanager = torchreid.data.ImageDataManager( + root='reid-data', + sources='new_dataset' + ) + # multiple dataset case + datamanager = torchreid.data.ImageDataManager( + root='reid-data', + sources=['new_dataset', 'dukemtmcreid'] + ) + """ + global __image_datasets + curr_datasets = list(__image_datasets.keys()) + if name in curr_datasets: + raise ValueError('The given name already exists, please choose ' + 'another name excluding {}'.format(curr_datasets)) + __image_datasets[name] = dataset + + +def register_video_dataset(name, dataset): + """Registers a new video dataset. + + Args: + name (str): key corresponding to the new dataset. + dataset (Dataset): the new dataset class. + + Examples:: + + import torchreid + import NewDataset + torchreid.data.register_video_dataset('new_dataset', NewDataset) + # single dataset case + datamanager = torchreid.data.VideoDataManager( + root='reid-data', + sources='new_dataset' + ) + # multiple dataset case + datamanager = torchreid.data.VideoDataManager( + root='reid-data', + sources=['new_dataset', 'ilidsvid'] + ) + """ + global __video_datasets + curr_datasets = list(__video_datasets.keys()) + if name in curr_datasets: + raise ValueError('The given name already exists, please choose ' + 'another name excluding {}'.format(curr_datasets)) + __video_datasets[name] = dataset diff --git a/data_v2/datasets/dataset.py b/data_v2/datasets/dataset.py new file mode 100755 index 000000000..91f302a00 --- /dev/null +++ b/data_v2/datasets/dataset.py @@ -0,0 +1,344 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import numpy as np +import tarfile +import zipfile +import copy + +import torch + +from ..utils import read_image, mkdir_if_missing, download_url + + +class Dataset(object): + """An abstract class representing a Dataset. + + This is the base class for ``ImageDataset`` and ``VideoDataset``. + + Args: + train (list): contains tuples of (img_path(s), pid, camid). + query (list): contains tuples of (img_path(s), pid, camid). 
+ gallery (list): contains tuples of (img_path(s), pid, camid). + transform: transform function. + mode (str): 'train', 'query' or 'gallery'. + combineall (bool): combines train, query and gallery in a + dataset for training. + verbose (bool): show information. + """ + _junk_pids = [] # contains useless person IDs, e.g. background, false detections + + def __init__(self, train, query, gallery, transform=None, mode='train', + combineall=False, verbose=True, **kwargs): + self.train = train + self.query = query + self.gallery = gallery + self.transform = transform + self.mode = mode + self.combineall = combineall + self.verbose = verbose + + self.num_train_pids = self.get_num_pids(self.train) + self.num_train_cams = self.get_num_cams(self.train) + + if self.combineall: + self.combine_all() + + if self.mode == 'train': + self.data = self.train + elif self.mode == 'query': + self.data = self.query + elif self.mode == 'gallery': + self.data = self.gallery + else: + raise ValueError('Invalid mode. Got {}, but expected to be ' + 'one of [train | query | gallery]'.format(self.mode)) + + if self.verbose: + self.show_summary() + + def __getitem__(self, index): + raise NotImplementedError + + def __len__(self): + return len(self.data) + + def __add__(self, other): + """Adds two datasets together (only the train set).""" + train = copy.deepcopy(self.train) + + for img_path, pid, camid in other.train: + pid += self.num_train_pids + camid += self.num_train_cams + train.append((img_path, pid, camid)) + + ################################### + # Things to do beforehand: + # 1. set verbose=False to avoid unnecessary print + # 2. set combineall=False because combineall would have been applied + # if it was True for a specific dataset, setting it to True will + # create new IDs that should have been included + ################################### + if isinstance(train[0][0], str): + return ImageDataset( + train, self.query, self.gallery, + transform=self.transform, + mode=self.mode, + combineall=False, + verbose=False + ) + else: + return VideoDataset( + train, self.query, self.gallery, + transform=self.transform, + mode=self.mode, + combineall=False, + verbose=False + ) + + def __radd__(self, other): + """Supports sum([dataset1, dataset2, dataset3]).""" + if other == 0: + return self + else: + return self.__add__(other) + + def parse_data(self, data): + """Parses data list and returns the number of person IDs + and the number of camera views. 
+ + Args: + data (list): contains tuples of (img_path(s), pid, camid) + """ + pids = set() + cams = set() + for _, pid, camid in data: + pids.add(pid) + cams.add(camid) + return len(pids), len(cams) + + def get_num_pids(self, data): + """Returns the number of training person identities.""" + return self.parse_data(data)[0] + + def get_num_cams(self, data): + """Returns the number of training cameras.""" + return self.parse_data(data)[1] + + def show_summary(self): + """Shows dataset statistics.""" + pass + + def combine_all(self): + """Combines train, query and gallery in a dataset for training.""" + combined = copy.deepcopy(self.train) + + # relabel pids in gallery (query shares the same scope) + g_pids = set() + for _, pid, _ in self.gallery: + if pid in self._junk_pids: + continue + g_pids.add(pid) + pid2label = {pid: i for i, pid in enumerate(g_pids)} + + def _combine_data(data): + for img_path, pid, camid in data: + if pid in self._junk_pids: + continue + pid = pid2label[pid] + self.num_train_pids + combined.append((img_path, pid, camid)) + + _combine_data(self.query) + _combine_data(self.gallery) + + self.train = combined + self.num_train_pids = self.get_num_pids(self.train) + + def download_dataset(self, dataset_dir, dataset_url): + """Downloads and extracts dataset. + + Args: + dataset_dir (str): dataset directory. + dataset_url (str): url to download dataset. + """ + if osp.exists(dataset_dir): + return + + if dataset_url is None: + raise RuntimeError('{} dataset needs to be manually ' + 'prepared, please follow the ' + 'document to prepare this dataset'.format(self.__class__.__name__)) + + print('Creating directory "{}"'.format(dataset_dir)) + mkdir_if_missing(dataset_dir) + fpath = osp.join(dataset_dir, osp.basename(dataset_url)) + + print('Downloading {} dataset to "{}"'.format(self.__class__.__name__, dataset_dir)) + download_url(dataset_url, fpath) + + print('Extracting "{}"'.format(fpath)) + extension = osp.basename(fpath).split('.')[-1] + try: + tar = tarfile.open(fpath) + tar.extractall(path=dataset_dir) + tar.close() + except: + zip_ref = zipfile.ZipFile(fpath, 'r') + zip_ref.extractall(dataset_dir) + zip_ref.close() + + print('{} dataset is ready'.format(self.__class__.__name__)) + + def check_before_run(self, required_files): + """Checks if required files exist before going deeper. + + Args: + required_files (str or list): string file name(s). + """ + if isinstance(required_files, str): + required_files = [required_files] + + for fpath in required_files: + if not osp.exists(fpath): + raise RuntimeError('"{}" is not found'.format(fpath)) + + def __repr__(self): + num_train_pids, num_train_cams = self.parse_data(self.train) + num_query_pids, num_query_cams = self.parse_data(self.query) + num_gallery_pids, num_gallery_cams = self.parse_data(self.gallery) + + msg = ' ----------------------------------------\n' \ + ' subset | # ids | # items | # cameras\n' \ + ' ----------------------------------------\n' \ + ' train | {:5d} | {:7d} | {:9d}\n' \ + ' query | {:5d} | {:7d} | {:9d}\n' \ + ' gallery | {:5d} | {:7d} | {:9d}\n' \ + ' ----------------------------------------\n' \ + ' items: images/tracklets for image/video dataset\n'.format( + num_train_pids, len(self.train), num_train_cams, + num_query_pids, len(self.query), num_query_cams, + num_gallery_pids, len(self.gallery), num_gallery_cams + ) + + return msg + + +class ImageDataset(Dataset): + """A base class representing ImageDataset. + + All other image datasets should subclass it. 
+ + ``__getitem__`` returns an image given index. + It will return ``img``, ``pid``, ``camid`` and ``img_path`` + where ``img`` has shape (channel, height, width). As a result, + data in each batch has shape (batch_size, channel, height, width). + """ + + def __init__(self, train, query, gallery, **kwargs): + super(ImageDataset, self).__init__(train, query, gallery, **kwargs) + + def __getitem__(self, index): + img_path, pid, camid = self.data[index] + img = read_image(img_path) + if self.transform is not None: + img = self.transform(img) + return img, pid, camid, img_path + # return img, pid + + + def show_summary(self): + num_train_pids, num_train_cams = self.parse_data(self.train) + num_query_pids, num_query_cams = self.parse_data(self.query) + num_gallery_pids, num_gallery_cams = self.parse_data(self.gallery) + + print('=> Loaded {}'.format(self.__class__.__name__)) + print(' ----------------------------------------') + print(' subset | # ids | # images | # cameras') + print(' ----------------------------------------') + print(' train | {:5d} | {:8d} | {:9d}'.format(num_train_pids, len(self.train), num_train_cams)) + print(' query | {:5d} | {:8d} | {:9d}'.format(num_query_pids, len(self.query), num_query_cams)) + print(' gallery | {:5d} | {:8d} | {:9d}'.format(num_gallery_pids, len(self.gallery), num_gallery_cams)) + print(' ----------------------------------------') + + +class VideoDataset(Dataset): + """A base class representing VideoDataset. + + All other video datasets should subclass it. + + ``__getitem__`` returns an image given index. + It will return ``imgs``, ``pid`` and ``camid`` + where ``imgs`` has shape (seq_len, channel, height, width). As a result, + data in each batch has shape (batch_size, seq_len, channel, height, width). + """ + + def __init__(self, train, query, gallery, seq_len=15, sample_method='evenly', **kwargs): + super(VideoDataset, self).__init__(train, query, gallery, **kwargs) + self.seq_len = seq_len + self.sample_method = sample_method + + if self.transform is None: + raise RuntimeError('transform must not be None') + + def __getitem__(self, index): + img_paths, pid, camid = self.data[index] + num_imgs = len(img_paths) + + if self.sample_method == 'random': + # Randomly samples seq_len images from a tracklet of length num_imgs, + # if num_imgs is smaller than seq_len, then replicates images + indices = np.arange(num_imgs) + replace = False if num_imgs>=self.seq_len else True + indices = np.random.choice(indices, size=self.seq_len, replace=replace) + # sort indices to keep temporal order (comment it to be order-agnostic) + indices = np.sort(indices) + + elif self.sample_method == 'evenly': + # Evenly samples seq_len images from a tracklet + if num_imgs >= self.seq_len: + num_imgs -= num_imgs % self.seq_len + indices = np.arange(0, num_imgs, num_imgs/self.seq_len) + else: + # if num_imgs is smaller than seq_len, simply replicate the last image + # until the seq_len requirement is satisfied + indices = np.arange(0, num_imgs) + num_pads = self.seq_len - num_imgs + indices = np.concatenate([indices, np.ones(num_pads).astype(np.int32)*(num_imgs-1)]) + assert len(indices) == self.seq_len + + elif self.sample_method == 'all': + # Samples all images in a tracklet. 
batch_size must be set to 1 + indices = np.arange(num_imgs) + + else: + raise ValueError('Unknown sample method: {}'.format(self.sample_method)) + + imgs = [] + for index in indices: + img_path = img_paths[int(index)] + img = read_image(img_path) + if self.transform is not None: + img = self.transform(img) + img = img.unsqueeze(0) # img must be torch.Tensor + imgs.append(img) + imgs = torch.cat(imgs, dim=0) + + return imgs, pid, camid + + def show_summary(self): + num_train_pids, num_train_cams = self.parse_data(self.train) + num_query_pids, num_query_cams = self.parse_data(self.query) + num_gallery_pids, num_gallery_cams = self.parse_data(self.gallery) + + print('=> Loaded {}'.format(self.__class__.__name__)) + print(' -------------------------------------------') + print(' subset | # ids | # tracklets | # cameras') + print(' -------------------------------------------') + print(' train | {:5d} | {:11d} | {:9d}'.format(num_train_pids, len(self.train), num_train_cams)) + print(' query | {:5d} | {:11d} | {:9d}'.format(num_query_pids, len(self.query), num_query_cams)) + print(' gallery | {:5d} | {:11d} | {:9d}'.format(num_gallery_pids, len(self.gallery), num_gallery_cams)) + print(' -------------------------------------------') \ No newline at end of file diff --git "a/data_v2/datasets/image/Icon\r" "b/data_v2/datasets/image/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/data_v2/datasets/image/__init__.py b/data_v2/datasets/image/__init__.py new file mode 100755 index 000000000..7bbcf6976 --- /dev/null +++ b/data_v2/datasets/image/__init__.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import +from __future__ import print_function + +from .market1501 import Market1501 +from .dukemtmcreid import DukeMTMCreID +from .cuhk03 import CUHK03 +from .msmt17 import MSMT17 +from .viper import VIPeR +from .grid import GRID +from .cuhk01 import CUHK01 +from .ilids import iLIDS +from .sensereid import SenseReID +from .prid import PRID +from .cuhk02 import CUHK02 +from .cuhk03_detected import CUHK03_Detected +from .cuhk03_labeled import CUHK03_Labeled +from .msmt17 import MSMT17 +from .mot17 import MOT17 diff --git a/data_v2/datasets/image/cuhk01.py b/data_v2/datasets/image/cuhk01.py new file mode 100755 index 000000000..42ac1ac13 --- /dev/null +++ b/data_v2/datasets/image/cuhk01.py @@ -0,0 +1,135 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import zipfile +import numpy as np + +from .. import ImageDataset +from ..utils import read_json, write_json + + +class CUHK01(ImageDataset): + """CUHK01. + + Reference: + Li et al. Human Reidentification with Transferred Metric Learning. ACCV 2012. + + URL: ``_ + + Dataset statistics: + - identities: 971. + - images: 3884. + - cameras: 4. 
+ """ + dataset_dir = 'cuhk01' + dataset_url = None + + def __init__(self, root='', split_id=0, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.zip_path = osp.join(self.dataset_dir, 'CUHK01.zip') + self.campus_dir = osp.join(self.dataset_dir, 'campus') + self.split_path = osp.join(self.dataset_dir, 'splits.json') + + self.extract_file() + + required_files = [ + self.dataset_dir, + self.campus_dir + ] + self.check_before_run(required_files) + + self.prepare_split() + splits = read_json(self.split_path) + if split_id >= len(splits): + raise ValueError('split_id exceeds range, received {}, but expected between 0 and {}'.format(split_id, len(splits)-1)) + split = splits[split_id] + + train = split['train'] + query = split['query'] + gallery = split['gallery'] + + train = [tuple(item) for item in train] + query = [tuple(item) for item in query] + gallery = [tuple(item) for item in gallery] + + super(CUHK01, self).__init__(train, query, gallery, **kwargs) + + def extract_file(self): + if not osp.exists(self.campus_dir): + print('Extracting files') + zip_ref = zipfile.ZipFile(self.zip_path, 'r') + zip_ref.extractall(self.dataset_dir) + zip_ref.close() + + def prepare_split(self): + """ + Image name format: 0001001.png, where first four digits represent identity + and last four digits represent cameras. Camera 1&2 are considered the same + view and camera 3&4 are considered the same view. + """ + if not osp.exists(self.split_path): + print('Creating 10 random splits of train ids and test ids') + img_paths = sorted(glob.glob(osp.join(self.campus_dir, '*.png'))) + img_list = [] + pid_container = set() + for img_path in img_paths: + img_name = osp.basename(img_path) + pid = int(img_name[:4]) - 1 + camid = (int(img_name[4:7]) - 1) // 2 # result is either 0 or 1 + img_list.append((img_path, pid, camid)) + pid_container.add(pid) + + num_pids = len(pid_container) + num_train_pids = num_pids // 2 + + splits = [] + for _ in range(10): + order = np.arange(num_pids) + np.random.shuffle(order) + train_idxs = order[:num_train_pids] + train_idxs = np.sort(train_idxs) + idx2label = {idx: label for label, idx in enumerate(train_idxs)} + + train, test_a, test_b = [], [], [] + for img_path, pid, camid in img_list: + if pid in train_idxs: + train.append((img_path, idx2label[pid], camid)) + else: + if camid == 0: + test_a.append((img_path, pid, camid)) + else: + test_b.append((img_path, pid, camid)) + + # use cameraA as query and cameraB as gallery + split = { + 'train': train, + 'query': test_a, + 'gallery': test_b, + 'num_train_pids': num_train_pids, + 'num_query_pids': num_pids - num_train_pids, + 'num_gallery_pids': num_pids - num_train_pids + } + splits.append(split) + + # use cameraB as query and cameraA as gallery + split = { + 'train': train, + 'query': test_b, + 'gallery': test_a, + 'num_train_pids': num_train_pids, + 'num_query_pids': num_pids - num_train_pids, + 'num_gallery_pids': num_pids - num_train_pids + } + splits.append(split) + + print('Totally {} splits are created'.format(len(splits))) + write_json(splits, self.split_path) + print('Split file saved to {}'.format(self.split_path)) diff --git a/data_v2/datasets/image/cuhk02.py b/data_v2/datasets/image/cuhk02.py new file mode 100755 index 000000000..ea7432b81 --- /dev/null +++ b/data_v2/datasets/image/cuhk02.py @@ -0,0 +1,93 @@ +from __future__ import absolute_import +from __future__ import print_function +from 
__future__ import division + +import sys +import os +import os.path as osp +import glob + +from .. import ImageDataset + + +class CUHK02(ImageDataset): + """CUHK02. + + Reference: + Li and Wang. Locally Aligned Feature Transforms across Views. CVPR 2013. + + URL: ``_ + + Dataset statistics: + - 5 camera view pairs each with two cameras + - 971, 306, 107, 193 and 239 identities from P1 - P5 + - totally 1,816 identities + - image format is png + + Protocol: Use P1 - P4 for training and P5 for evaluation. + """ + dataset_dir = 'cuhk02' + cam_pairs = ['P1', 'P2', 'P3', 'P4', 'P5'] + test_cam_pair = 'P5' + + def __init__(self, root='', **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir, 'Dataset') + + required_files = [self.dataset_dir] + self.check_before_run(required_files) + + train, query, gallery = self.get_data_list() + + super(CUHK02, self).__init__(train, query, gallery, **kwargs) + + def get_data_list(self): + num_train_pids, camid = 0, 0 + train, query, gallery = [], [], [] + + for cam_pair in self.cam_pairs: + cam_pair_dir = osp.join(self.dataset_dir, cam_pair) + + cam1_dir = osp.join(cam_pair_dir, 'cam1') + cam2_dir = osp.join(cam_pair_dir, 'cam2') + + impaths1 = glob.glob(osp.join(cam1_dir, '*.png')) + impaths2 = glob.glob(osp.join(cam2_dir, '*.png')) + + if cam_pair == self.test_cam_pair: + # add images to query + for impath in impaths1: + pid = osp.basename(impath).split('_')[0] + pid = int(pid) + query.append((impath, pid, camid)) + camid += 1 + + # add images to gallery + for impath in impaths2: + pid = osp.basename(impath).split('_')[0] + pid = int(pid) + gallery.append((impath, pid, camid)) + camid += 1 + + else: + pids1 = [osp.basename(impath).split('_')[0] for impath in impaths1] + pids2 = [osp.basename(impath).split('_')[0] for impath in impaths2] + pids = set(pids1 + pids2) + pid2label = {pid: label+num_train_pids for label, pid in enumerate(pids)} + + # add images to train from cam1 + for impath in impaths1: + pid = osp.basename(impath).split('_')[0] + pid = pid2label[pid] + train.append((impath, pid, camid)) + camid += 1 + + # add images to train from cam1 + for impath in impaths1: + pid = osp.basename(impath).split('_')[0] + pid = pid2label[pid] + train.append((impath, pid, camid)) + camid += 1 + num_train_pids += len(pids) + + return train, query, gallery diff --git a/data_v2/datasets/image/cuhk03.py b/data_v2/datasets/image/cuhk03.py new file mode 100755 index 000000000..06f3ef7e1 --- /dev/null +++ b/data_v2/datasets/image/cuhk03.py @@ -0,0 +1,259 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp + +from .. import ImageDataset +from ..utils import mkdir_if_missing, read_json, write_json + + +class CUHK03(ImageDataset): + """CUHK03. + + Reference: + Li et al. DeepReID: Deep Filter Pairing Neural Network for Person Re-identification. CVPR 2014. + + URL: ``_ + + Dataset statistics: + - identities: 1360. + - images: 13164. + - cameras: 6. + - splits: 20 (classic). 
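+
+    Example (illustrative; ``<root>/CUHK03`` is assumed to already contain
+    ``cuhk03_release/cuhk-03.mat`` and the new-protocol ``.mat`` split files)::
+
+        >>> # 'detected' images with the new 767/700 protocol (default flags)
+        >>> dataset = CUHK03(root='/path/to/datasets', split_id=0,
+        ...                  cuhk03_labeled=False, cuhk03_classic_split=False)
+        >>> # hand-labeled boxes with the classic 20-split protocol
+        >>> dataset = CUHK03(root='/path/to/datasets',
+        ...                  cuhk03_labeled=True, cuhk03_classic_split=True)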
+ """ + dataset_dir = 'CUHK03' + #dataset_url = None + + def __init__(self, root='', split_id=0, cuhk03_labeled=False, cuhk03_classic_split=False, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + #self.download_dataset(self.dataset_dir, self.dataset_url) + + self.data_dir = osp.join(self.dataset_dir, 'cuhk03_release') + self.raw_mat_path = osp.join(self.data_dir, 'cuhk-03.mat') + + self.imgs_detected_dir = osp.join(self.dataset_dir, 'images_detected') + self.imgs_labeled_dir = osp.join(self.dataset_dir, 'images_labeled') + + self.split_classic_det_json_path = osp.join(self.dataset_dir, 'splits_classic_detected.json') + self.split_classic_lab_json_path = osp.join(self.dataset_dir, 'splits_classic_labeled.json') + + self.split_new_det_json_path = osp.join(self.dataset_dir, 'splits_new_detected.json') + self.split_new_lab_json_path = osp.join(self.dataset_dir, 'splits_new_labeled.json') + + self.split_new_det_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat') + self.split_new_lab_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat') + + required_files = [ + self.dataset_dir, + self.data_dir, + self.raw_mat_path, + self.split_new_det_mat_path, + self.split_new_lab_mat_path + ] + self.check_before_run(required_files) + + self.preprocess_split() + + if cuhk03_labeled: + split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path + else: + split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path + + splits = read_json(split_path) + assert split_id < len(splits), 'Condition split_id ({}) < len(splits) ({}) is false'.format(split_id, len(splits)) + split = splits[split_id] + + train = split['train'] + query = split['query'] + gallery = split['gallery'] + + super(CUHK03, self).__init__(train, query, gallery, **kwargs) + + def preprocess_split(self): + # This function is a bit complex and ugly, what it does is + # 1. extract data from cuhk-03.mat and save as png images + # 2. create 20 classic splits (Li et al. CVPR'14) + # 3. create new split (Zhong et al. 
CVPR'17) + if osp.exists(self.imgs_labeled_dir) \ + and osp.exists(self.imgs_detected_dir) \ + and osp.exists(self.split_classic_det_json_path) \ + and osp.exists(self.split_classic_lab_json_path) \ + and osp.exists(self.split_new_det_json_path) \ + and osp.exists(self.split_new_lab_json_path): + return + + import h5py + # from scipy.misc import imsave + import imageio + from scipy.io import loadmat + + mkdir_if_missing(self.imgs_detected_dir) + mkdir_if_missing(self.imgs_labeled_dir) + + print('Extract image data from "{}" and save as png'.format(self.raw_mat_path)) + mat = h5py.File(self.raw_mat_path, 'r') + + def _deref(ref): + return mat[ref][:].T + + def _process_images(img_refs, campid, pid, save_dir): + img_paths = [] # Note: some persons only have images for one view + for imgid, img_ref in enumerate(img_refs): + img = _deref(img_ref) + if img.size==0 or img.ndim<3: + continue # skip empty cell + # images are saved with the following format, index-1 (ensure uniqueness) + # campid: index of camera pair (1-5) + # pid: index of person in 'campid'-th camera pair + # viewid: index of view, {1, 2} + # imgid: index of image, (1-10) + viewid = 1 if imgid<5 else 2 + img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(campid+1, pid+1, viewid, imgid+1) + img_path = osp.join(save_dir, img_name) + if not osp.isfile(img_path): + # imsave(img_path, img) + imageio.imwrite(img_path,img) + img_paths.append(img_path) + return img_paths + + def _extract_img(image_type): + print('Processing {} images ...'.format(image_type)) + meta_data = [] + imgs_dir = self.imgs_detected_dir if image_type=='detected' else self.imgs_labeled_dir + for campid, camp_ref in enumerate(mat[image_type][0]): + camp = _deref(camp_ref) + num_pids = camp.shape[0] + for pid in range(num_pids): + img_paths = _process_images(camp[pid,:], campid, pid, imgs_dir) + assert len(img_paths) > 0, 'campid{}-pid{} has no images'.format(campid, pid) + meta_data.append((campid+1, pid+1, img_paths)) + print('- done camera pair {} with {} identities'.format(campid+1, num_pids)) + return meta_data + + meta_detected = _extract_img('detected') + meta_labeled = _extract_img('labeled') + + def _extract_classic_split(meta_data, test_split): + train, test = [], [] + num_train_pids, num_test_pids = 0, 0 + num_train_imgs, num_test_imgs = 0, 0 + for i, (campid, pid, img_paths) in enumerate(meta_data): + + if [campid, pid] in test_split: + for img_path in img_paths: + camid = int(osp.basename(img_path).split('_')[2]) - 1 # make it 0-based + test.append((img_path, num_test_pids, camid)) + num_test_pids += 1 + num_test_imgs += len(img_paths) + else: + for img_path in img_paths: + camid = int(osp.basename(img_path).split('_')[2]) - 1 # make it 0-based + train.append((img_path, num_train_pids, camid)) + num_train_pids += 1 + num_train_imgs += len(img_paths) + return train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs + + print('Creating classic splits (# = 20) ...') + splits_classic_det, splits_classic_lab = [], [] + for split_ref in mat['testsets'][0]: + test_split = _deref(split_ref).tolist() + + # create split for detected images + train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \ + _extract_classic_split(meta_detected, test_split) + splits_classic_det.append({ + 'train': train, + 'query': test, + 'gallery': test, + 'num_train_pids': num_train_pids, + 'num_train_imgs': num_train_imgs, + 'num_query_pids': num_test_pids, + 'num_query_imgs': num_test_imgs, + 'num_gallery_pids': num_test_pids, + 
'num_gallery_imgs': num_test_imgs + }) + + # create split for labeled images + train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \ + _extract_classic_split(meta_labeled, test_split) + splits_classic_lab.append({ + 'train': train, + 'query': test, + 'gallery': test, + 'num_train_pids': num_train_pids, + 'num_train_imgs': num_train_imgs, + 'num_query_pids': num_test_pids, + 'num_query_imgs': num_test_imgs, + 'num_gallery_pids': num_test_pids, + 'num_gallery_imgs': num_test_imgs + }) + + write_json(splits_classic_det, self.split_classic_det_json_path) + write_json(splits_classic_lab, self.split_classic_lab_json_path) + + def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel): + tmp_set = [] + unique_pids = set() + for idx in idxs: + img_name = filelist[idx][0] + camid = int(img_name.split('_')[2]) - 1 # make it 0-based + pid = pids[idx] + if relabel: + pid = pid2label[pid] + img_path = osp.join(img_dir, img_name) + tmp_set.append((img_path, int(pid), camid)) + unique_pids.add(pid) + return tmp_set, len(unique_pids), len(idxs) + + def _extract_new_split(split_dict, img_dir): + train_idxs = split_dict['train_idx'].flatten() - 1 # index-0 + pids = split_dict['labels'].flatten() + train_pids = set(pids[train_idxs]) + pid2label = {pid: label for label, pid in enumerate(train_pids)} + query_idxs = split_dict['query_idx'].flatten() - 1 + gallery_idxs = split_dict['gallery_idx'].flatten() - 1 + filelist = split_dict['filelist'].flatten() + train_info = _extract_set(filelist, pids, pid2label, train_idxs, img_dir, relabel=True) + query_info = _extract_set(filelist, pids, pid2label, query_idxs, img_dir, relabel=False) + gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False) + return train_info, query_info, gallery_info + + print('Creating new split for detected images (767/700) ...') + train_info, query_info, gallery_info = _extract_new_split( + loadmat(self.split_new_det_mat_path), + self.imgs_detected_dir + ) + split = [{ + 'train': train_info[0], + 'query': query_info[0], + 'gallery': gallery_info[0], + 'num_train_pids': train_info[1], + 'num_train_imgs': train_info[2], + 'num_query_pids': query_info[1], + 'num_query_imgs': query_info[2], + 'num_gallery_pids': gallery_info[1], + 'num_gallery_imgs': gallery_info[2] + }] + write_json(split, self.split_new_det_json_path) + + print('Creating new split for labeled images (767/700) ...') + train_info, query_info, gallery_info = _extract_new_split( + loadmat(self.split_new_lab_mat_path), + self.imgs_labeled_dir + ) + split = [{ + 'train': train_info[0], + 'query': query_info[0], + 'gallery': gallery_info[0], + 'num_train_pids': train_info[1], + 'num_train_imgs': train_info[2], + 'num_query_pids': query_info[1], + 'num_query_imgs': query_info[2], + 'num_gallery_pids': gallery_info[1], + 'num_gallery_imgs': gallery_info[2] + }] + write_json(split, self.split_new_lab_json_path) diff --git a/data_v2/datasets/image/cuhk03_detected.py b/data_v2/datasets/image/cuhk03_detected.py new file mode 100755 index 000000000..ba2e2b5bd --- /dev/null +++ b/data_v2/datasets/image/cuhk03_detected.py @@ -0,0 +1,69 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import re + +from .. import ImageDataset + + +class CUHK03_Detected(ImageDataset): + """DukeMTMC-reID. + + Reference: + - Ristani et al. Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking. 
ECCVW 2016. + - Zheng et al. Unlabeled Samples Generated by GAN Improve the Person Re-identification Baseline in vitro. ICCV 2017. + + URL: ``_ + + Dataset statistics: + - identities: 1404 (train + query). + - images:16522 (train) + 2228 (query) + 17661 (gallery). + - cameras: 8. + """ + dataset_dir = 'cuhk03' + + def __init__(self, root='', **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.train_dir = osp.join(self.dataset_dir, 'CUHK03_detected/bounding_box_train') + self.query_dir = osp.join(self.dataset_dir, 'CUHK03_detected/query') + self.gallery_dir = osp.join(self.dataset_dir, 'CUHK03_detected/bounding_box_test') + + required_files = [ + self.dataset_dir, + self.train_dir, + self.query_dir, + self.gallery_dir + ] + self.check_before_run(required_files) + + train = self.process_dir(self.train_dir, relabel=True) + query = self.process_dir(self.query_dir, relabel=False) + gallery = self.process_dir(self.gallery_dir, relabel=False) + + super(CUHK03_Detected, self).__init__(train, query, gallery, **kwargs) + + def process_dir(self, dir_path, relabel=False): + img_paths = glob.glob(osp.join(dir_path, '*.png')) + pattern = re.compile(r'([-\d]+)_c(\d)') + + pid_container = set() + for img_path in img_paths: + pid, _ = map(int, pattern.search(img_path).groups()) + pid_container.add(pid) + pid2label = {pid:label for label, pid in enumerate(pid_container)} + + data = [] + for img_path in img_paths: + pid, camid = map(int, pattern.search(img_path).groups()) + #assert 1 <= camid <= 8 + camid -= 1 # index starts from 0 + if relabel: pid = pid2label[pid] + data.append((img_path, pid, camid)) + + return data diff --git a/data_v2/datasets/image/cuhk03_labeled.py b/data_v2/datasets/image/cuhk03_labeled.py new file mode 100755 index 000000000..20ed2e93d --- /dev/null +++ b/data_v2/datasets/image/cuhk03_labeled.py @@ -0,0 +1,69 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import re + +from .. import ImageDataset + + +class CUHK03_Labeled(ImageDataset): + """DukeMTMC-reID. + + Reference: + - Ristani et al. Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking. ECCVW 2016. + - Zheng et al. Unlabeled Samples Generated by GAN Improve the Person Re-identification Baseline in vitro. ICCV 2017. + + URL: ``_ + + Dataset statistics: + - identities: 1404 (train + query). + - images:16522 (train) + 2228 (query) + 17661 (gallery). + - cameras: 8. 
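+
+    Example (illustrative; assumes the labeled CUHK03 images have already been
+    exported to a Market-1501-style layout under ``<root>/cuhk03/CUHK03_labeled``)::
+
+        >>> dataset = CUHK03_Labeled(root='/path/to/datasets')
+        >>> train, query, gallery = dataset.train, dataset.query, dataset.gallery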
+ """ + dataset_dir = 'cuhk03' + + def __init__(self, root='', **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.train_dir = osp.join(self.dataset_dir, 'CUHK03_labeled/bounding_box_train') + self.query_dir = osp.join(self.dataset_dir, 'CUHK03_labeled/query') + self.gallery_dir = osp.join(self.dataset_dir, 'CUHK03_labeled/bounding_box_test') + + required_files = [ + self.dataset_dir, + self.train_dir, + self.query_dir, + self.gallery_dir + ] + self.check_before_run(required_files) + + train = self.process_dir(self.train_dir, relabel=True) + query = self.process_dir(self.query_dir, relabel=False) + gallery = self.process_dir(self.gallery_dir, relabel=False) + + super(CUHK03_Labeled, self).__init__(train, query, gallery, **kwargs) + + def process_dir(self, dir_path, relabel=False): + img_paths = glob.glob(osp.join(dir_path, '*.png')) + pattern = re.compile(r'([-\d]+)_c(\d)') + + pid_container = set() + for img_path in img_paths: + pid, _ = map(int, pattern.search(img_path).groups()) + pid_container.add(pid) + pid2label = {pid:label for label, pid in enumerate(pid_container)} + + data = [] + for img_path in img_paths: + pid, camid = map(int, pattern.search(img_path).groups()) + #assert 1 <= camid <= 8 + camid -= 1 # index starts from 0 + if relabel: pid = pid2label[pid] + data.append((img_path, pid, camid)) + + return data diff --git a/data_v2/datasets/image/dukemtmcreid.py b/data_v2/datasets/image/dukemtmcreid.py new file mode 100755 index 000000000..de4f5097e --- /dev/null +++ b/data_v2/datasets/image/dukemtmcreid.py @@ -0,0 +1,71 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import re + +from .. import ImageDataset + + +class DukeMTMCreID(ImageDataset): + """DukeMTMC-reID. + + Reference: + - Ristani et al. Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking. ECCVW 2016. + - Zheng et al. Unlabeled Samples Generated by GAN Improve the Person Re-identification Baseline in vitro. ICCV 2017. + + URL: ``_ + + Dataset statistics: + - identities: 1404 (train + query). + - images:16522 (train) + 2228 (query) + 17661 (gallery). + - cameras: 8. 
+ """ + dataset_dir = '' + dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-reID.zip' + + def __init__(self, root='', **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + self.train_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/bounding_box_train') + self.query_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/query') + self.gallery_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/bounding_box_test') + + required_files = [ + self.dataset_dir, + self.train_dir, + self.query_dir, + self.gallery_dir + ] + self.check_before_run(required_files) + + train = self.process_dir(self.train_dir, relabel=True) + query = self.process_dir(self.query_dir, relabel=False) + gallery = self.process_dir(self.gallery_dir, relabel=False) + + super(DukeMTMCreID, self).__init__(train, query, gallery, **kwargs) + + def process_dir(self, dir_path, relabel=False): + img_paths = glob.glob(osp.join(dir_path, '*.jpg')) + pattern = re.compile(r'([-\d]+)_c(\d)') + + pid_container = set() + for img_path in img_paths: + pid, _ = map(int, pattern.search(img_path).groups()) + pid_container.add(pid) + pid2label = {pid:label for label, pid in enumerate(pid_container)} + + data = [] + for img_path in img_paths: + pid, camid = map(int, pattern.search(img_path).groups()) + assert 1 <= camid <= 8 + camid -= 1 # index starts from 0 + if relabel: pid = pid2label[pid] + data.append((img_path, pid, camid)) + + return data \ No newline at end of file diff --git a/data_v2/datasets/image/grid.py b/data_v2/datasets/image/grid.py new file mode 100755 index 000000000..2b85fe455 --- /dev/null +++ b/data_v2/datasets/image/grid.py @@ -0,0 +1,114 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +from scipy.io import loadmat + +from .. import ImageDataset +from ..utils import read_json, write_json + + +class GRID(ImageDataset): + """GRID. + + Reference: + Loy et al. Multi-camera activity correlation analysis. CVPR 2009. + + URL: ``_ + + Dataset statistics: + - identities: 250. + - images: 1275. + - cameras: 8. 
+ """ + dataset_dir = 'grid' + dataset_url = 'http://personal.ie.cuhk.edu.hk/~ccloy/files/datasets/underground_reid.zip' + + def __init__(self, root='', split_id=0, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.probe_path = osp.join(self.dataset_dir, 'underground_reid', 'probe') + self.gallery_path = osp.join(self.dataset_dir, 'underground_reid', 'gallery') + self.split_mat_path = osp.join(self.dataset_dir, 'underground_reid', 'features_and_partitions.mat') + self.split_path = osp.join(self.dataset_dir, 'splits.json') + + required_files = [ + self.dataset_dir, + self.probe_path, + self.gallery_path, + self.split_mat_path + ] + self.check_before_run(required_files) + + self.prepare_split() + splits = read_json(self.split_path) + if split_id >= len(splits): + raise ValueError('split_id exceeds range, received {}, ' + 'but expected between 0 and {}'.format(split_id, len(splits)-1)) + split = splits[split_id] + + train = split['train'] + query = split['query'] + gallery = split['gallery'] + + train = [tuple(item) for item in train] + query = [tuple(item) for item in query] + gallery = [tuple(item) for item in gallery] + + super(GRID, self).__init__(train, query, gallery, **kwargs) + + def prepare_split(self): + if not osp.exists(self.split_path): + print('Creating 10 random splits') + split_mat = loadmat(self.split_mat_path) + trainIdxAll = split_mat['trainIdxAll'][0] # length = 10 + probe_img_paths = sorted(glob.glob(osp.join(self.probe_path, '*.jpeg'))) + gallery_img_paths = sorted(glob.glob(osp.join(self.gallery_path, '*.jpeg'))) + + splits = [] + for split_idx in range(10): + train_idxs = trainIdxAll[split_idx][0][0][2][0].tolist() + assert len(train_idxs) == 125 + idx2label = {idx: label for label, idx in enumerate(train_idxs)} + + train, query, gallery = [], [], [] + + # processing probe folder + for img_path in probe_img_paths: + img_name = osp.basename(img_path) + img_idx = int(img_name.split('_')[0]) + camid = int(img_name.split('_')[1]) - 1 # index starts from 0 + if img_idx in train_idxs: + train.append((img_path, idx2label[img_idx], camid)) + else: + query.append((img_path, img_idx, camid)) + + # process gallery folder + for img_path in gallery_img_paths: + img_name = osp.basename(img_path) + img_idx = int(img_name.split('_')[0]) + camid = int(img_name.split('_')[1]) - 1 # index starts from 0 + if img_idx in train_idxs: + train.append((img_path, idx2label[img_idx], camid)) + else: + gallery.append((img_path, img_idx, camid)) + + split = { + 'train': train, + 'query': query, + 'gallery': gallery, + 'num_train_pids': 125, + 'num_query_pids': 125, + 'num_gallery_pids': 900 + } + splits.append(split) + + print('Totally {} splits are created'.format(len(splits))) + write_json(splits, self.split_path) + print('Split file saved to {}'.format(self.split_path)) \ No newline at end of file diff --git a/data_v2/datasets/image/ilids.py b/data_v2/datasets/image/ilids.py new file mode 100755 index 000000000..daa193d2f --- /dev/null +++ b/data_v2/datasets/image/ilids.py @@ -0,0 +1,142 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import numpy as np +import copy +import random +from collections import defaultdict + +from .. 
import ImageDataset +from ..utils import read_json, write_json + + +class iLIDS(ImageDataset): + """QMUL-iLIDS. + + Reference: + Zheng et al. Associating Groups of People. BMVC 2009. + + Dataset statistics: + - identities: 119. + - images: 476. + - cameras: 8 (not explicitly provided). + """ + dataset_dir = 'ilids' + dataset_url = 'http://www.eecs.qmul.ac.uk/~jason/data/i-LIDS_Pedestrian.tgz' + + def __init__(self, root='', split_id=0, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.data_dir = osp.join(self.dataset_dir, 'i-LIDS_Pedestrian/Persons') + self.split_path = osp.join(self.dataset_dir, 'splits.json') + + required_files = [ + self.dataset_dir, + self.data_dir + ] + self.check_before_run(required_files) + + self.prepare_split() + splits = read_json(self.split_path) + if split_id >= len(splits): + raise ValueError('split_id exceeds range, received {}, but ' + 'expected between 0 and {}'.format(split_id, len(splits)-1)) + split = splits[split_id] + + train, query, gallery = self.process_split(split) + + super(iLIDS, self).__init__(train, query, gallery, **kwargs) + + def prepare_split(self): + if not osp.exists(self.split_path): + print('Creating splits ...') + + paths = glob.glob(osp.join(self.data_dir, '*.jpg')) + img_names = [osp.basename(path) for path in paths] + num_imgs = len(img_names) + assert num_imgs == 476, 'There should be 476 images, but ' \ + 'got {}, please check the data'.format(num_imgs) + + # store image names + # image naming format: + # the first four digits denote the person ID + # the last four digits denote the sequence index + pid_dict = defaultdict(list) + for img_name in img_names: + pid = int(img_name[:4]) + pid_dict[pid].append(img_name) + pids = list(pid_dict.keys()) + num_pids = len(pids) + assert num_pids == 119, 'There should be 119 identities, ' \ + 'but got {}, please check the data'.format(num_pids) + + num_train_pids = int(num_pids * 0.5) + num_test_pids = num_pids - num_train_pids # supposed to be 60 + + splits = [] + for _ in range(10): + # randomly choose num_train_pids train IDs and num_test_pids test IDs + pids_copy = copy.deepcopy(pids) + random.shuffle(pids_copy) + train_pids = pids_copy[:num_train_pids] + test_pids = pids_copy[num_train_pids:] + + train = [] + query = [] + gallery = [] + + # for train IDs, all images are used in the train set. + for pid in train_pids: + img_names = pid_dict[pid] + train.extend(img_names) + + # for each test ID, randomly choose two images, one for + # query and the other one for gallery. 
+ for pid in test_pids: + img_names = pid_dict[pid] + samples = random.sample(img_names, 2) + query.append(samples[0]) + gallery.append(samples[1]) + + split = {'train': train, 'query': query, 'gallery': gallery} + splits.append(split) + + print('Totally {} splits are created'.format(len(splits))) + write_json(splits, self.split_path) + print('Split file is saved to {}'.format(self.split_path)) + + def get_pid2label(self, img_names): + pid_container = set() + for img_name in img_names: + pid = int(img_name[:4]) + pid_container.add(pid) + pid2label = {pid: label for label, pid in enumerate(pid_container)} + return pid2label + + def parse_img_names(self, img_names, pid2label=None): + data = [] + + for img_name in img_names: + pid = int(img_name[:4]) + if pid2label is not None: + pid = pid2label[pid] + camid = int(img_name[4:7]) - 1 # 0-based + img_path = osp.join(self.data_dir, img_name) + data.append((img_path, pid, camid)) + + return data + + def process_split(self, split): + train, query, gallery = [], [], [] + train_pid2label = self.get_pid2label(split['train']) + train = self.parse_img_names(split['train'], train_pid2label) + query = self.parse_img_names(split['query']) + gallery = self.parse_img_names(split['gallery']) + return train, query, gallery \ No newline at end of file diff --git a/data_v2/datasets/image/market1501.py b/data_v2/datasets/image/market1501.py new file mode 100755 index 000000000..b093324eb --- /dev/null +++ b/data_v2/datasets/image/market1501.py @@ -0,0 +1,94 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import re +import warnings + +from .. import ImageDataset + + +class Market1501(ImageDataset): + """Market1501. + + Reference: + Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015. + + URL: ``_ + + Dataset statistics: + - identities: 1501 (+1 for background). + - images: 12936 (train) + 3368 (query) + 15913 (gallery). + """ + _junk_pids = [0, -1] + dataset_dir = 'Market-1501' + dataset_url = 'http://188.138.127.15:81/Datasets/Market-1501-v15.09.15.zip' + + def __init__(self, root='', market1501_500k=False, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + # allow alternative directory structure + self.data_dir = self.dataset_dir + data_dir = osp.join(self.data_dir, 'Market-1501-v15.09.15') + if osp.isdir(data_dir): + self.data_dir = data_dir + else: + warnings.warn('The current data structure is deprecated. 
Please ' + 'put data folders such as "bounding_box_train" under ' + '"Market-1501-v15.09.15".') + + self.train_dir = osp.join(self.data_dir, 'bounding_box_train') + self.query_dir = osp.join(self.data_dir, 'query') + self.gallery_dir = osp.join(self.data_dir, 'bounding_box_test') + self.extra_gallery_dir = osp.join(self.data_dir, 'images') + self.market1501_500k = market1501_500k + + required_files = [ + self.data_dir, + self.train_dir, + self.query_dir, + self.gallery_dir + ] + if self.market1501_500k: + required_files.append(self.extra_gallery_dir) + self.check_before_run(required_files) + + train = self.process_dir(self.train_dir, relabel=True) + query = self.process_dir(self.query_dir, relabel=False) + gallery = self.process_dir(self.gallery_dir, relabel=False) + if self.market1501_500k: + gallery += self.process_dir(self.extra_gallery_dir, relabel=False) + + super(Market1501, self).__init__(train, query, gallery, **kwargs) + + def process_dir(self, dir_path, relabel=False): + img_paths = glob.glob(osp.join(dir_path, '*.jpg')) + pattern = re.compile(r'([-\d]+)_c(\d)') + + pid_container = set() + for img_path in img_paths: + pid, _ = map(int, pattern.search(img_path).groups()) + if pid == -1: + continue # junk images are just ignored + pid_container.add(pid) + pid2label = {pid:label for label, pid in enumerate(pid_container)} + + data = [] + for img_path in img_paths: + pid, camid = map(int, pattern.search(img_path).groups()) + if pid == -1: + continue # junk images are just ignored + assert 0 <= pid <= 1501 # pid == 0 means background + assert 1 <= camid <= 6 + camid -= 1 # index starts from 0 + if relabel: + pid = pid2label[pid] + data.append((img_path, pid, camid)) + + return data \ No newline at end of file diff --git a/data_v2/datasets/image/mot17.py b/data_v2/datasets/image/mot17.py new file mode 100644 index 000000000..547692ca8 --- /dev/null +++ b/data_v2/datasets/image/mot17.py @@ -0,0 +1,73 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import re + +from .. import ImageDataset + + +class MOT17(ImageDataset): + """DukeMTMC-reID. + + Reference: + - Ristani et al. Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking. ECCVW 2016. + - Zheng et al. Unlabeled Samples Generated by GAN Improve the Person Re-identification Baseline in vitro. ICCV 2017. + + URL: ``_ + + Dataset statistics: + - identities: 1404 (train + query). + - images:16522 (train) + 2228 (query) + 17661 (gallery). + - cameras: 8. 
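+
+    Example (illustrative; note that this loader builds query and gallery from the
+    same ``<root>/train`` directory, so it is intended for adding training data
+    rather than for standard evaluation)::
+
+        >>> dataset = MOT17(root='/path/to/datasets')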
+ """ + dataset_dir = '' + dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-reID.zip' + + def __init__(self, root='', **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + self.train_dir = osp.join(self.dataset_dir, 'train') + # self.query_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/query') + # self.gallery_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/bounding_box_test') + + required_files = [ + self.dataset_dir, + self.train_dir, + # self.query_dir, + # self.gallery_dir + ] + self.check_before_run(required_files) + + train = self.process_dir(self.train_dir, relabel=True) + query = self.process_dir(self.train_dir, relabel=False) + gallery = self.process_dir(self.train_dir, relabel=False) + # query = self.process_dir(self.query_dir, relabel=False) + # gallery = self.process_dir(self.gallery_dir, relabel=False) + + super(MOT17, self).__init__(train,query,gallery, **kwargs) + + def process_dir(self, dir_path, relabel=False): + img_paths = glob.glob(osp.join(dir_path, '*.jpg')) + pattern = re.compile(r'([-\d]+)_(\d)') + + pid_container = set() + for img_path in img_paths: + pid, _ = map(int, pattern.search(img_path).groups()) + pid_container.add(pid) + pid2label = {pid:label for label, pid in enumerate(pid_container)} + + data = [] + for img_path in img_paths: + pid, camid = map(int, pattern.search(img_path).groups()) + assert 1 <= camid <= 8 + camid -= 1 # index starts from 0 + if relabel: pid = pid2label[pid] + data.append((img_path, pid, camid)) + + return data \ No newline at end of file diff --git a/data_v2/datasets/image/msmt17.py b/data_v2/datasets/image/msmt17.py new file mode 100755 index 000000000..ca1974e1d --- /dev/null +++ b/data_v2/datasets/image/msmt17.py @@ -0,0 +1,70 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import re + +from .. import ImageDataset + + +class MSMT17(ImageDataset): + """ + MSMT17. + + Reference: + Wei et al. Person Transfer GAN to Bridge Domain Gap for Person Re-Identification. CVPR 2018. + + URL: ``_ + + Dataset statistics: + - identities: 4101. + - images: 32621 (train) + 11659 (query) + 82161 (gallery). + - cameras: 15. 
+ """ + + dataset_dir = 'msmt17' + + def __init__(self, root='', **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.train_dir = osp.join(self.dataset_dir, 'MSMT17/bounding_box_train') + self.query_dir = osp.join(self.dataset_dir, 'MSMT17/query') + self.gallery_dir = osp.join(self.dataset_dir, 'MSMT17/bounding_box_test') + + required_files = [ + self.dataset_dir, + self.train_dir, + self.query_dir, + self.gallery_dir + ] + self.check_before_run(required_files) + + train = self.process_dir(self.train_dir, relabel=True) + query = self.process_dir(self.query_dir, relabel=False) + gallery = self.process_dir(self.gallery_dir, relabel=False) + + super(MSMT17, self).__init__(train, query, gallery, **kwargs) + + def process_dir(self, dir_path, relabel=False): + img_paths = glob.glob(osp.join(dir_path, '*.jpg')) + pattern = re.compile(r'([-\d]+)_c(\d)') + + pid_container = set() + for img_path in img_paths: + pid, _ = map(int, pattern.search(img_path).groups()) + pid_container.add(pid) + pid2label = {pid:label for label, pid in enumerate(pid_container)} + + data = [] + for img_path in img_paths: + pid, camid = map(int, pattern.search(img_path).groups()) + assert 1 <= camid <= 15 + camid -= 1 # index starts from 0 + if relabel: pid = pid2label[pid] + data.append((img_path, pid, camid)) + + return data diff --git a/data_v2/datasets/image/prid.py b/data_v2/datasets/image/prid.py new file mode 100755 index 000000000..c9c566f4f --- /dev/null +++ b/data_v2/datasets/image/prid.py @@ -0,0 +1,107 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import random + +from .. import ImageDataset +from ..utils import read_json, write_json + + +class PRID(ImageDataset): + """PRID (single-shot version of prid-2011) + + Reference: + Hirzer et al. Person Re-Identification by Descriptive and Discriminative + Classification. SCIA 2011. + + URL: ``_ + + Dataset statistics: + - Two views. + - View A captures 385 identities. + - View B captures 749 identities. + - 200 identities appear in both views. 
+ """ + dataset_dir = 'prid2011' + dataset_url = None + + def __init__(self, root='', split_id=0, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.cam_a_dir = osp.join(self.dataset_dir, 'prid_2011', 'single_shot', 'cam_a') + self.cam_b_dir = osp.join(self.dataset_dir, 'prid_2011', 'single_shot', 'cam_b') + self.split_path = osp.join(self.dataset_dir, 'splits_single_shot.json') + + required_files = [ + self.dataset_dir, + self.cam_a_dir, + self.cam_b_dir + ] + self.check_before_run(required_files) + + self.prepare_split() + splits = read_json(self.split_path) + if split_id >= len(splits): + raise ValueError('split_id exceeds range, received {}, but expected between 0 and {}'.format(split_id, len(splits)-1)) + split = splits[split_id] + + train, query, gallery = self.process_split(split) + + super(PRID, self).__init__(train, query, gallery, **kwargs) + + def prepare_split(self): + if not osp.exists(self.split_path): + print('Creating splits ...') + + splits = [] + for _ in range(10): + # randomly sample 100 IDs for train and use the rest 100 IDs for test + # (note: there are only 200 IDs appearing in both views) + pids = [i for i in range(1, 201)] + train_pids = random.sample(pids, 100) + train_pids.sort() + test_pids = [i for i in pids if i not in train_pids] + split = {'train': train_pids, 'test': test_pids} + splits.append(split) + + print('Totally {} splits are created'.format(len(splits))) + write_json(splits, self.split_path) + print('Split file is saved to {}'.format(self.split_path)) + + def process_split(self, split): + train, query, gallery = [], [], [] + train_pids = split['train'] + test_pids = split['test'] + + train_pid2label = {pid: label for label, pid in enumerate(train_pids)} + + # train + train = [] + for pid in train_pids: + img_name = 'person_' + str(pid).zfill(4) + '.png' + pid = train_pid2label[pid] + img_a_path = osp.join(self.cam_a_dir, img_name) + train.append((img_a_path, pid, 0)) + img_b_path = osp.join(self.cam_b_dir, img_name) + train.append((img_b_path, pid, 1)) + + # query and gallery + query, gallery = [], [] + for pid in test_pids: + img_name = 'person_' + str(pid).zfill(4) + '.png' + img_a_path = osp.join(self.cam_a_dir, img_name) + query.append((img_a_path, pid, 0)) + img_b_path = osp.join(self.cam_b_dir, img_name) + gallery.append((img_b_path, pid, 1)) + for pid in range(201, 750): + img_name = 'person_' + str(pid).zfill(4) + '.png' + img_b_path = osp.join(self.cam_b_dir, img_name) + gallery.append((img_b_path, pid, 1)) + + return train, query, gallery \ No newline at end of file diff --git a/data_v2/datasets/image/sensereid.py b/data_v2/datasets/image/sensereid.py new file mode 100755 index 000000000..f49718636 --- /dev/null +++ b/data_v2/datasets/image/sensereid.py @@ -0,0 +1,72 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import copy + +from .. import ImageDataset + + +class SenseReID(ImageDataset): + """SenseReID. + + This dataset is used for test purpose only. + + Reference: + Zhao et al. Spindle Net: Person Re-identification with Human Body + Region Guided Feature Decomposition and Fusion. CVPR 2017. + + URL: ``_ + + Dataset statistics: + - query: 522 ids, 1040 images. + - gallery: 1717 ids, 3388 images. 
+ """ + dataset_dir = 'sensereid' + dataset_url = None + + def __init__(self, root='', **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.query_dir = osp.join(self.dataset_dir, 'SenseReID', 'test_probe') + self.gallery_dir = osp.join(self.dataset_dir, 'SenseReID', 'test_gallery') + + required_files = [ + self.dataset_dir, + self.query_dir, + self.gallery_dir + ] + self.check_before_run(required_files) + + query = self.process_dir(self.query_dir) + gallery = self.process_dir(self.gallery_dir) + + # relabel + g_pids = set() + for _, pid, _ in gallery: + g_pids.add(pid) + pid2label = {pid: i for i, pid in enumerate(g_pids)} + + query = [(img_path, pid2label[pid], camid) for img_path, pid, camid in query] + gallery = [(img_path, pid2label[pid], camid) for img_path, pid, camid in gallery] + train = copy.deepcopy(query) + copy.deepcopy(gallery) # dummy variable + + super(SenseReID, self).__init__(train, query, gallery, **kwargs) + + def process_dir(self, dir_path): + img_paths = glob.glob(osp.join(dir_path, '*.jpg')) + data = [] + + for img_path in img_paths: + img_name = osp.splitext(osp.basename(img_path))[0] + pid, camid = img_name.split('_') + pid, camid = int(pid), int(camid) + data.append((img_path, pid, camid)) + + return data \ No newline at end of file diff --git a/data_v2/datasets/image/viper.py b/data_v2/datasets/image/viper.py new file mode 100755 index 000000000..32eb1c8a9 --- /dev/null +++ b/data_v2/datasets/image/viper.py @@ -0,0 +1,131 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import numpy as np + +from .. import ImageDataset +from ..utils import read_json, write_json + + +class VIPeR(ImageDataset): + """VIPeR. + + Reference: + Gray et al. Evaluating appearance models for recognition, reacquisition, and tracking. PETS 2007. + + URL: ``_ + + Dataset statistics: + - identities: 632. + - images: 632 x 2 = 1264. + - cameras: 2. 
+ """ + dataset_dir = 'viper' + dataset_url = 'http://users.soe.ucsc.edu/~manduchi/VIPeR.v1.0.zip' + + def __init__(self, root='', split_id=0, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.cam_a_dir = osp.join(self.dataset_dir, 'VIPeR', 'cam_a') + self.cam_b_dir = osp.join(self.dataset_dir, 'VIPeR', 'cam_b') + self.split_path = osp.join(self.dataset_dir, 'splits.json') + + required_files = [ + self.dataset_dir, + self.cam_a_dir, + self.cam_b_dir + ] + self.check_before_run(required_files) + + self.prepare_split() + splits = read_json(self.split_path) + if split_id >= len(splits): + raise ValueError('split_id exceeds range, received {}, ' + 'but expected between 0 and {}'.format(split_id, len(splits)-1)) + split = splits[split_id] + + train = split['train'] + query = split['query'] # query and gallery share the same images + gallery = split['gallery'] + + train = [tuple(item) for item in train] + query = [tuple(item) for item in query] + gallery = [tuple(item) for item in gallery] + + super(VIPeR, self).__init__(train, query, gallery, **kwargs) + + def prepare_split(self): + if not osp.exists(self.split_path): + print('Creating 10 random splits of train ids and test ids') + + cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_dir, '*.bmp'))) + cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_dir, '*.bmp'))) + assert len(cam_a_imgs) == len(cam_b_imgs) + num_pids = len(cam_a_imgs) + print('Number of identities: {}'.format(num_pids)) + num_train_pids = num_pids // 2 + + """ + In total, there will be 20 splits because each random split creates two + sub-splits, one using cameraA as query and cameraB as gallery + while the other using cameraB as query and cameraA as gallery. + Therefore, results should be averaged over 20 splits (split_id=0~19). + + In practice, a model trained on split_id=0 can be applied to split_id=0&1 + as split_id=0&1 share the same training data (so on and so forth). 
+ """ + splits = [] + for _ in range(10): + order = np.arange(num_pids) + np.random.shuffle(order) + train_idxs = order[:num_train_pids] + test_idxs = order[num_train_pids:] + assert not bool(set(train_idxs) & set(test_idxs)), 'Error: train and test overlap' + + train = [] + for pid, idx in enumerate(train_idxs): + cam_a_img = cam_a_imgs[idx] + cam_b_img = cam_b_imgs[idx] + train.append((cam_a_img, pid, 0)) + train.append((cam_b_img, pid, 1)) + + test_a = [] + test_b = [] + for pid, idx in enumerate(test_idxs): + cam_a_img = cam_a_imgs[idx] + cam_b_img = cam_b_imgs[idx] + test_a.append((cam_a_img, pid, 0)) + test_b.append((cam_b_img, pid, 1)) + + # use cameraA as query and cameraB as gallery + split = { + 'train': train, + 'query': test_a, + 'gallery': test_b, + 'num_train_pids': num_train_pids, + 'num_query_pids': num_pids - num_train_pids, + 'num_gallery_pids': num_pids - num_train_pids + } + splits.append(split) + + # use cameraB as query and cameraA as gallery + split = { + 'train': train, + 'query': test_b, + 'gallery': test_a, + 'num_train_pids': num_train_pids, + 'num_query_pids': num_pids - num_train_pids, + 'num_gallery_pids': num_pids - num_train_pids + } + splits.append(split) + + print('Totally {} splits are created'.format(len(splits))) + write_json(splits, self.split_path) + print('Split file saved to {}'.format(self.split_path)) \ No newline at end of file diff --git a/data_v2/datasets/utils.py b/data_v2/datasets/utils.py new file mode 100644 index 000000000..0c803b9e1 --- /dev/null +++ b/data_v2/datasets/utils.py @@ -0,0 +1,128 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +__all__ = ['mkdir_if_missing', 'check_isfile', 'read_json', 'write_json', + 'set_random_seed', 'download_url', 'read_image', 'collect_env_info'] + +import sys +import os +import os.path as osp +import time +import errno +import json +from collections import OrderedDict +import warnings +import random +import numpy as np +import PIL +from PIL import Image + +import torch + + +def mkdir_if_missing(dirname): + """Creates dirname if it is missing.""" + if not osp.exists(dirname): + try: + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def check_isfile(fpath): + """Checks if the given path is a file. + + Args: + fpath (str): file path. + + Returns: + bool + """ + isfile = osp.isfile(fpath) + if not isfile: + warnings.warn('No file found at "{}"'.format(fpath)) + return isfile + + +def read_json(fpath): + """Reads json file from a path.""" + with open(fpath, 'r') as f: + obj = json.load(f) + return obj + + +def write_json(obj, fpath): + """Writes to a json file.""" + mkdir_if_missing(osp.dirname(fpath)) + with open(fpath, 'w') as f: + json.dump(obj, f, indent=4, separators=(',', ': ')) + + +def set_random_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + +def download_url(url, dst): + """Downloads file from a url to a destination. + + Args: + url (str): url to download file. + dst (str): destination path. 
+ """ + from six.moves import urllib + print('* url="{}"'.format(url)) + print('* destination="{}"'.format(dst)) + + def _reporthook(count, block_size, total_size): + global start_time + if count == 0: + start_time = time.time() + return + duration = time.time() - start_time + progress_size = int(count * block_size) + speed = int(progress_size / (1024 * duration)) + percent = int(count * block_size * 100 / total_size) + sys.stdout.write('\r...%d%%, %d MB, %d KB/s, %d seconds passed' % + (percent, progress_size / (1024 * 1024), speed, duration)) + sys.stdout.flush() + + urllib.request.urlretrieve(url, dst, _reporthook) + sys.stdout.write('\n') + + +def read_image(path): + """Reads image from path using ``PIL.Image``. + + Args: + path (str): path to an image. + + Returns: + PIL image + """ + got_img = False + if not osp.exists(path): + raise IOError('"{}" does not exist'.format(path)) + while not got_img: + try: + img = Image.open(path).convert('RGB') + got_img = True + except IOError: + print('IOError incurred when reading "{}". Will redo. Don\'t worry. Just chill.'.format(img_path)) + pass + return img + + +def collect_env_info(): + """Returns env info as a string. + + Code source: github.com/facebookresearch/maskrcnn-benchmark + """ + from torch.utils.collect_env import get_pretty_env_info + env_str = get_pretty_env_info() + env_str += '\n Pillow ({})'.format(PIL.__version__) + return env_str diff --git "a/data_v2/datasets/video/Icon\r" "b/data_v2/datasets/video/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/data_v2/datasets/video/__init__.py b/data_v2/datasets/video/__init__.py new file mode 100755 index 000000000..f82da1a6f --- /dev/null +++ b/data_v2/datasets/video/__init__.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import +from __future__ import print_function + +from .mars import Mars +from .ilidsvid import iLIDSVID +from .prid2011 import PRID2011 +from .dukemtmcvidreid import DukeMTMCVidReID \ No newline at end of file diff --git a/data_v2/datasets/video/dukemtmcvidreid.py b/data_v2/datasets/video/dukemtmcvidreid.py new file mode 100755 index 000000000..93b13010e --- /dev/null +++ b/data_v2/datasets/video/dukemtmcvidreid.py @@ -0,0 +1,111 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +import warnings + +from .. import VideoDataset +from ..utils import read_json, write_json + + +class DukeMTMCVidReID(VideoDataset): + """DukeMTMCVidReID. + + Reference: + - Ristani et al. Performance Measures and a Data Set for Multi-Target, + Multi-Camera Tracking. ECCVW 2016. + - Wu et al. Exploit the Unknown Gradually: One-Shot Video-Based Person + Re-Identification by Stepwise Learning. CVPR 2018. + + URL: ``_ + + Dataset statistics: + - identities: 702 (train) + 702 (test). + - tracklets: 2196 (train) + 2636 (test). 
+ """ + dataset_dir = 'dukemtmc-vidreid' + dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-VideoReID.zip' + + def __init__(self, root='', min_seq_len=0, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.train_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/train') + self.query_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/query') + self.gallery_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/gallery') + self.split_train_json_path = osp.join(self.dataset_dir, 'split_train.json') + self.split_query_json_path = osp.join(self.dataset_dir, 'split_query.json') + self.split_gallery_json_path = osp.join(self.dataset_dir, 'split_gallery.json') + self.min_seq_len = min_seq_len + + required_files = [ + self.dataset_dir, + self.train_dir, + self.query_dir, + self.gallery_dir + ] + self.check_before_run(required_files) + + train = self.process_dir(self.train_dir, self.split_train_json_path, relabel=True) + query = self.process_dir(self.query_dir, self.split_query_json_path, relabel=False) + gallery = self.process_dir(self.gallery_dir, self.split_gallery_json_path, relabel=False) + + super(DukeMTMCVidReID, self).__init__(train, query, gallery, **kwargs) + + def process_dir(self, dir_path, json_path, relabel): + if osp.exists(json_path): + split = read_json(json_path) + return split['tracklets'] + + print('=> Generating split json file (** this might take a while **)') + pdirs = glob.glob(osp.join(dir_path, '*')) # avoid .DS_Store + print('Processing "{}" with {} person identities'.format(dir_path, len(pdirs))) + + pid_container = set() + for pdir in pdirs: + pid = int(osp.basename(pdir)) + pid_container.add(pid) + pid2label = {pid:label for label, pid in enumerate(pid_container)} + + tracklets = [] + for pdir in pdirs: + pid = int(osp.basename(pdir)) + if relabel: + pid = pid2label[pid] + tdirs = glob.glob(osp.join(pdir, '*')) + for tdir in tdirs: + raw_img_paths = glob.glob(osp.join(tdir, '*.jpg')) + num_imgs = len(raw_img_paths) + + if num_imgs < self.min_seq_len: + continue + + img_paths = [] + for img_idx in range(num_imgs): + # some tracklet starts from 0002 instead of 0001 + img_idx_name = 'F' + str(img_idx+1).zfill(4) + res = glob.glob(osp.join(tdir, '*' + img_idx_name + '*.jpg')) + if len(res) == 0: + warnings.warn('Index name {} in {} is missing, skip'.format(img_idx_name, tdir)) + continue + img_paths.append(res[0]) + img_name = osp.basename(img_paths[0]) + if img_name.find('_') == -1: + # old naming format: 0001C6F0099X30823.jpg + camid = int(img_name[5]) - 1 + else: + # new naming format: 0001_C6_F0099_X30823.jpg + camid = int(img_name[6]) - 1 + img_paths = tuple(img_paths) + tracklets.append((img_paths, pid, camid)) + + print('Saving split to {}'.format(json_path)) + split_dict = {'tracklets': tracklets} + write_json(split_dict, json_path) + + return tracklets \ No newline at end of file diff --git a/data_v2/datasets/video/ilidsvid.py b/data_v2/datasets/video/ilidsvid.py new file mode 100755 index 000000000..5809ae178 --- /dev/null +++ b/data_v2/datasets/video/ilidsvid.py @@ -0,0 +1,126 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob +from scipy.io import loadmat + +from .. import VideoDataset +from ..utils import read_json, write_json + + +class iLIDSVID(VideoDataset): + """iLIDS-VID. 
+ + Reference: + Wang et al. Person Re-Identification by Video Ranking. ECCV 2014. + + URL: ``_ + + Dataset statistics: + - identities: 300. + - tracklets: 600. + - cameras: 2. + """ + dataset_dir = 'ilids-vid' + dataset_url = 'http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/iLIDS-VID.tar' + + def __init__(self, root='', split_id=0, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.data_dir = osp.join(self.dataset_dir, 'i-LIDS-VID') + self.split_dir = osp.join(self.dataset_dir, 'train-test people splits') + self.split_mat_path = osp.join(self.split_dir, 'train_test_splits_ilidsvid.mat') + self.split_path = osp.join(self.dataset_dir, 'splits.json') + self.cam_1_path = osp.join(self.dataset_dir, 'i-LIDS-VID/sequences/cam1') + self.cam_2_path = osp.join(self.dataset_dir, 'i-LIDS-VID/sequences/cam2') + + required_files = [ + self.dataset_dir, + self.data_dir, + self.split_dir + ] + self.check_before_run(required_files) + + self.prepare_split() + splits = read_json(self.split_path) + if split_id >= len(splits): + raise ValueError('split_id exceeds range, received {}, but expected between 0 and {}'.format(split_id, len(splits)-1)) + split = splits[split_id] + train_dirs, test_dirs = split['train'], split['test'] + + train = self.process_data(train_dirs, cam1=True, cam2=True) + query = self.process_data(test_dirs, cam1=True, cam2=False) + gallery = self.process_data(test_dirs, cam1=False, cam2=True) + + super(iLIDSVID, self).__init__(train, query, gallery, **kwargs) + + def prepare_split(self): + if not osp.exists(self.split_path): + print('Creating splits ...') + mat_split_data = loadmat(self.split_mat_path)['ls_set'] + + num_splits = mat_split_data.shape[0] + num_total_ids = mat_split_data.shape[1] + assert num_splits == 10 + assert num_total_ids == 300 + num_ids_each = num_total_ids // 2 + + # pids in mat_split_data are indices, so we need to transform them + # to real pids + person_cam1_dirs = sorted(glob.glob(osp.join(self.cam_1_path, '*'))) + person_cam2_dirs = sorted(glob.glob(osp.join(self.cam_2_path, '*'))) + + person_cam1_dirs = [osp.basename(item) for item in person_cam1_dirs] + person_cam2_dirs = [osp.basename(item) for item in person_cam2_dirs] + + # make sure persons in one camera view can be found in the other camera view + assert set(person_cam1_dirs) == set(person_cam2_dirs) + + splits = [] + for i_split in range(num_splits): + # first 50% for testing and the remaining for training, following Wang et al. ECCV'14. + train_idxs = sorted(list(mat_split_data[i_split, num_ids_each:])) + test_idxs = sorted(list(mat_split_data[i_split, :num_ids_each])) + + train_idxs = [int(i)-1 for i in train_idxs] + test_idxs = [int(i)-1 for i in test_idxs] + + # transform pids to person dir names + train_dirs = [person_cam1_dirs[i] for i in train_idxs] + test_dirs = [person_cam1_dirs[i] for i in test_idxs] + + split = {'train': train_dirs, 'test': test_dirs} + splits.append(split) + + print('Totally {} splits are created, following Wang et al. 
ECCV\'14'.format(len(splits))) + print('Split file is saved to {}'.format(self.split_path)) + write_json(splits, self.split_path) + + def process_data(self, dirnames, cam1=True, cam2=True): + tracklets = [] + dirname2pid = {dirname:i for i, dirname in enumerate(dirnames)} + + for dirname in dirnames: + if cam1: + person_dir = osp.join(self.cam_1_path, dirname) + img_names = glob.glob(osp.join(person_dir, '*.png')) + assert len(img_names) > 0 + img_names = tuple(img_names) + pid = dirname2pid[dirname] + tracklets.append((img_names, pid, 0)) + + if cam2: + person_dir = osp.join(self.cam_2_path, dirname) + img_names = glob.glob(osp.join(person_dir, '*.png')) + assert len(img_names) > 0 + img_names = tuple(img_names) + pid = dirname2pid[dirname] + tracklets.append((img_names, pid, 1)) + + return tracklets \ No newline at end of file diff --git a/data_v2/datasets/video/mars.py b/data_v2/datasets/video/mars.py new file mode 100755 index 000000000..df9e14092 --- /dev/null +++ b/data_v2/datasets/video/mars.py @@ -0,0 +1,112 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +from scipy.io import loadmat +import warnings + +from .. import VideoDataset + + +class Mars(VideoDataset): + """MARS. + + Reference: + Zheng et al. MARS: A Video Benchmark for Large-Scale Person Re-identification. ECCV 2016. + + URL: ``_ + + Dataset statistics: + - identities: 1261. + - tracklets: 8298 (train) + 1980 (query) + 9330 (gallery). + - cameras: 6. + """ + dataset_dir = 'mars' + dataset_url = None + + def __init__(self, root='', **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.train_name_path = osp.join(self.dataset_dir, 'info/train_name.txt') + self.test_name_path = osp.join(self.dataset_dir, 'info/test_name.txt') + self.track_train_info_path = osp.join(self.dataset_dir, 'info/tracks_train_info.mat') + self.track_test_info_path = osp.join(self.dataset_dir, 'info/tracks_test_info.mat') + self.query_IDX_path = osp.join(self.dataset_dir, 'info/query_IDX.mat') + + required_files = [ + self.dataset_dir, + self.train_name_path, + self.test_name_path, + self.track_train_info_path, + self.track_test_info_path, + self.query_IDX_path + ] + self.check_before_run(required_files) + + train_names = self.get_names(self.train_name_path) + test_names = self.get_names(self.test_name_path) + track_train = loadmat(self.track_train_info_path)['track_train_info'] # numpy.ndarray (8298, 4) + track_test = loadmat(self.track_test_info_path)['track_test_info'] # numpy.ndarray (12180, 4) + query_IDX = loadmat(self.query_IDX_path)['query_IDX'].squeeze() # numpy.ndarray (1980,) + query_IDX -= 1 # index from 0 + track_query = track_test[query_IDX,:] + gallery_IDX = [i for i in range(track_test.shape[0]) if i not in query_IDX] + track_gallery = track_test[gallery_IDX,:] + + train = self.process_data(train_names, track_train, home_dir='bbox_train', relabel=True) + query = self.process_data(test_names, track_query, home_dir='bbox_test', relabel=False) + gallery = self.process_data(test_names, track_gallery, home_dir='bbox_test', relabel=False) + + super(Mars, self).__init__(train, query, gallery, **kwargs) + + def get_names(self, fpath): + names = [] + with open(fpath, 'r') as f: + for line in f: + new_line = line.rstrip() + names.append(new_line) + return names + + def process_data(self, names, 
meta_data, home_dir=None, relabel=False, min_seq_len=0): + assert home_dir in ['bbox_train', 'bbox_test'] + num_tracklets = meta_data.shape[0] + pid_list = list(set(meta_data[:,2].tolist())) + num_pids = len(pid_list) + + if relabel: pid2label = {pid:label for label, pid in enumerate(pid_list)} + tracklets = [] + + for tracklet_idx in range(num_tracklets): + data = meta_data[tracklet_idx,...] + start_index, end_index, pid, camid = data + if pid == -1: + continue # junk images are just ignored + assert 1 <= camid <= 6 + if relabel: pid = pid2label[pid] + camid -= 1 # index starts from 0 + img_names = names[start_index - 1:end_index] + + # make sure image names correspond to the same person + pnames = [img_name[:4] for img_name in img_names] + assert len(set(pnames)) == 1, 'Error: a single tracklet contains different person images' + + # make sure all images are captured under the same camera + camnames = [img_name[5] for img_name in img_names] + assert len(set(camnames)) == 1, 'Error: images are captured under different cameras!' + + # append image names with directory information + img_paths = [osp.join(self.dataset_dir, home_dir, img_name[:4], img_name) for img_name in img_names] + if len(img_paths) >= min_seq_len: + img_paths = tuple(img_paths) + tracklets.append((img_paths, pid, camid)) + + return tracklets + + def combine_all(self): + warnings.warn('Some query IDs do not appear in gallery. Therefore, combineall ' + 'does not make any difference to Mars') \ No newline at end of file diff --git a/data_v2/datasets/video/prid2011.py b/data_v2/datasets/video/prid2011.py new file mode 100755 index 000000000..2d01dc6e4 --- /dev/null +++ b/data_v2/datasets/video/prid2011.py @@ -0,0 +1,80 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import sys +import os +import os.path as osp +import glob + +from .. import VideoDataset +from ..utils import read_json, write_json + + +class PRID2011(VideoDataset): + """PRID2011. + + Reference: + Hirzer et al. Person Re-Identification by Descriptive and + Discriminative Classification. SCIA 2011. + + URL: ``_ + + Dataset statistics: + - identities: 200. + - tracklets: 400. + - cameras: 2. 
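+
+    Note: ``dataset_url`` is ``None`` and ``splits_prid2011.json`` is only read,
+    never generated here, so both the extracted ``prid_2011`` folder and the split
+    file are expected to already exist under the dataset root.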
+ """ + dataset_dir = 'prid2011' + dataset_url = None + + def __init__(self, root='', split_id=0, **kwargs): + self.root = osp.abspath(osp.expanduser(root)) + self.dataset_dir = osp.join(self.root, self.dataset_dir) + self.download_dataset(self.dataset_dir, self.dataset_url) + + self.split_path = osp.join(self.dataset_dir, 'splits_prid2011.json') + self.cam_a_dir = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_a') + self.cam_b_dir = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_b') + + required_files = [ + self.dataset_dir, + self.cam_a_dir, + self.cam_b_dir + ] + self.check_before_run(required_files) + + splits = read_json(self.split_path) + if split_id >= len(splits): + raise ValueError('split_id exceeds range, received {}, but expected between 0 and {}'.format(split_id, len(splits)-1)) + split = splits[split_id] + train_dirs, test_dirs = split['train'], split['test'] + + train = self.process_dir(train_dirs, cam1=True, cam2=True) + query = self.process_dir(test_dirs, cam1=True, cam2=False) + gallery = self.process_dir(test_dirs, cam1=False, cam2=True) + + super(PRID2011, self).__init__(train, query, gallery, **kwargs) + + def process_dir(self, dirnames, cam1=True, cam2=True): + tracklets = [] + dirname2pid = {dirname:i for i, dirname in enumerate(dirnames)} + + for dirname in dirnames: + if cam1: + person_dir = osp.join(self.cam_a_dir, dirname) + img_names = glob.glob(osp.join(person_dir, '*.png')) + assert len(img_names) > 0 + img_names = tuple(img_names) + pid = dirname2pid[dirname] + tracklets.append((img_names, pid, 0)) + + if cam2: + person_dir = osp.join(self.cam_b_dir, dirname) + img_names = glob.glob(osp.join(person_dir, '*.png')) + assert len(img_names) > 0 + img_names = tuple(img_names) + pid = dirname2pid[dirname] + tracklets.append((img_names, pid, 1)) + + return tracklets \ No newline at end of file diff --git a/data_v2/sampler.py b/data_v2/sampler.py new file mode 100755 index 000000000..029db6a75 --- /dev/null +++ b/data_v2/sampler.py @@ -0,0 +1,94 @@ +from __future__ import absolute_import +from __future__ import division + +from collections import defaultdict +import numpy as np +import copy +import random + +import torch +from torch.utils.data.sampler import Sampler, RandomSampler + + +class RandomIdentitySampler(Sampler): + """Randomly samples N identities each with K instances. + + Args: + data_source (list): contains tuples of (img_path(s), pid, camid). + batch_size (int): batch size. + num_instances (int): number of instances per identity in a batch. 
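+
+    Example (illustrative sketch only; ``dataset`` and its ``data_source`` list of
+    (img_path, pid, camid) tuples are placeholder names, not part of this module):
+        >>> sampler = RandomIdentitySampler(data_source, batch_size=32, num_instances=4)
+        >>> loader = torch.utils.data.DataLoader(dataset, batch_size=32,
+        ...                                      sampler=sampler, drop_last=True)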
+ """ + def __init__(self, data_source, batch_size, num_instances): + if batch_size < num_instances: + raise ValueError('batch_size={} must be no less ' + 'than num_instances={}'.format(batch_size, num_instances)) + + self.data_source = data_source + self.batch_size = batch_size + self.num_instances = num_instances + self.num_pids_per_batch = self.batch_size // self.num_instances + self.index_dic = defaultdict(list) + for index, (_, pid, _) in enumerate(self.data_source): + self.index_dic[pid].append(index) + self.pids = list(self.index_dic.keys()) + + # estimate number of examples in an epoch + # TODO: improve precision + self.length = 0 + for pid in self.pids: + idxs = self.index_dic[pid] + num = len(idxs) + if num < self.num_instances: + num = self.num_instances + self.length += num - num % self.num_instances + + def __iter__(self): + batch_idxs_dict = defaultdict(list) + + for pid in self.pids: + idxs = copy.deepcopy(self.index_dic[pid]) + if len(idxs) < self.num_instances: + idxs = np.random.choice(idxs, size=self.num_instances, replace=True) + random.shuffle(idxs) + batch_idxs = [] + for idx in idxs: + batch_idxs.append(idx) + if len(batch_idxs) == self.num_instances: + batch_idxs_dict[pid].append(batch_idxs) + batch_idxs = [] + + avai_pids = copy.deepcopy(self.pids) + final_idxs = [] + + while len(avai_pids) >= self.num_pids_per_batch: + selected_pids = random.sample(avai_pids, self.num_pids_per_batch) + for pid in selected_pids: + batch_idxs = batch_idxs_dict[pid].pop(0) + final_idxs.extend(batch_idxs) + if len(batch_idxs_dict[pid]) == 0: + avai_pids.remove(pid) + + self.length = len(final_idxs) + return iter(final_idxs) + + def __len__(self): + return self.length + + +def build_train_sampler(data_source, train_sampler, batch_size=32, num_instances=4, **kwargs): + """Builds a training sampler. + + Args: + data_source (list): contains tuples of (img_path(s), pid, camid). + train_sampler (str): sampler name (default: ``RandomSampler``). + batch_size (int, optional): batch size. Default is 32. + num_instances (int, optional): number of instances per identity in a + batch (for ``RandomIdentitySampler``). Default is 4. + """ + if train_sampler == 'RandomIdentitySampler': + sampler = RandomIdentitySampler(data_source, batch_size, num_instances) + + else: + sampler = RandomSampler(data_source) + + return sampler diff --git a/data_v2/transforms.py b/data_v2/transforms.py new file mode 100755 index 000000000..6780affdf --- /dev/null +++ b/data_v2/transforms.py @@ -0,0 +1,318 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from PIL import Image +import random +import numpy as np +import math +from collections import deque + +import torch +from torchvision.transforms import * +import torchvision.transforms.functional as TF + + +class Random2DTranslation(object): + """Randomly translates the input image with a probability. + + Specifically, given a predefined shape (height, width), the input is first + resized with a factor of 1.125, leading to (height*1.125, width*1.125), then + a random crop is performed. Such operation is done with a probability. + + Args: + height (int): target image height. + width (int): target image width. + p (float, optional): probability that this operation takes place. + Default is 0.5. + interpolation (int, optional): desired interpolation. 
Default is + ``PIL.Image.BILINEAR`` + """ + + def __init__(self, height, width, p=0.5, interpolation=Image.BILINEAR): + self.height = height + self.width = width + self.p = p + self.interpolation = interpolation + + def __call__(self, img): + if random.uniform(0, 1) > self.p: + return img.resize((self.width, self.height), self.interpolation) + + new_width, new_height = int( + round(self.width * 1.125)), int(round(self.height * 1.125)) + resized_img = img.resize((new_width, new_height), self.interpolation) + x_maxrange = new_width - self.width + y_maxrange = new_height - self.height + x1 = int(round(random.uniform(0, x_maxrange))) + y1 = int(round(random.uniform(0, y_maxrange))) + croped_img = resized_img.crop( + (x1, y1, x1 + self.width, y1 + self.height)) + return croped_img + + +class Cutout(object): + def __init__(self, probability=0.5, size=64, mean=[0.4914, 0.4822, 0.4465]): + self.probability = probability + self.mean = mean + self.size = size + + def __call__(self, img): + + if random.uniform(0, 1) > self.probability: + return img + + h = self.size + w = self.size + for attempt in range(100): + + area = img.size()[1] * img.size()[2] + if w < img.size()[2] and h < img.size()[1]: + x1 = random.randint(0, img.size()[1] - h) + y1 = random.randint(0, img.size()[2] - w) + if img.size()[0] == 3: + img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] + img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] + img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] + else: + img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] + return img + return img + + +class RandomErasing(object): + """Randomly erases an image patch. + + Origin: ``_ + + Reference: + Zhong et al. Random Erasing Data Augmentation. + + Args: + probability (float, optional): probability that this operation takes place. + Default is 0.5. + sl (float, optional): min erasing area. + sh (float, optional): max erasing area. + r1 (float, optional): min aspect ratio. + mean (list, optional): erasing value. + """ + + def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0.4914, 0.4822, 0.4465]): + self.probability = probability + self.mean = mean + self.sl = sl + self.sh = sh + self.r1 = r1 + + def __call__(self, img): + if random.uniform(0, 1) > self.probability: + return img + + for attempt in range(100): + + area = img.size()[1] * img.size()[2] + + target_area = random.uniform(self.sl, self.sh) * area + aspect_ratio = random.uniform(self.r1, 1 / self.r1) + + h = int(round(math.sqrt(target_area * aspect_ratio))) + w = int(round(math.sqrt(target_area / aspect_ratio))) + + if w < img.size()[2] and h < img.size()[1]: + x1 = random.randint(0, img.size()[1] - h) + y1 = random.randint(0, img.size()[2] - w) + if img.size()[0] == 3: + img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] + img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] + img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] + else: + img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] + return img + + return img + + +class ColorAugmentation(object): + """Randomly alters the intensities of RGB channels. + + Reference: + Krizhevsky et al. ImageNet Classification with Deep ConvolutionalNeural + Networks. NIPS 2012. + + Args: + p (float, optional): probability that this operation takes place. + Default is 0.5. 
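+
+    Note: this transform expects a 3xHxW ``torch.Tensor``, so in a ``Compose``
+    pipeline it should be placed after ``ToTensor()`` (illustrative:
+    ``Compose([Resize((256, 128)), ToTensor(), ColorAugmentation(p=0.5)])``).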
+ """ + + def __init__(self, p=0.5): + self.p = p + self.eig_vec = torch.Tensor([ + [0.4009, 0.7192, -0.5675], + [-0.8140, -0.0045, -0.5808], + [0.4203, -0.6948, -0.5836], + ]) + self.eig_val = torch.Tensor([[0.2175, 0.0188, 0.0045]]) + + def _check_input(self, tensor): + assert tensor.dim() == 3 and tensor.size(0) == 3 + + def __call__(self, tensor): + if random.uniform(0, 1) > self.p: + return tensor + alpha = torch.normal(mean=torch.zeros_like(self.eig_val)) * 0.1 + quatity = torch.mm(self.eig_val * alpha, self.eig_vec) + tensor = tensor + quatity.view(3, 1, 1) + return tensor + + +class RandomPatch(object): + """Random patch data augmentation. + + There is a patch pool that stores randomly extracted pathces from person images. + + For each input image, + 1) we extract a random patch and store the patch in the patch pool; + 2) randomly select a patch from the patch pool and paste it on the + input to simulate occlusion. + + Reference: + - Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019. + """ + + def __init__(self, prob_happen=0.5, pool_capacity=50000, min_sample_size=100, + patch_min_area=0.01, patch_max_area=0.5, patch_min_ratio=0.1, + prob_rotate=0.5, prob_flip_leftright=0.5, + ): + self.prob_happen = prob_happen + + self.patch_min_area = patch_min_area + self.patch_max_area = patch_max_area + self.patch_min_ratio = patch_min_ratio + + self.prob_rotate = prob_rotate + self.prob_flip_leftright = prob_flip_leftright + + self.patchpool = deque(maxlen=pool_capacity) + self.min_sample_size = min_sample_size + + def generate_wh(self, W, H): + area = W * H + for attempt in range(100): + target_area = random.uniform( + self.patch_min_area, self.patch_max_area) * area + aspect_ratio = random.uniform( + self.patch_min_ratio, 1. / self.patch_min_ratio) + h = int(round(math.sqrt(target_area * aspect_ratio))) + w = int(round(math.sqrt(target_area / aspect_ratio))) + if w < W and h < H: + return w, h + return None, None + + def transform_patch(self, patch): + if random.uniform(0, 1) > self.prob_flip_leftright: + patch = patch.transpose(Image.FLIP_LEFT_RIGHT) + if random.uniform(0, 1) > self.prob_rotate: + patch = patch.rotate(random.randint(-10, 10)) + return patch + + def __call__(self, img): + W, H = img.size # original image size + + # collect new patch + w, h = self.generate_wh(W, H) + if w is not None and h is not None: + x1 = random.randint(0, W - w) + y1 = random.randint(0, H - h) + new_patch = img.crop((x1, y1, x1 + w, y1 + h)) + self.patchpool.append(new_patch) + + if len(self.patchpool) < self.min_sample_size: + return img + + if random.uniform(0, 1) > self.prob_happen: + return img + + # paste a randomly selected patch on a random position + patch = random.sample(self.patchpool, 1)[0] + patchW, patchH = patch.size + x1 = random.randint(0, W - patchW) + y1 = random.randint(0, H - patchH) + patch = self.transform_patch(patch) + img.paste(patch, (x1, y1)) + + return img + + +def build_transforms(height, width, transforms='random_flip', norm_mean=[0.485, 0.456, 0.406], + norm_std=[0.229, 0.224, 0.225], **kwargs): + """Builds train and test transform functions. + + Args: + height (int): target image height. + width (int): target image width. + transforms (str or list of str, optional): transformations applied to model training. + Default is 'random_flip'. + norm_mean (list or None, optional): normalization mean values. Default is ImageNet means. + norm_std (list or None, optional): normalization standard deviation values. 
Default is + ImageNet standard deviation values. + """ + if transforms is None: + transforms = [] + + if isinstance(transforms, str): + transforms = [transforms] + + if not isinstance(transforms, list): + raise ValueError( + 'transforms must be a list of strings, but found to be {}'.format(type(transforms))) + + if len(transforms) > 0: + transforms = [t.lower() for t in transforms] + + if norm_mean is None or norm_std is None: + norm_mean = [0.485, 0.456, 0.406] # imagenet mean + norm_std = [0.229, 0.224, 0.225] # imagenet std + normalize = Normalize(mean=norm_mean, std=norm_std) + + print('Building train transforms ...') + transform_tr = [] + transform_tr += [Resize((height, width))] + print('+ resize to {}x{}'.format(height, width)) + if 'random_flip' in transforms: + print('+ random flip') + transform_tr += [RandomHorizontalFlip()] + if 'random_crop' in transforms: + print('+ random crop (enlarge to {}x{} and ' + 'crop {}x{})'.format(int(round(height * 1.125)), int(round(width * 1.125)), height, width)) + transform_tr += [Random2DTranslation(height, width)] + if 'random_patch' in transforms: + print('+ random patch') + transform_tr += [RandomPatch()] + + if 'color_jitter' in transforms: + print('+ color jitter') + transform_tr += [ColorJitter(brightness=0.2, + contrast=0.15, saturation=0, hue=0)] + print('+ to torch tensor of range [0, 1]') + transform_tr += [ToTensor()] + print('+ normalization (mean={}, std={})'.format(norm_mean, norm_std)) + transform_tr += [normalize] + if 'random_erase' in transforms: + print('+ random erase') + transform_tr += [RandomErasing()] + if 'cutout' in transforms: + print('+ cutout augmentation') + transform_tr += [Cutout()] + transform_tr = Compose(transform_tr) + + print('Building test transforms ...') + print('+ resize to {}x{}'.format(height, width)) + print('+ to torch tensor of range [0, 1]') + print('+ normalization (mean={}, std={})'.format(norm_mean, norm_std)) + transform_te = Compose([ + Resize((height, width)), + ToTensor(), + normalize, + ]) + + return transform_tr, transform_te diff --git a/data_v2/utils.py b/data_v2/utils.py new file mode 100644 index 000000000..0c803b9e1 --- /dev/null +++ b/data_v2/utils.py @@ -0,0 +1,128 @@ +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +__all__ = ['mkdir_if_missing', 'check_isfile', 'read_json', 'write_json', + 'set_random_seed', 'download_url', 'read_image', 'collect_env_info'] + +import sys +import os +import os.path as osp +import time +import errno +import json +from collections import OrderedDict +import warnings +import random +import numpy as np +import PIL +from PIL import Image + +import torch + + +def mkdir_if_missing(dirname): + """Creates dirname if it is missing.""" + if not osp.exists(dirname): + try: + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def check_isfile(fpath): + """Checks if the given path is a file. + + Args: + fpath (str): file path. 
+
+    Returns:
+       bool
+    """
+    isfile = osp.isfile(fpath)
+    if not isfile:
+        warnings.warn('No file found at "{}"'.format(fpath))
+    return isfile
+
+
+def read_json(fpath):
+    """Reads json file from a path."""
+    with open(fpath, 'r') as f:
+        obj = json.load(f)
+    return obj
+
+
+def write_json(obj, fpath):
+    """Writes to a json file."""
+    mkdir_if_missing(osp.dirname(fpath))
+    with open(fpath, 'w') as f:
+        json.dump(obj, f, indent=4, separators=(',', ': '))
+
+
+def set_random_seed(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+
+
+def download_url(url, dst):
+    """Downloads file from a url to a destination.
+
+    Args:
+        url (str): url to download file.
+        dst (str): destination path.
+    """
+    from six.moves import urllib
+    print('* url="{}"'.format(url))
+    print('* destination="{}"'.format(dst))
+
+    def _reporthook(count, block_size, total_size):
+        global start_time
+        if count == 0:
+            start_time = time.time()
+            return
+        duration = time.time() - start_time
+        progress_size = int(count * block_size)
+        speed = int(progress_size / (1024 * duration))
+        percent = int(count * block_size * 100 / total_size)
+        sys.stdout.write('\r...%d%%, %d MB, %d KB/s, %d seconds passed' %
+                         (percent, progress_size / (1024 * 1024), speed, duration))
+        sys.stdout.flush()
+
+    urllib.request.urlretrieve(url, dst, _reporthook)
+    sys.stdout.write('\n')
+
+
+def read_image(path):
+    """Reads image from path using ``PIL.Image``.
+
+    Args:
+        path (str): path to an image.
+
+    Returns:
+        PIL image
+    """
+    got_img = False
+    if not osp.exists(path):
+        raise IOError('"{}" does not exist'.format(path))
+    while not got_img:
+        try:
+            img = Image.open(path).convert('RGB')
+            got_img = True
+        except IOError:
+            print('IOError incurred when reading "{}". Will redo. Don\'t worry. Just chill.'.format(path))
+            pass
+    return img
+
+
+def collect_env_info():
+    """Returns env info as a string.
+ + Code source: github.com/facebookresearch/maskrcnn-benchmark + """ + from torch.utils.collect_env import get_pretty_env_info + env_str = get_pretty_env_info() + env_str += '\n Pillow ({})'.format(PIL.__version__) + return env_str diff --git a/demo.sh b/demo.sh new file mode 100755 index 000000000..821a57f63 --- /dev/null +++ b/demo.sh @@ -0,0 +1,50 @@ +#mAP: 0.9204 rank1: 0.9469 rank3: 0.9664 rank5: 0.9715 rank10: 0.9780 (Best: 0.9204 @epoch 4) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --reset --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 40 --epochs 160 --decay_type step_120_140 --loss 1*CrossEntropy+2*Triplet --margin 0.3 --re_rank --random_erasing --save MGN_adam --nGPU 2 --lr 2e-4 --optimizer ADAM + +#mAP: 0.9094 rank1: 0.9388 rank3: 0.9596 rank5: 0.9659 rank10: 0.9748 (Best: 0.9094 @epoch 4) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --reset --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 40 --epochs 160 --decay_type step_120_140 --loss 1*CrossEntropy+1*Triplet --margin 0.3 --re_rank --random_erasing --save MGN_adam_1 --nGPU 2 --lr 1e-4 --optimizer ADAM + +#mAP: 0.9217 rank1: 0.9460 rank3: 0.9653 rank5: 0.9706 rank10: 0.9801 (Best: 0.9217 @epoch 4) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --reset --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 40 --epochs 160 --decay_type step_120_140 --loss 1*CrossEntropy+2*Triplet --margin 1.2 --re_rank --random_erasing --save MGN_adam_margin_1.2 --nGPU 2 --lr 2e-4 --optimizer ADAM + +#mAP: 0.8986 rank1: 0.9356 rank3: 0.9567 rank5: 0.9620 rank10: 0.9727 (Best: 0.8986 @epoch 4) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --reset --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 40 --epochs 160 --decay_type step_120_140 --loss 1*CrossEntropy+2*Triplet --margin 0.3 --re_rank --random_erasing --save MGN_adamax --nGPU 2 --lr 2e-4 --optimizer ADAMAX + +#mAP: 0.5494 rank1: 0.7058 rank3: 0.7696 rank5: 0.8023 rank10: 0.8432 (Best: 0.5494 @epoch 4) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --reset --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 40 --epochs 160 --decay_type step_80_120 --loss 1*CrossEntropy+1*Triplet --margin 0.3 --re_rank --random_erasing --save MGN_sgd --nGPU 2 --lr 1e-2 --optimizer SGD + +#mAP: 0.8480 rank1: 0.9008 rank3: 0.9317 rank5: 0.9436 rank10: 0.9555 (Best: 0.8480 @epoch 3) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --reset --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 40 --epochs 120 --decay_type step_60_80 --loss 1*CrossEntropy+1*Triplet --margin 0.3 --re_rank --random_erasing --save MGN_sgd_1 --nGPU 2 --lr 1e-2 --optimizer SGD + +#mAP: 0.8455 rank1: 0.9032 rank3: 0.9350 rank5: 0.9433 rank10: 0.9537 (Best: 0.8455 @epoch 3) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --reset --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 40 --epochs 120 --decay_type step_60_80 --loss 1*CrossEntropy+1*Triplet --margin 1.2 --re_rank --random_erasing --save MGN_sgd_2 --nGPU 2 --lr 1e-2 --optimizer SGD + +#mAP: 0.8979 rank1: 0.9376 rank3: 0.9569 rank5: 0.9623 rank10: 0.9745 (Best: 0.8979 @epoch 200) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 50 --epochs 200 --decay_type step_130_170 --loss 1*CrossEntropy+1*Triplet --margin 1.2 --re_rank --random_erasing --save sgd_1 --nGPU 2 --lr 1e-2 --optimizer SGD 
--reset + +#mAP: 0.8053 rank1: 0.9228 rank3: 0.9581 rank5: 0.9676 rank10: 0.9804 (Best: 0.8054 @epoch 190) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --datadir ../reid-mgn/Market-1501-v15.09.15/ --reset --batchid 16 --batchtest 32 --test_every 10 --epochs 200 --decay_type step_240_250 --loss 1*CrossEntropy+1*Triplet --margin 1.2 --save sgd_2 --nGPU 2 --lr 1e-2 --optimizer SGD --save_models --random_erasing --reset + +#mAP: 0.8251 rank1: 0.9353 rank3: 0.9679 rank5: 0.9783 rank10: 0.9866 (Best: 0.8251 @epoch 200) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --reset --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 10 --epochs 200 --decay_type step_240_250 --loss 1*CrossEntropy+2*Triplet --margin 1.2 --random_erasing --save adam_1 --nGPU 2 --lr 2e-4 --optimizer ADAM --save_models + +#mAP: 0.9097 rank1: 0.9442 rank3: 0.9614 rank5: 0.9679 rank10: 0.9751 +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 100 --epochs 300 --decay_type step_250_290 --loss 1*CrossEntropy+1*Triplet --margin 1.2 --save sgd_3 --nGPU 2 --lr 1e-2 --optimizer SGD --save_models --random_erasing --reset --re_rank + +#mAP: 0.9353 rank1: 0.9534 rank3: 0.9706 rank5: 0.9768 rank10: 0.9849 +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 100 --epochs 300 --decay_type step_250_290 --loss 1*CrossEntropy+2*Triplet --margin 1.2 --save adam_2 --nGPU 2 --lr 2e-4 --optimizer ADAM --save_models --random_erasing --reset --re_rank + +#mAP: 0.9174 rank1: 0.9433 rank3: 0.9617 rank5: 0.9679 rank10: 0.9754 +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 20 --epochs 300 --decay_type step_250_290 --loss 1*CrossEntropy+1*Triplet --margin 1.2 --save sgd_3 --nGPU 2 --lr 1e-2 --optimizer SGD --random_erasing --reset --re_rank --nesterov + +#mAP: 0.9376 rank1: 0.9558 rank3: 0.9712 rank5: 0.9765 rank10: 0.9816 +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 100 --epochs 300 --decay_type step_250_290 --loss 1*CrossEntropy+2*Triplet --margin 1.2 --save adam_3 --nGPU 2 --lr 2e-4 --optimizer ADAM --random_erasing --reset --re_rank --amsgrad + +#mAP: 0.9323 rank1: 0.9513 rank3: 0.9700 rank5: 0.9745 rank10: 0.9813 +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 100 --epochs 300 --decay_type step_250_290 --loss 1*CrossEntropy+2*Triplet --margin 0.3 --save adam_1 --nGPU 2 --lr 2e-4 --optimizer ADAM --random_erasing --reset --re_rank --amsgrad + +#mAP: 0.9270 rank1: 0.9510 rank3: 0.9691 rank5: 0.9751 rank10: 0.9810 +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 50 --epochs 500 --decay_type step_300_420 --loss 1*CrossEntropy+1*Triplet --margin 1.2 --pool avg --save sgd_1 --nGPU 2 --lr 1e-2 --optimizer SGD --random_erasing --reset --re_rank --nesterov + +#0.9383 rank1: 0.9578 rank3: 0.9721 rank5: 0.9783 rank10: 0.9843 (Best: 0.9383 @epoch 400) +#CUDA_VISIBLE_DEVICES=2,3 python3 main.py --datadir ../reid-mgn/Market-1501-v15.09.15/ --batchid 16 --batchtest 32 --test_every 50 --epochs 400 --decay_type step_320_380 --loss 1*CrossEntropy+2*Triplet --margin 1.2 --save adam_1 --nGPU 2 --lr 2e-4 --optimizer ADAM --random_erasing --reset --re_rank --amsgrad \ No 
newline at end of file diff --git a/engine_v1.py b/engine_v1.py new file mode 100644 index 000000000..08f860412 --- /dev/null +++ b/engine_v1.py @@ -0,0 +1,304 @@ +import os +import torch +import numpy as np +import utils.utility as utility +from scipy.spatial.distance import cdist +from utils.functions import cmc, mean_ap, cmc_baseline, eval_liaoxingyu +from utils.re_ranking import re_ranking +import scipy.io +from torchvision import datasets, transforms +from data_v1.sampler import a_RandomIdentitySampler + + +class Engine(): + def __init__(self, args, model, loss, loader, ckpt): + self.args = args + + # if args.data_train == 'GTA': + # transform_train_list = [ + # # transforms.RandomResizedCrop(size=128, scale=(0.75,1.0), ratio=(0.75,1.3333), interpolation=3), #Image.BICUBIC) + # transforms.Resize((384, 128), interpolation=3), + # transforms.Pad(10), + # transforms.RandomCrop((384, 128)), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + # ] + # # train_dataset = datasets.ImageFolder(os.path.join(args.datadir, 'pytorch', 'train_all'), + # # transforms.Compose(transform_train_list)) + # train_dataset = datasets.ImageFolder(os.path.join(args.datadir, 'train'), + # transforms.Compose(transform_train_list)) + # self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batchid * args.batchimage, sampler=a_RandomIdentitySampler( + # train_dataset, args.batchid * args.batchimage, args.batchimage), num_workers=8, pin_memory=True) # 8 workers may work faster + # print('GTA has {} classes'.format(train_dataset.classes)) + # else: + self.train_loader = loader.train_loader + + self.test_loader = loader.test_loader + self.query_loader = loader.query_loader + self.testset = loader.galleryset + self.queryset = loader.queryset + + self.ckpt = ckpt + self.model = model + self.loss = loss + self.lr = 0. 
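+        # Build the optimizer from the command-line args; the LR scheduler is
+        # created further below, after the optimizer state has (optionally) been
+        # restored, so that its last_epoch is consistent when resuming.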
+ self.optimizer = utility.make_optimizer(args, self.model) + self.device = torch.device('cpu' if args.cpu else 'cuda') + + last_epoch = -1 + + if torch.cuda.is_available(): + self.ckpt.write_log(torch.cuda.get_device_name(0)) + + if args.load != '': + self.optimizer.load_state_dict( + torch.load(os.path.join(ckpt.dir, 'optimizer.pt')) + ) + last_epoch = int(ckpt.log[-1, 0]) - 1 + + # for _ in range(last_epoch): + # self.scheduler.step() + + if args.pre_train != '' and args.resume: + resume_epoch = args.pre_train.split( + '/')[-1].split('.')[0].split('_')[-1] + self.optimizer.load_state_dict( + torch.load(args.pre_train.replace('model', 'optimizer')) + ) + # for _ in range(len(ckpt.log) * args.test_every): + # self.scheduler.step() + last_epoch = resume_epoch - 1 + + self.scheduler = utility.make_scheduler( + args, self.optimizer, last_epoch) + + self.ckpt.write_log( + 'Continue from epoch {}'.format(self.scheduler.last_epoch)) + + print(ckpt.log) + print(self.scheduler._last_lr) + + def train(self): + self.loss.step() + epoch = self.scheduler.last_epoch + lr = self.scheduler.get_last_lr()[0] + + if lr != self.lr: + self.ckpt.write_log( + '[INFO] Epoch: {}\tLearning rate: {:.2e} '.format(epoch + 1, lr)) + self.lr = lr + self.loss.start_log() + self.model.train() + + for batch, (inputs, labels) in enumerate(self.train_loader): + + inputs = inputs.to(self.device) + labels = labels.to(self.device) + + self.optimizer.zero_grad() + outputs = self.model(inputs) + loss = self.loss(outputs, labels) + loss.backward() + self.optimizer.step() + + self.ckpt.write_log('\r[INFO] [{}/{}]\t{}/{}\t{}'.format( + epoch + 1, self.args.epochs, + batch + 1, len(self.train_loader), + self.loss.display_loss(batch)), + end='' if batch + 1 != len(self.train_loader) else '\n') + + self.scheduler.step() + self.loss.end_log(len(self.train_loader)) + + def test(self): + epoch = self.scheduler.last_epoch + self.ckpt.write_log('\n[INFO] Test:') + self.model.eval() + + self.ckpt.add_log(torch.zeros(1, 6)) + # qf = self.extract_feature(self.query_loader,self.args).numpy() + # gf = self.extract_feature(self.test_loader,self.args).numpy() + + qf = self.extract_feature(self.query_loader, self.args) + gf = self.extract_feature(self.test_loader, self.args) + + # qf = self.extract_feature(self.query_loader) + # gf = self.extract_feature(self.test_loader) + + query_ids = np.asarray(self.queryset.ids) + gallery_ids = np.asarray(self.testset.ids) + query_cams = np.asarray(self.queryset.cameras) + gallery_cams = np.asarray(self.testset.cameras) + # print(query_ids.shape) + # print(gallery_ids.shape) + # print(query_cams.shape) + # print(gallery_cams.shape) + # np.save('gf',gf.numpy()) + # np.save('qf',qf.numpy()) + # np.save('qc',query_cams) + # np.save('gc',gallery_cams) + # np.save('qi',query_ids) + # np.save('gi',gallery_ids) + # qf=np.load('/content/qf.npy') + # gf=np.load('/content/gf.npy') + # print('save') + # result = scipy.io.loadmat('pytorch_result.mat') + # qf = torch.FloatTensor(result['query_f']).cuda() + # query_cam = result['query_cam'][0] + # query_label = result['query_label'][0] + # gf = torch.FloatTensor(result['gallery_f']).cuda() + # gallery_cam = result['gallery_cam'][0] + # gallery_label = result['gallery_label'][0] + # print(query_cam.shape) + # print(gallery_cam.shape) + # print(query_label.shape) + # print(gallery_label.shape) + + if self.args.re_rank: + q_g_dist = np.dot(qf, np.transpose(gf)) + q_q_dist = np.dot(qf, np.transpose(qf)) + g_g_dist = np.dot(gf, np.transpose(gf)) + dist = re_ranking(q_g_dist, 
q_q_dist, g_g_dist) + else: + # dist = cdist(qf, gf,metric='cosine') + + # cosine distance + dist = 1 - torch.mm(qf, gf.t()).cpu().numpy() + + # m, n = qf.shape[0], gf.shape[0] + + # dist = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \ + # torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t() + # dist.addmm_(1, -2, qf, gf.t()) + # dist = np.dot(qf,np.transpose(gf)) + # print('2') + + # r = cmc(dist, self.queryset.ids, self.testset.ids, self.queryset.cameras, self.testset.cameras, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # m_ap = mean_ap(dist, self.queryset.ids, self.testset.ids, + # self.queryset.cameras, self.testset.cameras) + # r = cmc(dist, query_label, gallery_label, query_cam, gallery_cam, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # m_ap = mean_ap(dist, query_label, gallery_label, query_cam, gallery_cam) + # r, m_ap = cmc_baseline(dist, query_label, gallery_label, query_cam, gallery_cam, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # r, m_ap = cmc_baseline(dist, query_ids, gallery_ids, query_cams, gallery_cams, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # r,m_ap=eval_liaoxingyu(dist, query_label, gallery_label, query_cam, gallery_cam, 50) + r, m_ap = eval_liaoxingyu( + dist, query_ids, gallery_ids, query_cams, gallery_cams, 50) + + self.ckpt.log[-1, 0] = epoch + self.ckpt.log[-1, 1] = m_ap + self.ckpt.log[-1, 2] = r[0] + self.ckpt.log[-1, 3] = r[2] + self.ckpt.log[-1, 4] = r[4] + self.ckpt.log[-1, 5] = r[9] + best = self.ckpt.log.max(0) + + # self.ckpt.write_log( + # '[INFO] mAP: {:.4f} rank1: {:.4f} rank3: {:.4f} rank5: {:.4f} rank10: {:.4f} (Best: {:.4f} @epoch {})'.format( + # m_ap, + # r[0], r[2], r[4], r[9], + # best[0][0], + # (best[1][0] + 1) * self.args.test_every + # ) + # ) + self.ckpt.write_log( + '[INFO] mAP: {:.4f} rank1: {:.4f} rank3: {:.4f} rank5: {:.4f} rank10: {:.4f} (Best: {:.4f} @epoch {})'.format( + m_ap, + r[0], r[2], r[4], r[9], + best[0][1], self.ckpt.log[best[1][1], 0] + ) + ) + # if not self.args.test_only: + # self.ckpt.save(self, epoch, is_best=( + # (best[1][0] + 1) * self.args.test_every == epoch)) + if not self.args.test_only: + self.ckpt.save(self, epoch, is_best=( + self.ckpt.log[best[1][1], 0] == epoch)) + + def fliphor(self, inputs): + inv_idx = torch.arange(inputs.size( + 3) - 1, -1, -1).long() # N x C x H x W + return inputs.index_select(3, inv_idx) + + # def extract_feature(self, loader): + # features = torch.FloatTensor() + # for (inputs, labels) in loader: + # ff = torch.FloatTensor(inputs.size(0), 2048).zero_() + # for i in range(2): + # if i == 1: + # inputs = self.fliphor(inputs) + # input_img = inputs.to(self.device) + # outputs = self.model(input_img) + # f = outputs[0].data.cpu() + # ff = ff + f + + # fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + # ff = ff.div(fnorm.expand_as(ff)) + + # features = torch.cat((features, ff), 0) + # return features + def extract_feature(self, loader, args): + features = torch.FloatTensor() + + for (inputs, labels) in loader: + + input_img = inputs.to(self.device) + outputs = self.model(input_img) + # print(outputs.shape) + if args.feat_inference == 'after': + f1 = outputs[0].data.cpu() + # flip + inputs = inputs.index_select( + 3, torch.arange(inputs.size(3) - 1, -1, -1)) + input_img = inputs.to(self.device) + outputs = self.model(input_img) + f2 = outputs[0].data.cpu() + else: + f1 = 
outputs[-1].data.cpu() + # flip + inputs = inputs.index_select( + 3, torch.arange(inputs.size(3) - 1, -1, -1)) + input_img = inputs.to(self.device) + outputs = self.model(input_img) + f2 = outputs[-1].data.cpu() + + ff = f1 + f2 + if ff.dim() == 3: + fnorm = torch.norm( + ff, p=2, dim=1, keepdim=True) * np.sqrt(ff.shape[2]) + ff = ff.div(fnorm.expand_as(ff)) + ff = ff.view(ff.size(0), -1) + # ff = ff.view(ff.size(0), -1) + # fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + # ff = ff.div(fnorm.expand_as(ff)) + + else: + fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + ff = ff.div(fnorm.expand_as(ff)) + # pass + # fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + # ff = ff.div(fnorm.expand_as(ff)) + features = torch.cat((features, ff), 0) + # print(features.shape) + return features + + def terminate(self): + if self.args.test_only: + self.test() + return True + else: + epoch = self.scheduler.last_epoch + 1 + + return epoch > self.args.epochs diff --git a/engine_v2.py b/engine_v2.py new file mode 100644 index 000000000..d18009342 --- /dev/null +++ b/engine_v2.py @@ -0,0 +1,404 @@ +import os +import torch +import numpy as np +import utils.utility as utility +from scipy.spatial.distance import cdist +from utils.functions import cmc, mean_ap, cmc_baseline, eval_liaoxingyu +from utils.re_ranking import re_ranking +import scipy.io +from torchvision import datasets, transforms +from data_v1.sampler import a_RandomIdentitySampler +from loss.multi_similarity_loss import MultiSimilarityLoss +from loss.triplet import CrossEntropyLabelSmooth + + +class Engine(): + def __init__(self, args, model, optimizer,scheduler,loss, loader, ckpt): + self.args = args + + # if args.data_train == 'GTA': + # transform_train_list = [ + # # transforms.RandomResizedCrop(size=128, scale=(0.75,1.0), ratio=(0.75,1.3333), interpolation=3), #Image.BICUBIC) + # transforms.Resize((384, 128), interpolation=3), + # transforms.Pad(10), + # transforms.RandomCrop((384, 128)), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + # ] + # # train_dataset = datasets.ImageFolder(os.path.join(args.datadir, 'pytorch', 'train_all'), + # # transforms.Compose(transform_train_list)) + # train_dataset = datasets.ImageFolder(os.path.join(args.datadir, 'train'), + # transforms.Compose(transform_train_list)) + # self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batchid * args.batchimage, sampler=a_RandomIdentitySampler( + # train_dataset, args.batchid * args.batchimage, args.batchimage), num_workers=8, pin_memory=True) # 8 workers may work faster + # print('GTA has {} classes'.format(train_dataset.classes)) + # else: + self.train_loader = loader.train_loader + + self.test_loader = loader.test_loader + self.query_loader = loader.query_loader + self.testset = loader.galleryset + self.queryset = loader.queryset + + self.ckpt = ckpt + self.model = model + self.optimizer = optimizer + self.scheduler = scheduler + self.loss = loss + + + ################# + self.weight_t = 1 + self.weight_x = 1 + + # self.criterion_t = TripletLoss(margin=margin) + self.criterion_t = MultiSimilarityLoss(margin=args.margin) + self.criterion_x = CrossEntropyLabelSmooth(702) + + ################# + + self.lr = 0. 
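+        # Unlike engine_v1, the optimizer and scheduler are built by the caller and
+        # passed in; the commented-out block below is the old in-engine construction
+        # and resume logic, kept for reference.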
+ # self.optimizer = utility.make_optimizer(args, self.model) + self.device = torch.device('cpu' if args.cpu else 'cuda') + + # last_epoch = -1 + + if torch.cuda.is_available(): + self.ckpt.write_log('[INFO] ' + torch.cuda.get_device_name(0)) + + # if args.load != '': + + # checkpointer = torch.load(os.path.join(ckpt.dir, 'model','optimizer.pt')) + # last_epoch = checkpointer['epoch'] - 1 + # self.optimizer.load_state_dict(checkpointer['state_dict']) + + # # self.optimizer.load_state_dict( + # # torch.load(os.path.join(ckpt.dir, 'optimizer.pt'))) + # # last_epoch = int(ckpt.log[-1, 0]) - 1 + # self.ckpt.write_log('[INFO] Optimizer loaded.') + + # # for _ in range(last_epoch): + # # self.scheduler.step() + + # if args.pre_train != '' and args.resume: + # resume_epoch = args.pre_train.split( + # '/')[-1].split('.')[0].split('_')[-1] + # # optimizer_path = + # self.optimizer.load_state_dict( + # torch.load(args.pre_train.replace('model', 'optimizer')) + # ) + # # for _ in range(len(ckpt.log) * args.test_every): + # # self.scheduler.step() + # last_epoch = resume_epoch - 1 + + # self.scheduler = utility.make_scheduler( + # args, self.optimizer, last_epoch) + + self.ckpt.write_log( + '[INFO] Continue from epoch {}'.format(self.scheduler.last_epoch)) + + print(ckpt.log) + # print(self.scheduler._last_lr) + + def train(self): + # self.loss.step() + epoch = self.scheduler.last_epoch + lr = self.scheduler.get_last_lr()[0] + + if lr != self.lr: + self.ckpt.write_log( + '[INFO] Epoch: {}\tLearning rate: {:.2e} '.format(epoch + 1, lr)) + self.lr = lr + self.loss.start_log() + self.model.train() + # for k in self.model.state_dict(): + # print(k) + # print(self.model.state_dict()[k].shape) + # print(self.model.state_dict()[k].requires_grad) + + for batch, d in enumerate(self.train_loader): + inputs, labels = self._parse_data_for_train(d) + + inputs = inputs.to(self.device) + labels = labels.to(self.device) + + self.optimizer.zero_grad() + # outputs = self.model(inputs) + # loss = self.loss(outputs, labels) + + ################### + output1, output2,output3,output4, fea = self.model(inputs) + # loss_c1 = self._compute_loss(self.criterion_c1, fea[0], pids) + # loss_c2 = self._compute_loss(self.criterion_c2, fea[1], pids) + + loss_t1 = self.criterion_t(fea[0], labels) + loss_t2 = self.criterion_t(fea[1], labels) + + loss_x1 = self.criterion_x(output1, labels) + loss_x2 = self.criterion_x(output2, labels) + loss_x3 = self.criterion_x(output3, labels) + loss_x4 = self.criterion_x(output4, labels) + loss1 = (self.weight_x * loss_x1 + self.weight_x * loss_x2+ self.weight_x * loss_x3+ self.weight_x * loss_x4) * 0.5 + + loss2 = (self.weight_t * loss_t1 + self.weight_t * loss_t2) * 0.5 + # loss3 = (loss_c1 + loss_c2) * 0.5 + loss3 = 0. 
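+            # loss1 aggregates the four classification (cross-entropy) heads and
+            # loss2 the two feature (multi-similarity) heads, each scaled by 0.5;
+            # loss3 is a placeholder for the disabled center-loss term, which
+            # would enter the total with weight 0.0005 below.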
+ + loss = loss1 + loss2 + 0.0005 * loss3 + + + loss.backward() + self.optimizer.step() + + # self.ckpt.write_log('\r[INFO] [{}/{}]\t{}/{}\t{}'.format( + # epoch + 1, self.args.epochs, + # batch + 1, len(self.train_loader), + # self.loss.display_loss(batch)), + # end='' if batch + 1 != len(self.train_loader) else '\n') + self.ckpt.write_log('\r[INFO] [{}/{}]\t{}/{}\t{}'.format( + epoch + 1, self.args.epochs, + batch + 1, len(self.train_loader), + loss), + end='' if batch + 1 != len(self.train_loader) else '\n') + + + self.scheduler.step() + self.loss.end_log(len(self.train_loader)) + + def test(self): + epoch = self.scheduler.last_epoch + self.ckpt.write_log('\n[INFO] Test:') + self.model.eval() + + self.ckpt.add_log(torch.zeros(1, 6)) + # qf = self.extract_feature(self.query_loader,self.args).numpy() + # gf = self.extract_feature(self.test_loader,self.args).numpy() + + qf, query_ids, query_cams = self.extract_feature( + self.query_loader, self.args) + gf, gallery_ids, gallery_cams = self.extract_feature( + self.test_loader, self.args) + + # qf = self.extract_feature(self.query_loader) + # gf = self.extract_feature(self.test_loader) + + # query_ids = np.asarray(self.queryset.ids) + # gallery_ids = np.asarray(self.testset.ids) + # query_cams = np.asarray(self.queryset.cameras) + # gallery_cams = np.asarray(self.testset.cameras) + # print(query_ids.shape) + # print(gallery_ids.shape) + # print(query_cams.shape) + # print(gallery_cams.shape) + # np.save('gf',gf.numpy()) + # np.save('qf',qf.numpy()) + # np.save('qc',query_cams) + # np.save('gc',gallery_cams) + # np.save('qi',query_ids) + # np.save('gi',gallery_ids) + # qf=np.load('/content/qf.npy') + # gf=np.load('/content/gf.npy') + # print('save') + # result = scipy.io.loadmat('pytorch_result.mat') + # qf = torch.FloatTensor(result['query_f']).cuda() + # query_cam = result['query_cam'][0] + # query_label = result['query_label'][0] + # gf = torch.FloatTensor(result['gallery_f']).cuda() + # gallery_cam = result['gallery_cam'][0] + # gallery_label = result['gallery_label'][0] + # print(query_cam.shape) + # print(gallery_cam.shape) + # print(query_label.shape) + # print(gallery_label.shape) + + if self.args.re_rank: + q_g_dist = np.dot(qf, np.transpose(gf)) + q_q_dist = np.dot(qf, np.transpose(qf)) + g_g_dist = np.dot(gf, np.transpose(gf)) + dist = re_ranking(q_g_dist, q_q_dist, g_g_dist) + else: + # dist = cdist(qf, gf,metric='cosine') + + # cosine distance + dist = 1 - torch.mm(qf, gf.t()).cpu().numpy() + + # m, n = qf.shape[0], gf.shape[0] + + # dist = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \ + # torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t() + # dist.addmm_(1, -2, qf, gf.t()) + # dist = np.dot(qf,np.transpose(gf)) + # print('2') + + # r = cmc(dist, self.queryset.ids, self.testset.ids, self.queryset.cameras, self.testset.cameras, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # m_ap = mean_ap(dist, self.queryset.ids, self.testset.ids, + # self.queryset.cameras, self.testset.cameras) + # r = cmc(dist, query_label, gallery_label, query_cam, gallery_cam, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # m_ap = mean_ap(dist, query_label, gallery_label, query_cam, gallery_cam) + # r, m_ap = cmc_baseline(dist, query_label, gallery_label, query_cam, gallery_cam, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # r, m_ap = cmc_baseline(dist, query_ids, gallery_ids, query_cams, gallery_cams, 
+ # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # r,m_ap=eval_liaoxingyu(dist, query_label, gallery_label, query_cam, gallery_cam, 50) + r, m_ap = eval_liaoxingyu( + dist, query_ids, gallery_ids, query_cams, gallery_cams, 50) + + self.ckpt.log[-1, 0] = epoch + self.ckpt.log[-1, 1] = m_ap + self.ckpt.log[-1, 2] = r[0] + self.ckpt.log[-1, 3] = r[2] + self.ckpt.log[-1, 4] = r[4] + self.ckpt.log[-1, 5] = r[9] + best = self.ckpt.log.max(0) + + # self.ckpt.write_log( + # '[INFO] mAP: {:.4f} rank1: {:.4f} rank3: {:.4f} rank5: {:.4f} rank10: {:.4f} (Best: {:.4f} @epoch {})'.format( + # m_ap, + # r[0], r[2], r[4], r[9], + # best[0][0], + # (best[1][0] + 1) * self.args.test_every + # ) + # ) + self.ckpt.write_log( + '[INFO] mAP: {:.4f} rank1: {:.4f} rank3: {:.4f} rank5: {:.4f} rank10: {:.4f} (Best: {:.4f} @epoch {})'.format( + m_ap, + r[0], r[2], r[4], r[9], + best[0][1], self.ckpt.log[best[1][1], 0] + ) + ) + # if not self.args.test_only: + # self.ckpt.save(self, epoch, is_best=( + # (best[1][0] + 1) * self.args.test_every == epoch)) + if not self.args.test_only: + # self.ckpt.save(self, epoch, is_best=( + # self.ckpt.log[best[1][1], 0] == epoch)) + self._save_checkpoint(epoch,r[0],self.ckpt.dir,is_best=( + self.ckpt.log[best[1][1], 0] == epoch)) + + def fliphor(self, inputs): + inv_idx = torch.arange(inputs.size( + 3) - 1, -1, -1).long() # N x C x H x W + return inputs.index_select(3, inv_idx) + + # def extract_feature(self, loader): + # features = torch.FloatTensor() + # for (inputs, labels) in loader: + # ff = torch.FloatTensor(inputs.size(0), 2048).zero_() + # for i in range(2): + # if i == 1: + # inputs = self.fliphor(inputs) + # input_img = inputs.to(self.device) + # outputs = self.model(input_img) + # f = outputs[0].data.cpu() + # ff = ff + f + + # fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + # ff = ff.div(fnorm.expand_as(ff)) + + # features = torch.cat((features, ff), 0) + # return features + def extract_feature(self, loader, args): + features = torch.FloatTensor() + pids, camids = [], [] + + for d in loader: + inputs, pid, camid = self._parse_data_for_eval(d) + input_img = inputs.to(self.device) + outputs = self.model(input_img) + # print(outputs.shape) + if args.feat_inference == 'after': + # f1 = outputs[0].data.cpu() + # # # flip + # inputs = inputs.index_select( + # 3, torch.arange(inputs.size(3) - 1, -1, -1)) + # input_img = inputs.to(self.device) + # outputs = self.model(input_img) + # f2 = outputs[0].data.cpu() + + f1 = outputs.data.cpu() + # # flip + inputs = inputs.index_select( + 3, torch.arange(inputs.size(3) - 1, -1, -1)) + input_img = inputs.to(self.device) + outputs = self.model(input_img) + f2 = outputs.data.cpu() + + # f2=0 + # print('kkkkk') + else: + f1 = outputs[-1].data.cpu() + # flip + inputs = inputs.index_select( + 3, torch.arange(inputs.size(3) - 1, -1, -1)) + input_img = inputs.to(self.device) + outputs = self.model(input_img) + f2 = outputs[-1].data.cpu() + + ff = f1 + f2 + if ff.dim() == 3: + fnorm = torch.norm( + ff, p=2, dim=1, keepdim=True) * np.sqrt(ff.shape[2]) + ff = ff.div(fnorm.expand_as(ff)) + ff = ff.view(ff.size(0), -1) + # ff = ff.view(ff.size(0), -1) + # fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + # ff = ff.div(fnorm.expand_as(ff)) + + else: + fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + ff = ff.div(fnorm.expand_as(ff)) + # pass + # fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + # ff = ff.div(fnorm.expand_as(ff)) + features = torch.cat((features, ff), 0) + pids.extend(pid) + 
camids.extend(camid) + # print(features.shape) + return features, np.asarray(pids), np.asarray(camids) + + def terminate(self): + if self.args.test_only: + self.test() + return True + else: + epoch = self.scheduler.last_epoch + 1 + + return epoch > self.args.epochs + + # tools for reid datamanager + def _parse_data_for_train(self, data): + imgs = data[0] + pids = data[1] + return imgs, pids + + def _parse_data_for_eval(self, data): + imgs = data[0] + pids = data[1] + camids = data[2] + return imgs, pids, camids + + def _save_checkpoint(self, epoch, rank1, save_dir, is_best=False): + utility.save_checkpoint( + { + 'state_dict': self.model.state_dict(), + 'epoch': epoch, + 'rank1': rank1, + 'optimizer': self.optimizer.state_dict(), + # 'scheduler': self.scheduler.state_dict(), + }, + save_dir, + is_best=is_best + ) + + diff --git a/engine_v3.py b/engine_v3.py new file mode 100644 index 000000000..6b0a3180b --- /dev/null +++ b/engine_v3.py @@ -0,0 +1,239 @@ +import os +import torch +import numpy as np +from scipy.spatial.distance import cdist +from utils.functions import cmc, mean_ap, cmc_baseline, eval_liaoxingyu +from utils.re_ranking import re_ranking + + +class Engine(): + def __init__(self, args, model, optimizer, scheduler, loss, loader, ckpt): + self.args = args + self.train_loader = loader.train_loader + self.test_loader = loader.test_loader + self.query_loader = loader.query_loader + self.testset = loader.galleryset + self.queryset = loader.queryset + + self.ckpt = ckpt + self.model = model + self.optimizer = optimizer + self.scheduler = scheduler + self.loss = loss + + self.lr = 0. + self.device = torch.device('cpu' if args.cpu else 'cuda') + + if torch.cuda.is_available(): + self.ckpt.write_log('[INFO] GPU: ' + torch.cuda.get_device_name(0)) + + self.ckpt.write_log( + '[INFO] Starting from epoch {}'.format(self.scheduler.last_epoch)) + + print(ckpt.log) + # print(self.scheduler._last_lr) + + def train(self): + # self.loss.step() + epoch = self.scheduler.last_epoch + lr = self.scheduler.get_last_lr()[0] + + if lr != self.lr: + self.ckpt.write_log( + '[INFO] Epoch: {}\tLearning rate: {:.2e} '.format(epoch + 1, lr)) + self.lr = lr + self.loss.start_log() + self.model.train() + + for batch, d in enumerate(self.train_loader): + inputs, labels = self._parse_data_for_train(d) + + inputs = inputs.to(self.device) + labels = labels.to(self.device) + + self.optimizer.zero_grad() + outputs = self.model(inputs) + loss = self.loss.compute(outputs, labels) + + loss.backward() + self.optimizer.step() + + self.ckpt.write_log('\r[INFO] [{}/{}]\t{}/{}\t{}'.format( + epoch + 1, self.args.epochs, + batch + 1, len(self.train_loader), + self.loss.display_loss(batch)), + end='' if batch + 1 != len(self.train_loader) else '\n') + + self.scheduler.step() + self.loss.end_log(len(self.train_loader)) + # self._save_checkpoint(epoch, 0., self.ckpt.dir, is_best=True) + + def test(self): + epoch = self.scheduler.last_epoch + self.ckpt.write_log('\n[INFO] Test:') + self.model.eval() + + self.ckpt.add_log(torch.zeros(1, 6)) + # qf = self.extract_feature(self.query_loader,self.args).numpy() + # gf = self.extract_feature(self.test_loader,self.args).numpy() + with torch.no_grad(): + + qf, query_ids, query_cams = self.extract_feature( + self.query_loader, self.args) + gf, gallery_ids, gallery_cams = self.extract_feature( + self.test_loader, self.args) + + if self.args.re_rank: + q_g_dist = np.dot(qf, np.transpose(gf)) + q_q_dist = np.dot(qf, np.transpose(qf)) + g_g_dist = np.dot(gf, np.transpose(gf)) + dist = 
re_ranking(q_g_dist, q_q_dist, g_g_dist) + else: + # dist = cdist(qf, gf,metric='cosine') + + # cosine distance + dist = 1 - torch.mm(qf, gf.t()).cpu().numpy() + + # m, n = qf.shape[0], gf.shape[0] + + # dist = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \ + # torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t() + # dist.addmm_(1, -2, qf, gf.t()) + # dist = np.dot(qf,np.transpose(gf)) + # print('2') + + # r = cmc(dist, self.queryset.ids, self.testset.ids, self.queryset.cameras, self.testset.cameras, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # m_ap = mean_ap(dist, self.queryset.ids, self.testset.ids, + # self.queryset.cameras, self.testset.cameras) + # r = cmc(dist, query_label, gallery_label, query_cam, gallery_cam, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # m_ap = mean_ap(dist, query_label, gallery_label, query_cam, gallery_cam) + # r, m_ap = cmc_baseline(dist, query_label, gallery_label, query_cam, gallery_cam, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # r, m_ap = cmc_baseline(dist, query_ids, gallery_ids, query_cams, gallery_cams, + # separate_camera_set=False, + # single_gallery_shot=False, + # first_match_break=True) + # r,m_ap=eval_liaoxingyu(dist, query_label, gallery_label, query_cam, gallery_cam, 50) + r, m_ap = eval_liaoxingyu( + dist, query_ids, gallery_ids, query_cams, gallery_cams, 50) + + self.ckpt.log[-1, 0] = epoch + self.ckpt.log[-1, 1] = m_ap + self.ckpt.log[-1, 2] = r[0] + self.ckpt.log[-1, 3] = r[2] + self.ckpt.log[-1, 4] = r[4] + self.ckpt.log[-1, 5] = r[9] + best = self.ckpt.log.max(0) + + self.ckpt.write_log( + '[INFO] mAP: {:.4f} rank1: {:.4f} rank3: {:.4f} rank5: {:.4f} rank10: {:.4f} (Best: {:.4f} @epoch {})'.format( + m_ap, + r[0], r[2], r[4], r[9], + best[0][1], self.ckpt.log[best[1][1], 0] + ), refresh=True + ) + + if not self.args.test_only: + # self.ckpt.save(self, epoch, is_best=( + # self.ckpt.log[best[1][1], 0] == epoch)) + self._save_checkpoint(epoch, r[0], self.ckpt.dir, is_best=( + self.ckpt.log[best[1][1], 0] == epoch)) + self.ckpt.plot_map_rank(epoch) + + def fliphor(self, inputs): + inv_idx = torch.arange(inputs.size( + 3) - 1, -1, -1).long() # N x C x H x W + return inputs.index_select(3, inv_idx) + + def extract_feature(self, loader, args): + features = torch.FloatTensor() + pids, camids = [], [] + + for d in loader: + inputs, pid, camid = self._parse_data_for_eval(d) + input_img = inputs.to(self.device) + outputs = self.model(input_img) + # print(outputs.shape) + if args.feat_inference == 'after': + + f1 = outputs.data.cpu() + # flip + inputs = inputs.index_select( + 3, torch.arange(inputs.size(3) - 1, -1, -1)) + input_img = inputs.to(self.device) + outputs = self.model(input_img) + f2 = outputs.data.cpu() + + else: + f1 = outputs[-1].data.cpu() + # flip + inputs = inputs.index_select( + 3, torch.arange(inputs.size(3) - 1, -1, -1)) + input_img = inputs.to(self.device) + outputs = self.model(input_img) + f2 = outputs[-1].data.cpu() + + ff = f1 + f2 + if ff.dim() == 3: + fnorm = torch.norm( + ff, p=2, dim=1, keepdim=True) * np.sqrt(ff.shape[2]) + ff = ff.div(fnorm.expand_as(ff)) + ff = ff.view(ff.size(0), -1) + # ff = ff.view(ff.size(0), -1) + # fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + # ff = ff.div(fnorm.expand_as(ff)) + + else: + fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + ff = ff.div(fnorm.expand_as(ff)) + # pass + # fnorm = torch.norm(ff, p=2, dim=1, 
keepdim=True) + # ff = ff.div(fnorm.expand_as(ff)) + features = torch.cat((features, ff), 0) + pids.extend(pid) + camids.extend(camid) + # print(features.shape) + return features, np.asarray(pids), np.asarray(camids) + + def terminate(self): + if self.args.test_only: + self.test() + return True + else: + epoch = self.scheduler.last_epoch + 1 + + return epoch > self.args.epochs + + # tools for reid datamanager data_v2 + def _parse_data_for_train(self, data): + imgs = data[0] + pids = data[1] + return imgs, pids + + def _parse_data_for_eval(self, data): + imgs = data[0] + pids = data[1] + camids = data[2] + return imgs, pids, camids + + def _save_checkpoint(self, epoch, rank1, save_dir, is_best=False): + self.ckpt.save_checkpoint( + { + 'state_dict': self.model.state_dict(), + 'epoch': epoch, + 'rank1': rank1, + 'optimizer': self.optimizer.state_dict(), + 'log': self.ckpt.log, + # 'scheduler': self.scheduler.state_dict(), + }, + save_dir, + is_best=is_best + ) diff --git "a/loss/Icon\r" "b/loss/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/loss/__init__.py b/loss/__init__.py new file mode 100644 index 000000000..a906c7b02 --- /dev/null +++ b/loss/__init__.py @@ -0,0 +1,210 @@ +import os +import numpy as np +from importlib import import_module + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +import torch +import torch.nn as nn + +from loss.triplet import TripletLoss, TripletSemihardLoss, CrossEntropyLabelSmooth +from loss.grouploss import GroupLoss +from loss.multi_similarity_loss import MultiSimilarityLoss +from loss.focal_loss import FocalLoss +from loss.osm_caa_loss import OSM_CAA_Loss +from loss.center_loss import CenterLoss + + +class LossFunction(): + def __init__(self, args, ckpt): + super(LossFunction, self).__init__() + print('[INFO] Making loss...') + + self.nGPU = args.nGPU + self.args = args + self.loss = [] + for loss in args.loss.split('+'): + weight, loss_type = loss.split('*') + if loss_type == 'CrossEntropy': + if args.if_labelsmooth: + print(args.num_classes) + loss_function = CrossEntropyLabelSmooth( + num_classes=args.num_classes) + print('Label smooth on') + else: + loss_function = nn.CrossEntropyLoss() + elif loss_type == 'Triplet': + loss_function = TripletLoss(args.margin) + elif loss_type == 'GroupLoss': + loss_function = GroupLoss( + T=args.T, num_classes=args.num_classes, num_anchors=args.num_anchors) + elif loss_type == 'MSLoss': + loss_function = MultiSimilarityLoss(margin=args.margin) + elif loss_type == 'Focal': + loss_function = FocalLoss(reduction='mean') + elif loss_type == 'OSLoss': + loss_function = OSM_CAA_Loss() + elif loss_type == 'CenterLoss': + loss_function = CenterLoss(num_classes=args.num_classes, feat_dim=args.feats) + + # elif loss_type == 'Mix': + # self.fl = FocalLoss(reduction='mean') + # if args.if_labelsmooth: + # self.ce = CrossEntropyLabelSmooth( + # num_classes=args.num_classes) + # print('Label smooth on') + # else: + # self.ce = nn.CrossEntropyLoss() + + # self.tri = TripletLoss(args.margin) + + self.loss.append({ + 'type': loss_type, + 'weight': float(weight), + 'function': loss_function + }) + + if len(self.loss) > 1: + self.loss.append({'type': 'Total', 'weight': 0, 'function': None}) + + # for l in self.loss: + # if l['function'] is not None: + # print('{:.3f} * {}'.format(l['weight'], l['type'])) + # self.loss_module.append(l['function']) + + self.log = torch.Tensor() + + # device = torch.device('cpu' if args.cpu else 'cuda') + # self.loss_module.to(device) + + # # if args.load 
!= '': + # # self.load(ckpt.dir, cpu=args.cpu) + # if not args.cpu and args.nGPU > 1: + # self.loss_module = nn.DataParallel( + # self.loss_module, range(args.nGPU) + # ) + + def compute(self, outputs, labels): + losses = [] + for i, l in enumerate(self.loss): + if l['type'] in ['CrossEntropy']: + + if isinstance(outputs[0], list): + loss = [l['function'](output, labels) + for output in outputs[0]] + elif isinstance(outputs[0], torch.Tensor): + loss = [l['function'](outputs[0],labels)] + else: + raise TypeError('Unexpected type: {}'.format(type(outputs[0]))) + + loss = sum(loss) + effective_loss = l['weight'] * loss + losses.append(effective_loss) + self.log[-1, i] += effective_loss.item() + + elif l['type'] in ['Triplet','MSLoss']: + if isinstance(outputs[-1], list): + loss = [l['function'](output, labels) + for output in outputs[-1]] + elif isinstance(outputs[-1], torch.Tensor): + loss = [l['function'](outputs[-1],labels)] + else: + raise TypeError('Unexpected type: {}'.format(type(outputs[-1]))) + loss = sum(loss) + effective_loss = l['weight'] * loss + losses.append(effective_loss) + self.log[-1, i] += effective_loss.item() + + elif l['type'] in ['CenterLoss']: + if isinstance(outputs[-1], list): + loss = [l['function'](output, labels) + for output in outputs[-1]] + elif isinstance(outputs[-1], torch.Tensor): + loss = [l['function'](outputs[-1],labels)] + else: + raise TypeError('Unexpected type: {}'.format(type(outputs[-1]))) + + loss = sum(loss) + effective_loss = l['weight'] * loss + losses.append(effective_loss) + self.log[-1, i] += effective_loss.item() + + else: + pass + + loss_sum = sum(losses) + + if len(self.loss) > 1: + self.log[-1, -1] += loss_sum.item() + + return loss_sum + + def start_log(self): + self.log = torch.cat((self.log, torch.zeros(1, len(self.loss)))) + + def end_log(self, batches): + self.log[-1].div_(batches) + + def display_loss(self, batch): + n_samples = batch + 1 + log = [] + for l, c in zip(self.loss, self.log[-1]): + log.append('[{}: {:.6f}]'.format(l['type'], c / n_samples)) + + return ''.join(log) + + def plot_loss(self, apath, epoch): + axis = np.linspace(1, epoch, epoch) + for i, l in enumerate(self.loss): + label = '{} Loss'.format(l['type']) + fig = plt.figure() + plt.title(label) + # print(self.log[:, i].numpy(), label) + # print(axis) + plt.plot(axis, self.log[:, i].numpy(), label=label) + plt.legend() + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.grid(True) + plt.savefig('{}/loss_{}.jpg'.format(apath, l['type'])) + plt.close(fig) + + + # Following codes not being used + + def step(self): + for l in self.get_loss_module(): + if hasattr(l, 'scheduler'): + l.scheduler.step() + + def get_loss_module(self): + if self.nGPU == 1: + return self.loss_module + else: + return self.loss_module.module + + def save(self, apath): + torch.save(self.state_dict(), os.path.join(apath, 'loss.pt')) + torch.save(self.log, os.path.join(apath, 'loss_log.pt')) + + def load(self, apath, cpu=False): + if cpu: + kwargs = {'map_location': lambda storage, loc: storage} + else: + kwargs = {} + + self.load_state_dict(torch.load( + os.path.join(apath, 'loss.pt'), + **kwargs + )) + self.log = torch.load(os.path.join(apath, 'loss_log.pt')) + for l in self.loss_module: + if hasattr(l, 'scheduler'): + for _ in range(len(self.log)): + l.scheduler.step() + +def make_loss(args,ckpt): + return LossFunction(args,ckpt) + diff --git a/loss/center_loss.py b/loss/center_loss.py new file mode 100644 index 000000000..ab963abc8 --- /dev/null +++ b/loss/center_loss.py @@ -0,0 +1,74 @@ +from 
__future__ import absolute_import + +import torch +from torch import nn + + +class CenterLoss(nn.Module): + """Center loss. + Reference: + Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. + Args: + num_classes (int): number of classes. + feat_dim (int): feature dimension. + """ + + def __init__(self, num_classes=751, feat_dim=2048, use_gpu=True): + super(CenterLoss, self).__init__() + self.num_classes = num_classes + self.feat_dim = feat_dim + self.use_gpu = use_gpu + + if self.use_gpu: + self.centers = nn.Parameter(torch.randn( + self.num_classes, self.feat_dim).cuda()) + else: + self.centers = nn.Parameter( + torch.randn(self.num_classes, self.feat_dim)) + + def forward(self, x, labels): + """ + Args: + x: feature matrix with shape (batch_size, feat_dim). + labels: ground truth labels with shape (num_classes). + """ + assert x.size(0) == labels.size( + 0), "features.size(0) is not equal to labels.size(0)" + + batch_size = x.size(0) + distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \ + torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand( + self.num_classes, batch_size).t() + distmat.addmm_(1, -2, x, self.centers.t()) + + classes = torch.arange(self.num_classes).long() + if self.use_gpu: + classes = classes.cuda() + labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) + mask = labels.eq(classes.expand(batch_size, self.num_classes)) + + dist = distmat * mask.float() + loss = dist.clamp(min=1e-12, max=1e+12).sum() / batch_size + #dist = [] + # for i in range(batch_size): + # value = distmat[i][mask[i]] + # value = value.clamp(min=1e-12, max=1e+12) # for numerical stability + # dist.append(value) + #dist = torch.cat(dist) + #loss = dist.mean() + return loss + + +if __name__ == '__main__': + use_gpu = False + center_loss = CenterLoss(use_gpu=use_gpu) + features = torch.rand(16, 2048) + targets = torch.Tensor( + [0, 1, 2, 3, 2, 3, 1, 4, 5, 3, 2, 1, 0, 0, 5, 4]).long() + if use_gpu: + features = torch.rand(16, 2048).cuda() + targets = torch.Tensor( + [0, 1, 2, 3, 2, 3, 1, 4, 5, 3, 2, 1, 0, 0, 5, 4]).cuda() + + loss = center_loss(features, targets) + print(loss) diff --git a/loss/focal_loss.py b/loss/focal_loss.py new file mode 100644 index 000000000..f1a78228d --- /dev/null +++ b/loss/focal_loss.py @@ -0,0 +1,115 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +# from kornia.utils import one_hot + + +# based on: +# https://github.com/zhezh/focalloss/blob/master/focalloss.py + +def focal_loss( + input: torch.Tensor, + target: torch.Tensor, + alpha: float, + gamma: float = 2.0, + reduction: str = 'none', + eps: float = 1e-8) -> torch.Tensor: + r"""Function that computes Focal loss. + See :class:`~kornia.losses.FocalLoss` for details. + """ + if not torch.is_tensor(input): + raise TypeError("Input type is not a torch.Tensor. Got {}" + .format(type(input))) + + if not len(input.shape) >= 2: + raise ValueError("Invalid input shape, we expect BxCx*. Got: {}" + .format(input.shape)) + + if input.size(0) != target.size(0): + raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).' + .format(input.size(0), target.size(0))) + + n = input.size(0) + out_size = (n,) + input.size()[2:] + if target.size()[1:] != input.size()[2:]: + raise ValueError('Expected target size {}, got {}'.format( + out_size, target.size())) + + if not input.device == target.device: + raise ValueError( + "input and target must be in the same device. 
Got: {}" .format( + input.device, target.device)) + + # compute softmax over the classes axis + input_soft: torch.Tensor = F.softmax(input, dim=1) + eps + + # create the labels one hot tensor + # target_one_hot: torch.Tensor = one_hot( + # target, num_classes=input.shape[1], + # device=input.device, dtype=input.dtype) + target_one_hot: torch.Tensor = F.one_hot( + target, num_classes=input.shape[1]) + + # compute the actual focal loss + weight = torch.pow(-input_soft + 1., gamma) + + focal = -alpha * weight * torch.log(input_soft) + + loss_tmp = torch.sum(target_one_hot * focal, dim=1) + + if reduction == 'none': + loss = loss_tmp + elif reduction == 'mean': + loss = torch.mean(loss_tmp) + elif reduction == 'sum': + loss = torch.sum(loss_tmp) + else: + raise NotImplementedError("Invalid reduction mode: {}" + .format(reduction)) + return loss + + +class FocalLoss(nn.Module): + r"""Criterion that computes Focal loss. + According to [1], the Focal loss is computed as follows: + .. math:: + \text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t) + where: + - :math:`p_t` is the model's estimated probability for each class. + Arguments: + alpha (float): Weighting factor :math:`\alpha \in [0, 1]`. + gamma (float): Focusing parameter :math:`\gamma >= 0`. + reduction (str, optional): Specifies the reduction to apply to the + output: ‘none’ | ‘mean’ | ‘sum’. ‘none’: no reduction will be applied, + ‘mean’: the sum of the output will be divided by the number of elements + in the output, ‘sum’: the output will be summed. Default: ‘none’. + Shape: + - Input: :math:`(N, C, *)` where C = number of classes. + - Target: :math:`(N, *)` where each value is + :math:`0 ≤ targets[i] ≤ C−1`. + Examples: + >>> N = 5 # num_classes + >>> kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'} + >>> loss = kornia.losses.FocalLoss(**kwargs) + >>> input = torch.randn(1, N, 3, 5, requires_grad=True) + >>> target = torch.empty(1, 3, 5, dtype=torch.long).random_(N) + >>> output = loss(input, target) + >>> output.backward() + References: + [1] https://arxiv.org/abs/1708.02002 + """ + + def __init__(self, alpha: float = 1.0, gamma: float = 2.0, + reduction: str = 'none') -> None: + super(FocalLoss, self).__init__() + self.alpha: float = alpha + self.gamma: float = gamma + self.reduction: str = reduction + self.eps: float = 1e-6 + + def forward( # type: ignore + self, + input: torch.Tensor, + target: torch.Tensor) -> torch.Tensor: + return focal_loss(input, target, self.alpha, self.gamma, self.reduction, self.eps) diff --git a/loss/grouploss.py b/loss/grouploss.py new file mode 100644 index 000000000..be2572ab7 --- /dev/null +++ b/loss/grouploss.py @@ -0,0 +1,158 @@ +import torch +from torch import nn +import torch.nn.functional as F + +import numpy as np + + +class GroupLoss(nn.Module): + """Triplet loss with hard positive/negative mining. + + Reference: + Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737. + + Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py. + + Args: + margin (float): margin for triplet. 
+ """ + + def __init__(self, T=10, num_classes=751, num_anchors=0): + super(GroupLoss, self).__init__() + + self.T = T + self.num_classes = num_classes + self.num_anchors = num_anchors + self.nllloss = nn.NLLLoss() + # self.cross_entropy=nn.CrossEntropyLoss() + + def forward(self, features, X, targets): + """ + Args: + inputs: feature matrix with shape (batch_size, feat_dim) + targets: ground truth labels with shape (num_classes) + """ + n, m = X.size() + device = X.device + # compute pearson r + ff = features.clone().detach() + fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + ff = ff.div(fnorm.expand_as(ff)).cpu().numpy() + coef = np.corrcoef(ff) + + # features_ = features.detach().cpu().numpy() + # coef = np.corrcoef(features_) + + diago = np.arange(coef.shape[0]) + coef[diago, diago] = 0 + # W = F.relu(torch.tensor((coef - np.diag(np.diag(coef))), + # dtype=torch.float, device=device)) + W = F.relu(torch.tensor(coef, + dtype=torch.float, device=device)) + # print(W,'wwwwwwwwwwww') + for i in range(n): + if torch.sum(W[i]) == 0: + # print(W,'wwwwwwwwwwww') + + W[i, i] = 1 + # print(W,'wwwwwwwwwwww') + + # print(W,'wwwwwwwww') + X = F.softmax(X, dim=1) + # print(X) + # print(torch.argmax(X,dim=1)) + # ramdom select anchors + ids = torch.unique(targets) + # num_samples = n / len(ids) + # print(X.dtype) + # print(targets) + # print(id(X)) + # X_=X.clone().detach() + anchors = [] + for id_ in ids: + anchor = list(np.random.choice(torch.where(targets == id_)[ + 0].cpu(), size=self.num_anchors, replace=False)) + # print(id,'ididiid') + # print(torch.sum(X[anchors])) + # print(torch.argmax(X[anchors])) + anchors += anchor + + # print(torch.argmax(X[anchors])) + + # print(X[:20,:5],'xxxxxxx') + # print(id(X)) + # print(torch.where(X==torch.max(X,dim=1))) + + for i in range(self.T): + X_ = X.clone().detach() + X_[anchors] = torch.tensor(F.one_hot( + targets[anchors], self.num_classes), dtype=torch.float, device=device) + # print(i) + # print(X,'xxxxxxxxxxxx') + # print(X_,'---------') + Pi = torch.mm(W, X_) + # print(Pi) + # print(Pi, 'pipipi') + + PX = torch.mul(X, Pi) + + # X = F.normalize(PX, dim=1, p=1) + + # print(PX,'pxpxpx') + # print(PX.shape) + + # 111111111111111111111111 + # Norm = np.sum(PX.detach().cpu().numpy(), + # axis=1).reshape(-1) # .expand(n,m) + # # print(Norm,'norm') + # Q = 1 / Norm + # # print(Q,'QQQQQQQQQ') + # Q = torch.diag(torch.tensor(Q, dtype=torch.float, device=device)) + + # 2222222222222222222222222 + # denom = PX.detach().norm(p=1, dim=1, keepdim=True).clamp_min(1e-12).expand_as(PX) + # X=PX/denom + + # 3333333333333333333333 + # Q = torch.diag(1 / PX.norm(p=1, dim=1).clamp_min(1e-12)) + Q = torch.diag(1 / PX.detach().norm(p=1, dim=1).clamp_min(1e-12)) + X = torch.mm(Q, PX) + + # 444444444444444444444444444444 + # Q = torch.diag(1 / torch.matmul( + # PX, torch.ones(m, dtype=torch.float, device=device))) + # print(Q,'qqqqq') + # X = torch.matmul(Q, PX) + # Q=torch.pow(Q,-1) + # print(X) + + # 555555555555555555555555555555555555 + # X = F.softmax(PX, dim=1) + + # print(X.requires_grad) + loss = self.nllloss(torch.log(X.clamp_min(1e-12)), targets) + + # loss= self.cross_entropy(X,targets) + return loss + + # #inputs = 1. 
* inputs / (torch.norm(inputs, 2, dim=-1, keepdim=True).expand_as(inputs) + 1e-12) + # # Compute pairwise distance, replace by the official when merged + # dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n) + # dist = dist + dist.t() + # dist.addmm_(1, -2, inputs, inputs.t()) + # dist = dist.clamp(min=1e-12).sqrt() # for numerical stability + # # For each anchor, find the hardest positive and negative + # mask = targets.expand(n, n).eq(targets.expand(n, n).t()) + # print(mask[:8, :8]) + # dist_ap, dist_an = [], [] + # for i in range(n): + # dist_ap.append(dist[i][mask[i]].max().unsqueeze(0)) + # dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0)) + # dist_ap = torch.cat(dist_ap) + # dist_an = torch.cat(dist_an) + # # Compute ranking hinge loss + # y = torch.ones_like(dist_an) + # loss = self.ranking_loss(dist_an, dist_ap, y) + # if self.mutual: + # return loss, dist + # return loss diff --git a/loss/multi_similarity_loss.py b/loss/multi_similarity_loss.py new file mode 100755 index 000000000..f33bca5ca --- /dev/null +++ b/loss/multi_similarity_loss.py @@ -0,0 +1,58 @@ +# Copyright (c) Malong Technologies Co., Ltd. +# All rights reserved. +# +# Contact: github@malong.com +# +# This source code is licensed under the LICENSE file in the root directory of this source tree. + +import torch +from torch import nn + +# from ret_benchmark.losses.registry import LOSS + + +# @LOSS.register('ms_loss') +class MultiSimilarityLoss(nn.Module): + def __init__(self, margin=0.1): + super(MultiSimilarityLoss, self).__init__() + self.thresh = 0.5 + self.margin = margin + + self.scale_pos = 2.0 + self.scale_neg = 40.0 + + def forward(self, feats, labels): + assert feats.size(0) == labels.size(0), \ + f"feats.size(0): {feats.size(0)} is not equal to labels.size(0): {labels.size(0)}" + batch_size = feats.size(0) + feats = nn.functional.normalize(feats, p=2, dim=1) + + sim_mat = torch.matmul(feats, torch.t(feats)) # Shape: batchsize * batch size + + epsilon = 1e-5 + loss = list() + + for i in range(batch_size): + pos_pair_ = sim_mat[i][labels == labels[i]] + # print(pos_pair_.shape) + pos_pair_ = pos_pair_[pos_pair_ < 1 - epsilon] + neg_pair_ = sim_mat[i][labels != labels[i]] + + neg_pair = neg_pair_[neg_pair_ + self.margin > min(pos_pair_)] + pos_pair = pos_pair_[pos_pair_ - self.margin < max(neg_pair_)] + + if len(neg_pair) < 1 or len(pos_pair) < 1: + continue + + # weighting step + pos_loss = 1.0 / self.scale_pos * torch.log( + 1 + torch.sum(torch.exp(-self.scale_pos * (pos_pair - self.thresh)))) + neg_loss = 1.0 / self.scale_neg * torch.log( + 1 + torch.sum(torch.exp(self.scale_neg * (neg_pair - self.thresh)))) + loss.append(pos_loss + neg_loss) + + if len(loss) == 0: + return torch.zeros([], requires_grad=True,device= feats.device) + + loss = sum(loss) / batch_size + return loss diff --git a/loss/osm_caa_loss.py b/loss/osm_caa_loss.py new file mode 100644 index 000000000..05d03e765 --- /dev/null +++ b/loss/osm_caa_loss.py @@ -0,0 +1,99 @@ +import torch +import torch.nn as nn +from torch.autograd import Variable + + +class OSM_CAA_Loss(nn.Module): + def __init__(self, alpha=1.2, l=0.5, use_gpu=True, osm_sigma=0.8): + super(OSM_CAA_Loss, self).__init__() + self.use_gpu = use_gpu + self.alpha = alpha # margin of weighted contrastive loss, as mentioned in the paper + self.l = l # hyperparameter controlling weights of positive set and the negative set + # I haven't been able to figure out the use of \sigma CAA 0.18 + self.osm_sigma = osm_sigma # \sigma OSM (0.8) as mentioned in paper + + def 
forward(self, x, embd, labels): + ''' + x : feature vector : (n x d) + labels : (n,) + embd : Fully Connected weights of classification layer (dxC), C is the number of classes: represents the vectors for class + ''' + x = nn.functional.normalize(x, p=2, dim=1) # normalize the features + n = x.size(0) + dist = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(n, n) + dist = dist + dist.t() + dist.addmm_(1, -2, x, x.t()) + dist = dist.clamp(min=1e-12).sqrt() + # print(dist,'dist') + S = torch.exp(-1.0 * torch.pow(dist, 2) / + (self.osm_sigma * self.osm_sigma)) + # max (0, self.alpha - dij ) + # print(S,'ssssssssss') + S_ = torch.clamp(self.alpha - dist, min=1e-12) + p_mask = labels.expand(n, n).eq(labels.expand(n, n).t()) + p_mask = p_mask.float() + n_mask = 1 - p_mask + S = S * p_mask.float() + S = S + S_ * n_mask.float() + # embd = nn.functional.normalize(embd, p=2, dim=0) + # denominator = torch.exp(torch.mm(x, embd)) + # A = [] + # for i in range(n): + # a_i = denominator[i][labels[i]] / torch.sum(denominator[i]) + # A.append(a_i) + # atten_class = torch.stack(A) + A = [] + # print(labels,'label') + for i in range(n): + A.append(embd[i][labels[i]]) + atten_class = torch.stack(A) + + # pairwise minimum of attention weights + A = torch.min(atten_class.expand(n, n), + atten_class.view(-1, 1).expand(n, n)) + W = S * A + W_P = W * p_mask.float() + W_N = W * n_mask.float() + if self.use_gpu: + # dist between (xi,xi) not necessarily 0, avoiding precision error + W_P = W_P * (1 - torch.eye(n, n).float().cuda()) + W_N = W_N * (1 - torch.eye(n, n).float().cuda()) + else: + W_P = W_P * (1 - torch.eye(n, n).float()) + W_N = W_N * (1 - torch.eye(n, n).float()) + L_P = 1.0 / 2 * torch.sum(W_P * torch.pow(dist, 2)) / torch.sum(W_P) + L_N = 1.0 / 2 * torch.sum(W_N * torch.pow(S_, 2)) / torch.sum(W_N) + # print(L_P,'lplplplplp') + L = (1 - self.l) * L_P + self.l * L_N + return L + + +if __name__ == '__main__': + # Here I left a simple forward function. + # Test the model, before you train it. + import argparse + + parser = argparse.ArgumentParser(description='MGN') + parser.add_argument('--num_classes', type=int, default=751, help='') + parser.add_argument('--bnneck', type=bool, default=True) + parser.add_argument('--parts', type=int, default=3) + parser.add_argument('--feats', type=int, default=256) + + args = parser.parse_args() + net = OSM_CAA_Loss(use_gpu=False) + # net.classifier = nn.Sequential() + # print([p for p in net.parameters()]) + # a=filter(lambda p: p.requires_grad, net.parameters()) + # print(a) + + print(net) + d = 256 + c = 751 + x = Variable(torch.FloatTensor(8, d)) + label = Variable(torch.arange(8)) + embd = Variable(torch.FloatTensor(d, 751)) + + output = net(x, embd, label) + print('net output size:') + # print(len(output)) + print(output.shape) diff --git a/loss/ranked_loss.py b/loss/ranked_loss.py new file mode 100644 index 000000000..0590fe910 --- /dev/null +++ b/loss/ranked_loss.py @@ -0,0 +1,91 @@ +# encoding: utf-8 +""" +@author: zzg +@contact: xhx1247786632@gmail.com +""" +import torch +from torch import nn + + +def normalize_rank(x, axis=-1): + """Normalizing to unit length along the specified dimension. + Args: + x: pytorch Variable + Returns: + x: pytorch Variable, same shape as input + """ + x = 1. 
* x / (torch.norm(x, 2, axis, keepdim=True).expand_as(x) + 1e-12) + return x + + +def euclidean_dist_rank(x, y): + """ + Args: + x: pytorch Variable, with shape [m, d] + y: pytorch Variable, with shape [n, d] + Returns: + dist: pytorch Variable, with shape [m, n] + """ + m, n = x.size(0), y.size(0) + xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) + yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() + dist = xx + yy + dist.addmm_(1, -2, x, y.t()) + dist = dist.clamp(min=1e-12).sqrt() # for numerical stability + return dist + + +def rank_loss(dist_mat, labels, margin, alpha, tval): + """ + Args: + dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N] + labels: pytorch LongTensor, with shape [N] + + """ + assert len(dist_mat.size()) == 2 + assert dist_mat.size(0) == dist_mat.size(1) + N = dist_mat.size(0) + + total_loss = 0.0 + for ind in range(N): + is_pos = labels.eq(labels[ind]) + is_pos[ind] = 0 + is_neg = labels.ne(labels[ind]) + + dist_ap = dist_mat[ind][is_pos] + dist_an = dist_mat[ind][is_neg] + + ap_is_pos = torch.clamp(torch.add(dist_ap, margin - alpha), min=0.0) + ap_pos_num = ap_is_pos.size(0) + 1e-5 + ap_pos_val_sum = torch.sum(ap_is_pos) + loss_ap = torch.div(ap_pos_val_sum, float(ap_pos_num)) + + an_is_pos = torch.lt(dist_an, alpha) + an_less_alpha = dist_an[an_is_pos] + an_weight = torch.exp(tval * (-1 * an_less_alpha + alpha)) + an_weight_sum = torch.sum(an_weight) + 1e-5 + an_dist_lm = alpha - an_less_alpha + an_ln_sum = torch.sum(torch.mul(an_dist_lm, an_weight)) + loss_an = torch.div(an_ln_sum, an_weight_sum) + + total_loss = total_loss + loss_ap + loss_an + total_loss = total_loss * 1.0 / N + return total_loss + + +class RankedLoss(object): + "Ranked_List_Loss_for_Deep_Metric_Learning_CVPR_2019_paper" + + def __init__(self, margin=None, alpha=None, tval=None): + self.margin = margin + self.alpha = alpha + self.tval = tval + + def __call__(self, global_feat, labels, normalize_feature=True): + if normalize_feature: + global_feat = normalize_rank(global_feat, axis=-1) + dist_mat = euclidean_dist_rank(global_feat, global_feat) + total_loss = rank_loss( + dist_mat, labels, self.margin, self.alpha, self.tval) + + return total_loss diff --git a/loss/triplet.py b/loss/triplet.py new file mode 100755 index 000000000..94ace9561 --- /dev/null +++ b/loss/triplet.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import torch +from torch import nn +from torch.nn import functional as F + + +class TripletSemihardLoss(nn.Module): + """ + Shape: + - Input: :math:`(N, C)` where `C = number of channels` + - Target: :math:`(N)` + - Output: scalar. 
+ """ + + def __init__(self, device, margin=0, size_average=True): + super(TripletSemihardLoss, self).__init__() + self.margin = margin + self.size_average = size_average + self.device = device + + def forward(self, input, target): + y_true = target.int().unsqueeze(-1) + same_id = torch.eq(y_true, y_true.t()).type_as(input) + + pos_mask = same_id + neg_mask = 1 - same_id + + def _mask_max(input_tensor, mask, axis=None, keepdims=False): + input_tensor = input_tensor - 1e6 * (1 - mask) + _max, _idx = torch.max(input_tensor, dim=axis, keepdim=keepdims) + return _max, _idx + + def _mask_min(input_tensor, mask, axis=None, keepdims=False): + input_tensor = input_tensor + 1e6 * (1 - mask) + _min, _idx = torch.min(input_tensor, dim=axis, keepdim=keepdims) + return _min, _idx + + # output[i, j] = || feature[i, :] - feature[j, :] ||_2 + dist_squared = torch.sum(input ** 2, dim=1, keepdim=True) + \ + torch.sum(input.t() ** 2, dim=0, keepdim=True) - \ + 2.0 * torch.matmul(input, input.t()) + dist = dist_squared.clamp(min=1e-16).sqrt() + + pos_max, pos_idx = _mask_max(dist, pos_mask, axis=-1) + neg_min, neg_idx = _mask_min(dist, neg_mask, axis=-1) + + # loss(x, y) = max(0, -y * (x1 - x2) + margin) + y = torch.ones(same_id.size()[0]).to(self.device) + return F.margin_ranking_loss(neg_min.float(), + pos_max.float(), + y, + self.margin, + self.size_average) + + +class TripletLoss(nn.Module): + """Triplet loss with hard positive/negative mining. + + Reference: + Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737. + + Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py. + + Args: + margin (float): margin for triplet. + """ + + def __init__(self, margin=0.3, mutual_flag=False): + super(TripletLoss, self).__init__() + self.margin = margin + if margin == 0.: + self.ranking_loss = nn.SoftMarginLoss() + print('Using soft margin triplet loss') + else: + self.ranking_loss = nn.MarginRankingLoss(margin=margin) + + self.mutual = mutual_flag + + def forward(self, inputs, targets): + """ + Args: + inputs: feature matrix with shape (batch_size, feat_dim) + targets: ground truth labels with shape (num_classes) + """ + n = inputs.size(0) + # print(inputs.shape) + # print(targets) + # print(targets.shape) + # inputs = 1. * inputs / (torch.norm(inputs, 2, dim=-1, keepdim=True).expand_as(inputs) + 1e-12) + # Compute pairwise distance, replace by the official when merged + dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n) + dist = dist + dist.t() + dist.addmm_(1, -2, inputs, inputs.t()) + # print(dist,'ccccccc') + dist = dist.clamp(min=1e-12).sqrt() # for numerical stability + # print(dist,'ddddd/dd') + # For each anchor, find the hardest positive and negative + mask = targets.expand(n, n).eq(targets.expand(n, n).t()) + # print(mask[:8,:8]) + dist_ap, dist_an = [], [] + for i in range(n): + dist_ap.append(dist[i][mask[i]].max().unsqueeze(0)) + dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0)) + dist_ap = torch.cat(dist_ap) + dist_an = torch.cat(dist_an) + # print(dist_ap,'apapapapap') + # Compute ranking hinge loss + y = torch.ones_like(dist_an) + # loss = self.ranking_loss(dist_an, dist_ap, y) + if self.margin == 0.: + loss = self.ranking_loss(dist_an - dist_ap, y) + else: + loss = self.ranking_loss(dist_an, dist_ap, y) + + if self.mutual: + return loss, dist + return loss + + +class CrossEntropyLabelSmooth(nn.Module): + """Cross entropy loss with label smoothing regularizer. + + Reference: + Szegedy et al. 
Rethinking the Inception Architecture for Computer Vision. CVPR 2016. + Equation: y = (1 - epsilon) * y + epsilon / K. + + Args: + num_classes (int): number of classes. + epsilon (float): weight. + """ + + def __init__(self, num_classes, epsilon=0.1, use_gpu=True): + super(CrossEntropyLabelSmooth, self).__init__() + self.num_classes = num_classes + self.epsilon = epsilon + self.use_gpu = use_gpu + self.logsoftmax = nn.LogSoftmax(dim=1) + + def forward(self, inputs, targets): + """ + Args: + inputs: prediction matrix (before softmax) with shape (batch_size, num_classes) + targets: ground truth labels with shape (num_classes) + """ + log_probs = self.logsoftmax(inputs) + targets = torch.zeros(log_probs.size()).scatter_( + 1, targets.unsqueeze(1).data.cpu(), 1) + if self.use_gpu: + targets = targets.cuda() + targets = (1 - self.epsilon) * targets + \ + self.epsilon / self.num_classes + loss = (- targets * log_probs).mean(0).sum() + return loss diff --git a/main.py b/main.py new file mode 100755 index 000000000..87fb739e3 --- /dev/null +++ b/main.py @@ -0,0 +1,53 @@ +import data_v1 +import data_v2 +import loss +import model +import optim +import engine_v1 +import engine_v2 +import engine_v3 +import os.path as osp +from option import args +import utils.utility as utility +from utils.model_complexity import compute_model_complexity +from torch.utils.collect_env import get_pretty_env_info +import yaml + + +if args.config != '': + with open(args.config, 'r') as f: + config = yaml.load(f) + for op in config: + setattr(args, op, config[op]) + +# loader = data.Data(args) +ckpt = utility.checkpoint(args) +loader = data_v2.ImageDataManager(args) +model = model.Model(args, ckpt) +optimzer = optim.make_optimizer(args, model) +loss = loss.make_loss(args, ckpt) if not args.test_only else None + + +start = -1 +if args.load != '': + start = ckpt.resume_from_checkpoint( + osp.join(ckpt.dir, 'model.pth.tar-latest'), model, optimzer) - 1 +if args.pre_train != '': + ckpt.load_pretrained_weights(model, args.pre_train) + +scheduler = optim.make_scheduler(args, optimzer, start) + +# print('[INFO] System infomation: \n {}'.format(get_pretty_env_info())) +ckpt.write_log('[INFO] Model parameters: {com[0]} flops: {com[1]}'.format(com=compute_model_complexity(model, (1, 3, args.height, args.width)) + )) + +engine = engine_v3.Engine(args, model, optimzer, + scheduler, loss, loader, ckpt) +# engine = engine.Engine(args, model, loss, loader, ckpt) + +n = 0 +while not engine.terminate(): + n += 1 + engine.train() + if args.test_every != 0 and n % args.test_every == 0: + engine.test() diff --git "a/model/Icon\r" "b/model/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/model/__init__.py b/model/__init__.py new file mode 100755 index 000000000..8d8d87062 --- /dev/null +++ b/model/__init__.py @@ -0,0 +1,136 @@ +import os +from importlib import import_module + +import torch +import torch.nn as nn +import os.path as osp +from collections import OrderedDict + + +class Model(nn.Module): + + def __init__(self, args, ckpt): + super(Model, self).__init__() + print('[INFO] Making {} model...'.format(args.model)) + + self.device = torch.device('cpu' if args.cpu else 'cuda') + self.nGPU = args.nGPU + # self.save_models = args.save_models + + module = import_module('model.' 
+ args.model.lower()) + # self.model = module.make_model(args).to(self.device) + self.model = getattr(module, args.model)(args).to(self.device) + + if not args.cpu and args.nGPU > 1: + self.model = nn.DataParallel(self.model, range(args.nGPU)) + + # if args.load != '' or args.pre_train != '': + # print(ckpt.dir) + # self.load( + # ckpt.dir, + # pre_train=args.pre_train, + # resume=args.resume, + # cpu=args.cpu + # ) + # else: + # print('Pretained or latest model not exist, training from scratch.') + + def forward(self, x): + return self.model(x) + + def get_model(self): + if self.nGPU == 1: + return self.model + else: + return self.model.module + + def save(self, apath, epoch, is_best=False): + target = self.get_model() + torch.save( + target.state_dict(), + os.path.join(apath, 'model', 'model_latest.pt') + ) + if is_best: + torch.save( + target.state_dict(), + os.path.join(apath, 'model', 'model_best.pt') + ) + + if self.save_models: + torch.save( + target.state_dict(), + os.path.join(apath, 'model', 'model_{}.pt'.format(epoch)) + ) + + def load(self, apath, pre_train='', resume=-1, cpu=False): + if cpu: + kwargs = {'map_location': lambda storage, loc: storage} + else: + kwargs = {} + + # if resume == -1: + # print('Loading model from last checkpoint') + # self.get_model().load_state_dict( + # torch.load( + # os.path.join(apath, 'model', 'model_latest.pt'), + # **kwargs + # ), + # strict=False + # ) + # elif resume == 0: + # if pre_train != '': + # print('Loading model from {}'.format(pre_train)) + # self.get_model().load_state_dict( + # torch.load(pre_train, **kwargs), + # strict=False + # ) + # modified on 01.02.1010 + # if resume == 0: + # if pre_train != '': + # print('Loading model from {}'.format(pre_train)) + # self.get_model().load_state_dict( + # torch.load(pre_train, **kwargs), + # strict=False + # ) + # else: + # print('Loading model from last checkpoint') + # self.get_model().load_state_dict( + # torch.load( + # os.path.join(apath, 'model', 'model_latest.pt'), + # **kwargs + # ), + # strict=False + # ) + # else: + # self.get_model().load_state_dict( + # torch.load( + # os.path.join(apath, 'model', 'model_{}.pt'.format(resume)), + # **kwargs + # ), + # strict=False + # ) + # modified on 01.02.1010 + if pre_train != '': + print('Loading model from {}'.format(pre_train)) + if pre_train.split('.')[-1][:3] == 'tar': + print('load checkpointerrrrrrr') + # checkpoint = self.load_checkpoint(pre_train) + # self.get_model().load_state_dict(checkpoint['state_dict']) + self.load_pretrained_weights(self.get_model(), pre_train) + else: + + self.get_model().load_state_dict( + torch.load(pre_train, **kwargs), + strict=False + ) + else: + print('Loading model from last checkpoint') + # print(apath) + self.get_model().load_state_dict( + torch.load( + os.path.join(apath, 'model', 'model_latest.pt'), + **kwargs + ), + # strict=False + ) + diff --git a/model/attention.py b/model/attention.py new file mode 100644 index 000000000..71c621743 --- /dev/null +++ b/model/attention.py @@ -0,0 +1,129 @@ +########################################################################### +# Created by: CASIA IVA +# Email: jliu@nlpr.ia.ac.cn +# Copyright (c) 2018 + +# Reference: Dual Attention Network for Scene Segmentation +# https://arxiv.org/pdf/1809.02983.pdf +# https://github.com/junfu1115/DANet/blob/master/encoding/nn/attention.py +########################################################################### + +import numpy as np +import torch +import math +from torch.nn import Module, Sequential, Conv2d, ReLU, 
AdaptiveMaxPool2d, AdaptiveAvgPool2d, \ + NLLLoss, BCELoss, CrossEntropyLoss, AvgPool2d, MaxPool2d, Parameter, Linear, Sigmoid, Softmax, Dropout, Embedding +from torch.nn import functional as F +from torch.autograd import Variable +torch_ver = torch.__version__[:3] + +__all__ = ['PAM_Module', 'CAM_Module', 'Dual_Module', 'SE_Module'] + + +class PAM_Module(Module): + """ Position attention module""" + # Ref from SAGAN + + def __init__(self, in_dim): + super(PAM_Module, self).__init__() + self.chanel_in = in_dim + + self.query_conv = Conv2d( + in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1) + self.key_conv = Conv2d( + in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1) + self.value_conv = Conv2d( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.gamma = Parameter(torch.zeros(1)) + + self.softmax = Softmax(dim=-1) + + def forward(self, x): + """ + inputs : + x : input feature maps( B X C X H X W) + returns : + out : attention value + input feature + attention: B X (HxW) X (HxW) + """ + m_batchsize, C, height, width = x.size() + proj_query = self.query_conv(x).view( + m_batchsize, -1, width * height).permute(0, 2, 1) + proj_key = self.key_conv(x).view(m_batchsize, -1, width * height) + energy = torch.bmm(proj_query, proj_key) + attention = self.softmax(energy) + proj_value = self.value_conv(x).view(m_batchsize, -1, width * height) + + out = torch.bmm(proj_value, attention.permute(0, 2, 1)) + out = out.view(m_batchsize, C, height, width) + + out = self.gamma * out + x + return out + + +class CAM_Module(Module): + """ Channel attention module""" + + def __init__(self, in_dim): + super(CAM_Module, self).__init__() + self.chanel_in = in_dim + + self.gamma = Parameter(torch.zeros(1)) + self.softmax = Softmax(dim=-1) + + def forward(self, x): + """ + inputs : + x : input feature maps( B X C X H X W) + returns : + out : attention value + input feature + attention: B X C X C + """ + m_batchsize, C, height, width = x.size() + proj_query = x.view(m_batchsize, C, -1) + proj_key = x.view(m_batchsize, C, -1).permute(0, 2, 1) + energy = torch.bmm(proj_query, proj_key) + energy_new = torch.max( + energy, -1, keepdim=True)[0].expand_as(energy) - energy + attention = self.softmax(energy_new) + proj_value = x.view(m_batchsize, C, -1) + + out = torch.bmm(attention, proj_value) + out = out.view(m_batchsize, C, height, width) + + out = self.gamma * out + x + return out + + +class Dual_Module(Module): + + def __init__(self, in_dim): + super(Dual_Module).__init__() + self.indim = in_dim + self.pam = PAM_Module(in_dim) + self.cam = CAM_Module(in_dim) + + def forward(self, x): + out1 = self.pam(x) + out2 = self.cam(x) + return out1 + out2 + + +class SE_Module(Module): + + def __init__(self, channels, reduction=4): + super(SE_Module, self).__init__() + self.fc1 = Conv2d(channels, channels // reduction, + kernel_size=1, padding=0) + self.relu = ReLU(inplace=True) + self.fc2 = Conv2d(channels // reduction, channels, + kernel_size=1, padding=0) + self.sigmoid = Sigmoid() + + def forward(self, x): + module_input = x + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.sigmoid(x) + return module_input * x diff --git a/model/bnneck.py b/model/bnneck.py new file mode 100644 index 000000000..befe44632 --- /dev/null +++ b/model/bnneck.py @@ -0,0 +1,152 @@ +from torch import nn + +class BNNeck(nn.Module): + def __init__(self, input_dim, class_num, return_f=False): + super(BNNeck, self).__init__() + self.return_f = return_f + self.bn = nn.BatchNorm2d(input_dim) + 
self.bn.bias.requires_grad_(False) + self.classifier = nn.Linear(input_dim, class_num, bias=False) + self.bn.apply(self.weights_init_kaiming) + self.classifier.apply(self.weights_init_classifier) + + def forward(self, x): + before_neck = x.squeeze(dim=3).squeeze(dim=2) + # print(before_neck.shape) + after_neck = self.bn(x).squeeze(dim=3).squeeze(dim=2) + if self.return_f: + score = self.classifier(after_neck) + return after_neck, score, before_neck + else: + x = self.classifier(x) + return x + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') + nn.init.constant_(m.bias, 0.0) + elif classname.find('Conv') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + if m.bias is not None: + nn.init.constant_(m.bias, 0.0) + elif classname.find('BatchNorm') != -1: + if m.affine: + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0.0) + + def weights_init_classifier(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.normal_(m.weight, std=0.001) + if m.bias: + nn.init.constant_(m.bias, 0.0) + + +class BNNeck3(nn.Module): + def __init__(self, input_dim, class_num, feat_dim, return_f=False): + super(BNNeck3, self).__init__() + self.return_f = return_f + # self.reduction = nn.Linear(input_dim, feat_dim) + # self.bn = nn.BatchNorm1d(feat_dim) + + self.reduction = nn.Conv2d( + input_dim, feat_dim, 1, bias=False) + self.bn = nn.BatchNorm2d(feat_dim) + + self.bn.bias.requires_grad_(False) + self.classifier = nn.Linear(feat_dim, class_num, bias=False) + self.bn.apply(self.weights_init_kaiming) + self.classifier.apply(self.weights_init_classifier) + + def forward(self, x): + x = self.reduction(x) + before_neck = x.squeeze(dim=3).squeeze(dim=2) + after_neck = self.bn(x).squeeze(dim=3).squeeze(dim=2) + if self.return_f: + score = self.classifier(after_neck) + return after_neck, score, before_neck + else: + x = self.classifier(x) + return x + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') + nn.init.constant_(m.bias, 0.0) + elif classname.find('Conv') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + if m.bias is not None: + nn.init.constant_(m.bias, 0.0) + elif classname.find('BatchNorm') != -1: + if m.affine: + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0.0) + + def weights_init_classifier(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.normal_(m.weight, std=0.001) + if m.bias: + nn.init.constant_(m.bias, 0.0) + +# Defines the new fc layer and classification layer +# |--Linear--|--bn--|--relu--|--Linear--| + + +class ClassBlock(nn.Module): + def __init__(self, input_dim, class_num, droprate, relu=False, bnorm=True, num_bottleneck=512, linear=True, return_f=False): + super(ClassBlock, self).__init__() + self.return_f = return_f + add_block = [] + if linear: + add_block += [nn.Linear(input_dim, num_bottleneck)] + else: + num_bottleneck = input_dim + if bnorm: + add_block += [nn.BatchNorm1d(num_bottleneck)] + if relu: + add_block += [nn.LeakyReLU(0.1)] + if droprate > 0: + add_block += [nn.Dropout(p=droprate)] + add_block = nn.Sequential(*add_block) + add_block.apply(self.weights_init_kaiming) + + classifier = [] + classifier += [nn.Linear(num_bottleneck, class_num)] + classifier = nn.Sequential(*classifier) + 
classifier.apply(self.weights_init_classifier) + + self.add_block = add_block + self.classifier = classifier + + def forward(self, x): + x = self.add_block(x.squeeze(3).squeeze(2)) + if self.return_f: + f = x + x = self.classifier(x) + return f, x, f + else: + x = self.classifier(x) + return x + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + # print(classname) + if classname.find('Conv') != -1: + # For old pytorch, you may use kaiming_normal. + nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_out') + nn.init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm1d') != -1: + nn.init.normal_(m.weight.data, 1.0, 0.02) + nn.init.constant_(m.bias.data, 0.0) + + def weights_init_classifier(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.normal_(m.weight.data, std=0.001) + nn.init.constant_(m.bias.data, 0.0) + diff --git a/model/mcmp_n.py b/model/mcmp_n.py new file mode 100644 index 000000000..63d74e3b2 --- /dev/null +++ b/model/mcmp_n.py @@ -0,0 +1,237 @@ +import copy + +import torch +from torch import nn +import torch.nn.functional as F +import random +import math +from .osnet import osnet_x1_0, OSBlock +from .attention import PAM_Module, CAM_Module, SE_Module, Dual_Module +from .bnneck import BNNeck, BNNeck3 + +from torch.autograd import Variable + + +class BatchDrop(nn.Module): + def __init__(self, h_ratio, w_ratio): + super(BatchDrop, self).__init__() + self.h_ratio = h_ratio + self.w_ratio = w_ratio + + def forward(self, x): + if self.training: + h, w = x.size()[-2:] + rh = round(self.h_ratio * h) + rw = round(self.w_ratio * w) + sx = random.randint(0, h - rh) + sy = random.randint(0, w - rw) + mask = x.new_ones(x.size()) + mask[:, :, sx:sx + rh, sy:sy + rw] = 0 + x = x * mask + return x + + +class BatchRandomErasing(nn.Module): + def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0.4914, 0.4822, 0.4465]): + super(BatchRandomErasing, self).__init__() + + self.probability = probability + self.mean = mean + self.sl = sl + self.sh = sh + self.r1 = r1 + + def forward(self, img): + if self.training: + # print(img.size(),'lllllll') + if random.uniform(0, 1) > self.probability: + return img + + for attempt in range(100): + + area = img.size()[2] * img.size()[3] + + target_area = random.uniform(self.sl, self.sh) * area + aspect_ratio = random.uniform(self.r1, 1 / self.r1) + + h = int(round(math.sqrt(target_area * aspect_ratio))) + w = int(round(math.sqrt(target_area / aspect_ratio))) + + if w < img.size()[3] and h < img.size()[2]: + x1 = random.randint(0, img.size()[2] - h) + y1 = random.randint(0, img.size()[3] - w) + if img.size()[1] == 3: + img[:,0, x1:x1 + h, y1:y1 + w] = self.mean[0] + img[:,1, x1:x1 + h, y1:y1 + w] = self.mean[1] + img[:,2, x1:x1 + h, y1:y1 + w] = self.mean[2] + else: + img[:,0, x1:x1 + h, y1:y1 + w] = self.mean[0] + return img + + return img + + +class MCMP_n(nn.Module): + def __init__(self, args): + super(MCMP_n, self).__init__() + + self.n_ch = 2 + self.chs = 512 // self.n_ch + + osnet = osnet_x1_0(pretrained=True) + attention = CAM_Module(256) + + self.backone = nn.Sequential( + osnet.conv1, + osnet.maxpool, + osnet.conv2, + attention, + osnet.conv3[0] + ) + + conv3 = osnet.conv3[1:] + + downsample_conv4 = osnet._make_layer(OSBlock, 2, 384, 512, True) + downsample_conv4[:2].load_state_dict(osnet.conv4[:2].state_dict()) + + self.global_branch = nn.Sequential(copy.deepcopy( + 
conv3), copy.deepcopy(downsample_conv4), copy.deepcopy(osnet.conv5)) + + self.partial_branch = nn.Sequential(copy.deepcopy( + conv3), copy.deepcopy(osnet.conv4), copy.deepcopy(osnet.conv5)) + + self.channel_branch = nn.Sequential(copy.deepcopy( + conv3), copy.deepcopy(osnet.conv4), copy.deepcopy(osnet.conv5)) + + if args.pool == 'max': + pool2d = nn.AdaptiveMaxPool2d + elif args.pool == 'avg': + pool2d = nn.AdaptiveAvgPool2d + else: + raise Exception() + + self.global_pooling = pool2d((1, 1)) + self.partial_pooling = pool2d((2, 1)) + self.channel_pooling = pool2d((1, 1)) + + reduction = BNNeck3(512, args.num_classes, + args.feats, return_f=True) + self.reduction_0 = copy.deepcopy(reduction) + self.reduction_1 = copy.deepcopy(reduction) + self.reduction_2 = copy.deepcopy(reduction) + self.reduction_3 = copy.deepcopy(reduction) + + self.shared = nn.Sequential(nn.Conv2d( + self.chs, args.feats, 1, bias=False), nn.BatchNorm2d(args.feats), nn.ReLU(True)) + self.weights_init_kaiming(self.shared) + + self.reduction_ch_0 = BNNeck(args.feats, args.num_classes, return_f=True) + self.reduction_ch_1 = BNNeck(args.feats, args.num_classes, return_f=True) + + if args.drop_block: + print('Using batch random erasing block.') + self.batch_drop_block = BatchRandomErasing() + # if args.drop_block: + # print('Using batch drop block.') + # self.batch_drop_block = BatchDrop(h_ratio=0.33, w_ratio=1) + else: + self.batch_drop_block = None + + def forward(self, x): + # if self.batch_drop_block is not None: + # x = self.batch_drop_block(x) + + x = self.backone(x) + + glo = self.global_branch(x) + par = self.partial_branch(x) + cha = self.channel_branch(x) + + if self.batch_drop_block is not None: + glo = self.batch_drop_block(glo) + + glo = self.global_pooling(glo) # shape:(batchsize, 2048,1,1) + g_par = self.global_pooling(par) # shape:(batchsize, 2048,1,1) + p_par = self.partial_pooling(par) # shape:(batchsize, 2048,3,1) + cha = self.channel_pooling(cha) + + p0 = p_par[:, :, 0:1, :] + p1 = p_par[:, :, 1:2, :] + + f_glo = self.reduction_0(glo) + f_p0 = self.reduction_1(g_par) + f_p1 = self.reduction_2(p0) + f_p2 = self.reduction_3(p1) + + ################ + + c0 = cha[:, :self.chs, :, :] + c1 = cha[:, self.chs:, :, :] + c0 = self.shared(c0) + c1 = self.shared(c1) + f_c0 = self.reduction_ch_0(c0) + f_c1 = self.reduction_ch_1(c1) + + ################ + + fea = [f_glo[-1], f_p0[-1]] + + if not self.training: + a1 = F.normalize(f_glo[0], p=2, dim=1) + a2 = F.normalize(f_p0[0], p=2, dim=1) + a3 = F.normalize(f_p1[0], p=2, dim=1) + a4 = F.normalize(f_p2[0], p=2, dim=1) + + a5 = F.normalize(f_c0[0], p=2, dim=1) + a6 = F.normalize(f_c1[0], p=2, dim=1) + + return torch.cat([a1, a2, a3, a4, a5, a6], 1) + + return [f_glo[1], f_p0[1], f_p1[1], f_p2[1], f_c0[1], f_c1[1]], fea + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') + nn.init.constant_(m.bias, 0.0) + elif classname.find('Conv') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + if m.bias is not None: + nn.init.constant_(m.bias, 0.0) + elif classname.find('BatchNorm') != -1: + if m.affine: + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0.0) + + +if __name__ == '__main__': + # Here I left a simple forward function. + # Test the model, before you train it. 
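+    # Quick sanity check: the lines below build the minimal argparse options
+    # MCMP_n needs (num_classes, pool, feats, drop_block), construct the
+    # network, and run a single eval-mode forward pass on a dummy
+    # (8, 3, 384, 128) batch. In eval mode the model returns six concatenated
+    # L2-normalized embeddings, so the printed output shape should be
+    # (8, 6 * feats).
+    # Note: `net = MCMP_cam_2_m(args)` below refers to a name that is not
+    # defined in this file; the class implemented above is MCMP_n.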
+ import argparse + + parser = argparse.ArgumentParser(description='MGN') + parser.add_argument('--num_classes', type=int, default=751, help='') + parser.add_argument('--bnneck', type=bool, default=True) + parser.add_argument('--pool', type=str, default='max') + parser.add_argument('--feats', type=int, default=512) + parser.add_argument('--drop_block', type=bool, default=True) + parser.add_argument('--w_ratio', type=float, default=1.0, help='') + + args = parser.parse_args() + net = MCMP_cam_2_m(args) + # net.classifier = nn.Sequential() + # print([p for p in net.parameters()]) + # a=filter(lambda p: p.requires_grad, net.parameters()) + # print(a) + + print(net) + input = Variable(torch.FloatTensor(8, 3, 384, 128)) + net.eval() + output = net(input) + print(output.shape) + print('net output size:') + # print(len(output)) + # for k in output[0]: + # print(k.shape) + # for k in output[1]: + # print(k.shape) diff --git a/model/mcmp_r.py b/model/mcmp_r.py new file mode 100644 index 000000000..36f770387 --- /dev/null +++ b/model/mcmp_r.py @@ -0,0 +1,208 @@ +import copy + +import torch +from torch import nn +import torch.nn.functional as F +import random +import math +from .osnet import osnet_x1_0, OSBlock +from .attention import PAM_Module, CAM_Module, SE_Module, Dual_Module +from .bnneck import BNNeck, BNNeck3 +from torchvision.models.resnet import resnet50, Bottleneck + + +from torch.autograd import Variable + + +class BatchDrop(nn.Module): + def __init__(self, h_ratio, w_ratio): + super(BatchDrop, self).__init__() + self.h_ratio = h_ratio + self.w_ratio = w_ratio + + def forward(self, x): + if self.training: + h, w = x.size()[-2:] + rh = round(self.h_ratio * h) + rw = round(self.w_ratio * w) + sx = random.randint(0, h - rh) + sy = random.randint(0, w - rw) + mask = x.new_ones(x.size()) + mask[:, :, sx:sx + rh, sy:sy + rw] = 0 + x = x * mask + return x + + + + +class MCMP_r(nn.Module): + def __init__(self, args): + super(MCMP_r, self).__init__() + + self.n_ch = 2 + self.chs = 2048 // self.n_ch + + resnet = resnet50(pretrained=True) + + self.backone = nn.Sequential( + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3[0], + ) + + conv3 = nn.Sequential(*resnet.layer3[1:]) + no_downsample_conv4 = nn.Sequential( + Bottleneck(1024, 512, downsample=nn.Sequential( + nn.Conv2d(1024, 2048, 1, bias=False), nn.BatchNorm2d(2048))), + Bottleneck(2048, 512), + Bottleneck(2048, 512)) + no_downsample_conv4.load_state_dict(resnet.layer4.state_dict()) + + self.global_branch = nn.Sequential(copy.deepcopy( + conv3), copy.deepcopy(resnet.layer4)) + + self.partial_branch = nn.Sequential(copy.deepcopy( + conv3), copy.deepcopy(no_downsample_conv4)) + + self.channel_branch = nn.Sequential(copy.deepcopy( + conv3), copy.deepcopy(no_downsample_conv4)) + + if args.pool == 'max': + pool2d = nn.AdaptiveMaxPool2d + elif args.pool == 'avg': + pool2d = nn.AdaptiveAvgPool2d + else: + raise Exception() + + self.global_pooling = pool2d((1, 1)) + self.partial_pooling = pool2d((2, 1)) + self.channel_pooling = pool2d((1, 1)) + + reduction = BNNeck3(2048, args.num_classes, + args.feats, return_f=True) + self.reduction_0 = copy.deepcopy(reduction) + self.reduction_1 = copy.deepcopy(reduction) + self.reduction_2 = copy.deepcopy(reduction) + self.reduction_3 = copy.deepcopy(reduction) + + self.shared = nn.Sequential(nn.Conv2d( + self.chs, args.feats, 1, bias=False), nn.BatchNorm2d(args.feats), nn.ReLU(True)) + self.weights_init_kaiming(self.shared) + + self.reduction_ch_0 = 
BNNeck(args.feats, args.num_classes, return_f=True) + self.reduction_ch_1 = BNNeck(args.feats, args.num_classes, return_f=True) + + # if args.drop_block: + # print('Using batch random erasing block.') + # self.batch_drop_block = BatchRandomErasing() + if args.drop_block: + print('Using batch drop block.') + self.batch_drop_block = BatchDrop(h_ratio=args.h_ratio, w_ratio=args.w_ratio) + else: + self.batch_drop_block = None + + def forward(self, x): + # if self.batch_drop_block is not None: + # x = self.batch_drop_block(x) + + x = self.backone(x) + + glo = self.global_branch(x) + par = self.partial_branch(x) + cha = self.channel_branch(x) + + if self.batch_drop_block is not None: + glo = self.batch_drop_block(glo) + + glo = self.global_pooling(glo) # shape:(batchsize, 2048,1,1) + g_par = self.global_pooling(par) # shape:(batchsize, 2048,1,1) + p_par = self.partial_pooling(par) # shape:(batchsize, 2048,3,1) + cha = self.channel_pooling(cha) + + p0 = p_par[:, :, 0:1, :] + p1 = p_par[:, :, 1:2, :] + print(glo.shape) + f_glo = self.reduction_0(glo) + f_p0 = self.reduction_1(g_par) + f_p1 = self.reduction_2(p0) + f_p2 = self.reduction_3(p1) + + ################ + + c0 = cha[:, :self.chs, :, :] + c1 = cha[:, self.chs:, :, :] + print(c0.shape) + c0 = self.shared(c0) + c1 = self.shared(c1) + f_c0 = self.reduction_ch_0(c0) + f_c1 = self.reduction_ch_1(c1) + + ################ + + fea = [f_glo[-1], f_p0[-1]] + + if not self.training: + a1 = F.normalize(f_glo[0], p=2, dim=1) + a2 = F.normalize(f_p0[0], p=2, dim=1) + a3 = F.normalize(f_p1[0], p=2, dim=1) + a4 = F.normalize(f_p2[0], p=2, dim=1) + + a5 = F.normalize(f_c0[0], p=2, dim=1) + a6 = F.normalize(f_c1[0], p=2, dim=1) + + return torch.cat([a1, a2, a3, a4, a5, a6], 1) + + return [f_glo[1], f_p0[1], f_p1[1], f_p2[1], f_c0[1], f_c1[1]], fea + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') + nn.init.constant_(m.bias, 0.0) + elif classname.find('Conv') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + if m.bias is not None: + nn.init.constant_(m.bias, 0.0) + elif classname.find('BatchNorm') != -1: + if m.affine: + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0.0) + + +if __name__ == '__main__': + # Here I left a simple forward function. + # Test the model, before you train it. 
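+    # Quick sanity check for the ResNet-50 variant. Unlike MCMP_n, the part
+    # and channel branches above run on a stride-1 copy of layer4
+    # (no_downsample_conv4), while only the global branch keeps the stride-2
+    # layer4. An eval-mode forward on a (8, 3, 384, 128) batch again returns
+    # six concatenated L2-normalized embeddings of size feats each, i.e.
+    # (8, 6 * feats). The bare print(glo.shape) / print(c0.shape) calls in
+    # forward() are leftover debug output and will also fire during this test.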
+ import argparse + + parser = argparse.ArgumentParser(description='MGN') + parser.add_argument('--num_classes', type=int, default=751, help='') + parser.add_argument('--bnneck', type=bool, default=True) + parser.add_argument('--pool', type=str, default='max') + parser.add_argument('--feats', type=int, default=256) + parser.add_argument('--drop_block', type=bool, default=True) + parser.add_argument('--w_ratio', type=float, default=1.0, help='') + parser.add_argument('--h_ratio', type=float, default=0.33, help='') + + + args = parser.parse_args() + net = MCMP_r(args) + # net.classifier = nn.Sequential() + # print([p for p in net.parameters()]) + # a=filter(lambda p: p.requires_grad, net.parameters()) + # print(a) + + print(net) + input = Variable(torch.FloatTensor(8, 3, 384, 128)) + net.eval() + output = net(input) + print(output.shape) + print('net output size:') + # print(len(output)) + # for k in output[0]: + # print(k.shape) + # for k in output[1]: + # print(k.shape) diff --git a/model/mcn.py b/model/mcn.py new file mode 100644 index 000000000..5138561ef --- /dev/null +++ b/model/mcn.py @@ -0,0 +1,297 @@ +import torch +import torch.nn as nn +from torchvision.models.resnet import resnet50, Bottleneck +from torch.autograd import Variable + + +# Defines the new fc layer and classification layer +# |--Linear--|--bn--|--relu--|--Linear--| + + +class ClassBlock(nn.Module): + def __init__(self, input_dim, class_num, droprate, relu=False, bnorm=True, num_bottleneck=512, linear=True, return_f=False): + super(ClassBlock, self).__init__() + self.return_f = return_f + add_block = [] + if linear: + add_block += [nn.Linear(input_dim, num_bottleneck)] + else: + num_bottleneck = input_dim + if bnorm: + add_block += [nn.BatchNorm1d(num_bottleneck)] + if relu: + add_block += [nn.LeakyReLU(0.1)] + if droprate > 0: + add_block += [nn.Dropout(p=droprate)] + add_block = nn.Sequential(*add_block) + add_block.apply(self.weights_init_kaiming) + + classifier = [] + classifier += [nn.Linear(num_bottleneck, class_num)] + classifier = nn.Sequential(*classifier) + classifier.apply(self.weights_init_classifier) + + self.add_block = add_block + self.classifier = classifier + + def forward(self, x): + x = self.add_block(x) + if self.return_f: + f = x + x = self.classifier(x) + return f, x + else: + x = self.classifier(x) + return x + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + # print(classname) + if classname.find('Conv') != -1: + # For old pytorch, you may use kaiming_normal. 
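+            # Initialization convention for this block: Kaiming-normal with
+            # mode='fan_in' for conv weights, mode='fan_out' for linear
+            # weights (bias zeroed), and N(1.0, 0.02) for BatchNorm1d weights
+            # with zero bias.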
+ nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_out') + nn.init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm1d') != -1: + nn.init.normal_(m.weight.data, 1.0, 0.02) + nn.init.constant_(m.bias.data, 0.0) + + def weights_init_classifier(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.normal_(m.weight.data, std=0.001) + nn.init.constant_(m.bias.data, 0.0) + + +class BNNeck(nn.Module): + def __init__(self, input_dim, class_num, return_f=False): + super(BNNeck, self).__init__() + self.return_f = return_f + self.bn = nn.BatchNorm2d(input_dim) + self.bn.bias.requires_grad_(False) + self.classifier = nn.Linear(input_dim, class_num, bias=False) + self.bn.apply(self.weights_init_kaiming) + self.classifier.apply(self.weights_init_classifier) + + def forward(self, x): + before_neck = x + # print(before_neck.shape) + after_neck = self.bn(before_neck).squeeze(3).squeeze(2) + if self.return_f: + score = self.classifier(after_neck) + return after_neck, score, before_neck + else: + x = self.classifier(x) + return x + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') + nn.init.constant_(m.bias, 0.0) + elif classname.find('Conv') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + if m.bias is not None: + nn.init.constant_(m.bias, 0.0) + elif classname.find('BatchNorm') != -1: + if m.affine: + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0.0) + + def weights_init_classifier(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.normal_(m.weight, std=0.001) + if m.bias: + nn.init.constant_(m.bias, 0.0) + + +class new_BNNeck(nn.Module): + def __init__(self, input_dim, class_num, feat_dim, return_f=False): + super(new_BNNeck, self).__init__() + self.return_f = return_f + # self.reduction = nn.Linear(input_dim, feat_dim) + # self.bn = nn.BatchNorm1d(feat_dim) + + self.reduction = nn.Conv2d( + input_dim, feat_dim, 1, bias=False) + self.bn = nn.BatchNorm2d(feat_dim) + + self.bn.bias.requires_grad_(False) + self.classifier = nn.Linear(feat_dim, class_num, bias=False) + self.bn.apply(self.weights_init_kaiming) + self.classifier.apply(self.weights_init_classifier) + + def forward(self, x): + x = self.reduction(x) + before_neck = x.squeeze(dim=3).squeeze(dim=2) + after_neck = self.bn(x).squeeze(dim=3).squeeze(dim=2) + if self.return_f: + score = self.classifier(after_neck) + return after_neck, score, before_neck + else: + x = self.classifier(x) + return x + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') + nn.init.constant_(m.bias, 0.0) + elif classname.find('Conv') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + if m.bias is not None: + nn.init.constant_(m.bias, 0.0) + elif classname.find('BatchNorm') != -1: + if m.affine: + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0.0) + + def weights_init_classifier(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.normal_(m.weight, std=0.001) + if m.bias: + nn.init.constant_(m.bias, 0.0) + +# Multi Channel Network + + +class MCN(nn.Module): + def __init__(self, args): + super(MCN, self).__init__() + self.n_c = args.parts + self.chs = 2048 // 
self.n_c + + resnet_ = resnet50(pretrained=True) + + self.layer0 = nn.Sequential( + resnet_.conv1, + resnet_.bn1, + resnet_.relu, + resnet_.maxpool) + self.layer1 = resnet_.layer1 + self.layer2 = resnet_.layer2 + self.layer3 = resnet_.layer3 + self.layer4 = nn.Sequential( + Bottleneck(1024, 512, downsample=nn.Sequential( + nn.Conv2d(1024, 2048, 1, bias=False), nn.BatchNorm2d(2048))), + Bottleneck(2048, 512), + Bottleneck(2048, 512)) + self.layer4.load_state_dict(resnet_.layer4.state_dict()) + + self.gap = nn.AdaptiveAvgPool2d((1, 1)) + self.global_branch = new_BNNeck(2048, args.num_classes, 256,return_f=True) + + self.shared = nn.Sequential(nn.Conv2d( + self.chs, 256, 1, bias=False), nn.BatchNorm2d(256), nn.ReLU(True)) + self.weights_init_kaiming(self.shared) + + for i in range(self.n_c): + name = 'bnneck_' + str(i) + setattr(self, name, BNNeck(256, args.num_classes, return_f=True)) + + # self.global_branch = new_BNNeck( + # 2048, args.num_classes, args.feats, return_f=True) + # print('PCB_conv divide into {} parts, using {} dims feature.'.format( + # args.parts, args.feats)) + # self.global_branch = BNNeck(2048, args.num_classes, return_f=True) + + def forward(self, x): + x = self.layer0(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.gap(x) + global_feat = self.global_branch(x) + # featself.global_branch + + + # feat_to_global_branch = self.avgpool_before_triplet(x) + # x = self.dropout(x) + # print(x.shape) + part = {} + predict = {} + # get six part feature batchsize*2048*6 + for i in range(self.n_c): + part[i] = x[:, i * self.chs:(i + 1) * self.chs] + part[i] = self.shared(part[i]) + # print(part[i].shape,'kkkkk') + name = 'bnneck_' + str(i) + c = getattr(self, name) + predict[i] = c(part[i]) + # print(predict[i][0].shape,'jjjjj') + + # glfoobal_feat = [x.view(x.size(0), x.size(1), x.size(2))] + + # feat_global_branch = self.global_branch(feat_to_global_branch) + # y = [x.view(x.size(0), -1)] + + score = [global_feat[1]] + after_neck = [global_feat[0]] + # print(y[0].shape) + for i in range(self.n_c): + + score.append(predict[i][1]) + after_neck.append(predict[i][0]) + # print(y[0].shape) + # print(y[0][1].shape) + # return torch.cat([y[0][0],y[1][0],y[2][0],y[3][0],y[4][0],y[5][0]],dim=1),y[0][1],y[1][1],y[2][1],y[3][1],y[4][1],y[5][1] + # return [torch.stack(after_neck,dim=2)]+score+[global_feat_to_triplet] + # print(len(after_neck)) + # print(len(score)) + # print(after_neck[0].shape) + # print(torch.stack(after_neck, dim=2)) + # print(score) + return [torch.stack(after_neck, dim=2), score, [global_feat[-1]]] + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') + nn.init.constant_(m.bias, 0.0) + elif classname.find('Conv') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + if m.bias is not None: + nn.init.constant_(m.bias, 0.0) + elif classname.find('BatchNorm') != -1: + if m.affine: + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0.0) + + # def weights_init_classifier(m): + # classname = m.__class__.__name__ + # if classname.find('Linear') != -1: + # nn.init.normal_(m.weight, std=0.001) + # if m.bias: + # nn.init.constant_(m.bias, 0.0) + + +if __name__ == '__main__': + # Here I left a simple forward function. + # Test the model, before you train it. 
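+    # Standalone smoke test: builds MCN with 3 channel parts and a random
+    # 8 x 3 x 256 x 128 batch (training mode, since eval() is never called).
+    # The forward pass returns three things: the after-BNNeck features of the
+    # global branch and of each part stacked along dim 2 (an 8 x 256 x 4
+    # tensor with these defaults), the list of classification scores (one
+    # 8 x num_classes tensor per branch), and the global feature taken before
+    # the BN layer.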
+ import argparse + + parser = argparse.ArgumentParser(description='MGN') + parser.add_argument('--num_classes', type=int, default=751, help='') + parser.add_argument('--bnneck', type=bool, default=True) + parser.add_argument('--parts', type=int, default=3) + parser.add_argument('--feats', type=int, default=256) + + args = parser.parse_args() + net = MCN(args) + # net.classifier = nn.Sequential() + # print([p for p in net.parameters()]) + # a=filter(lambda p: p.requires_grad, net.parameters()) + # print(a) + + print(net) + input = Variable(torch.FloatTensor(8, 3, 256, 128)) + output = net(input) + print('net output size:') + print(len(output)) + print(output[0].shape) + for k in output[1]: + print(k.shape) diff --git a/model/mgn.py b/model/mgn.py new file mode 100755 index 000000000..d4cde0f50 --- /dev/null +++ b/model/mgn.py @@ -0,0 +1,160 @@ +import copy + +import torch +from torch import nn +import torch.nn.functional as F + +from torchvision.models.resnet import resnet50, Bottleneck + + +def make_model(args): + return MGN(args) + + +class MGN(nn.Module): + def __init__(self, args): + super(MGN, self).__init__() + num_classes = args.num_classes + + resnet = resnet50(pretrained=True) + + self.backone = nn.Sequential( + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3[0], + ) + + res_conv4 = nn.Sequential(*resnet.layer3[1:]) + + res_g_conv5 = resnet.layer4 + + res_p_conv5 = nn.Sequential( + Bottleneck(1024, 512, downsample=nn.Sequential( + nn.Conv2d(1024, 2048, 1, bias=False), nn.BatchNorm2d(2048))), + Bottleneck(2048, 512), + Bottleneck(2048, 512)) + res_p_conv5.load_state_dict(resnet.layer4.state_dict()) + + self.p1 = nn.Sequential(copy.deepcopy( + res_conv4), copy.deepcopy(res_g_conv5)) + self.p2 = nn.Sequential(copy.deepcopy( + res_conv4), copy.deepcopy(res_p_conv5)) + self.p3 = nn.Sequential(copy.deepcopy( + res_conv4), copy.deepcopy(res_p_conv5)) + + if args.pool == 'max': + pool2d = nn.MaxPool2d + elif args.pool == 'avg': + pool2d = nn.AvgPool2d + else: + raise Exception() + + self.maxpool_zg_p1 = pool2d(kernel_size=(12, 4)) + self.maxpool_zg_p2 = pool2d(kernel_size=(24, 8)) + self.maxpool_zg_p3 = pool2d(kernel_size=(24, 8)) + self.maxpool_zp2 = pool2d(kernel_size=(12, 8)) + self.maxpool_zp3 = pool2d(kernel_size=(8, 8)) + + reduction = nn.Sequential(nn.Conv2d( + 2048, args.feats, 1, bias=False), nn.BatchNorm2d(args.feats), nn.ReLU()) + + self._init_reduction(reduction) + self.reduction_0 = copy.deepcopy(reduction) + self.reduction_1 = copy.deepcopy(reduction) + self.reduction_2 = copy.deepcopy(reduction) + self.reduction_3 = copy.deepcopy(reduction) + self.reduction_4 = copy.deepcopy(reduction) + self.reduction_5 = copy.deepcopy(reduction) + self.reduction_6 = copy.deepcopy(reduction) + self.reduction_7 = copy.deepcopy(reduction) + + #self.fc_id_2048_0 = nn.Linear(2048, num_classes) + self.fc_id_2048_0 = nn.Linear(args.feats, num_classes) + self.fc_id_2048_1 = nn.Linear(args.feats, num_classes) + self.fc_id_2048_2 = nn.Linear(args.feats, num_classes) + + self.fc_id_256_1_0 = nn.Linear(args.feats, num_classes) + self.fc_id_256_1_1 = nn.Linear(args.feats, num_classes) + self.fc_id_256_2_0 = nn.Linear(args.feats, num_classes) + self.fc_id_256_2_1 = nn.Linear(args.feats, num_classes) + self.fc_id_256_2_2 = nn.Linear(args.feats, num_classes) + + self._init_fc(self.fc_id_2048_0) + self._init_fc(self.fc_id_2048_1) + self._init_fc(self.fc_id_2048_2) + + self._init_fc(self.fc_id_256_1_0) + self._init_fc(self.fc_id_256_1_1) + 
self._init_fc(self.fc_id_256_2_0) + self._init_fc(self.fc_id_256_2_1) + self._init_fc(self.fc_id_256_2_2) + + @staticmethod + def _init_reduction(reduction): + # conv + nn.init.kaiming_normal_(reduction[0].weight, mode='fan_in') + #nn.init.constant_(reduction[0].bias, 0.) + + # bn + nn.init.normal_(reduction[1].weight, mean=1., std=0.02) + nn.init.constant_(reduction[1].bias, 0.) + + @staticmethod + def _init_fc(fc): + nn.init.kaiming_normal_(fc.weight, mode='fan_out') + #nn.init.normal_(fc.weight, std=0.001) + nn.init.constant_(fc.bias, 0.) + + def forward(self, x): + + x = self.backone(x) + + p1 = self.p1(x) + p2 = self.p2(x) + p3 = self.p3(x) + + zg_p1 = self.maxpool_zg_p1(p1) # shape:(batchsize, 2048,1,1) + zg_p2 = self.maxpool_zg_p2(p2) # shape:(batchsize, 2048,1,1) + zg_p3 = self.maxpool_zg_p3(p3) # shape:(batchsize, 2048,1,1) + + zp2 = self.maxpool_zp2(p2) # shape:(batchsize, 2048,2,1) + z0_p2 = zp2[:, :, 0:1, :] + z1_p2 = zp2[:, :, 1:2, :] + + zp3 = self.maxpool_zp3(p3) # shape:(batchsize, 2048,3,1) + z0_p3 = zp3[:, :, 0:1, :] + z1_p3 = zp3[:, :, 1:2, :] + z2_p3 = zp3[:, :, 2:3, :] + + fg_p1 = self.reduction_0(zg_p1).squeeze(dim=3).squeeze(dim=2) + fg_p2 = self.reduction_1(zg_p2).squeeze(dim=3).squeeze(dim=2) + fg_p3 = self.reduction_2(zg_p3).squeeze(dim=3).squeeze(dim=2) + f0_p2 = self.reduction_3(z0_p2).squeeze(dim=3).squeeze(dim=2) + f1_p2 = self.reduction_4(z1_p2).squeeze(dim=3).squeeze(dim=2) + f0_p3 = self.reduction_5(z0_p3).squeeze(dim=3).squeeze(dim=2) + f1_p3 = self.reduction_6(z1_p3).squeeze(dim=3).squeeze(dim=2) + f2_p3 = self.reduction_7(z2_p3).squeeze(dim=3).squeeze(dim=2) + + ''' + l_p1 = self.fc_id_2048_0(zg_p1.squeeze(dim=3).squeeze(dim=2)) + l_p2 = self.fc_id_2048_1(zg_p2.squeeze(dim=3).squeeze(dim=2)) + l_p3 = self.fc_id_2048_2(zg_p3.squeeze(dim=3).squeeze(dim=2)) + ''' + l_p1 = self.fc_id_2048_0(fg_p1) + l_p2 = self.fc_id_2048_1(fg_p2) + l_p3 = self.fc_id_2048_2(fg_p3) + + l0_p2 = self.fc_id_256_1_0(f0_p2) + l1_p2 = self.fc_id_256_1_1(f1_p2) + l0_p3 = self.fc_id_256_2_0(f0_p3) + l1_p3 = self.fc_id_256_2_1(f1_p3) + l2_p3 = self.fc_id_256_2_2(f2_p3) + + predict = torch.cat([fg_p1, fg_p2, fg_p3, f0_p2, + f1_p2, f0_p3, f1_p3, f2_p3], dim=1) + # print(predict.shape) + return predict, fg_p1, fg_p2, fg_p3, l_p1, l_p2, l_p3, l0_p2, l1_p2, l0_p3, l1_p3, l2_p3 diff --git a/model/osnet.py b/model/osnet.py new file mode 100644 index 000000000..7d7b4fa36 --- /dev/null +++ b/model/osnet.py @@ -0,0 +1,441 @@ +from __future__ import absolute_import +from __future__ import division + +__all__ = ['osnet_x1_25', 'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0'] + +import torch +from torch import nn +from torch.nn import functional as F +import torchvision + + +pretrained_urls = { + 'osnet_x1_0': 'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY', + 'osnet_x0_75': 'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq', + 'osnet_x0_5': 'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i', + 'osnet_x0_25': 'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs', + 'osnet_ibn_x1_0': 'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l' +} + + +########## +# Basic layers +########## +class ConvLayer(nn.Module): + """Convolution layer (conv + bn + relu).""" + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False): + super(ConvLayer, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, + padding=padding, 
bias=False, groups=groups) + if IN: + self.bn = nn.InstanceNorm2d(out_channels, affine=True) + else: + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class Conv1x1(nn.Module): + """1x1 convolution + bn + relu.""" + + def __init__(self, in_channels, out_channels, stride=1, groups=1): + super(Conv1x1, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, + bias=False, groups=groups) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class Conv1x1Linear(nn.Module): + """1x1 convolution + bn (w/o non-linearity).""" + + def __init__(self, in_channels, out_channels, stride=1): + super(Conv1x1Linear, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False) + self.bn = nn.BatchNorm2d(out_channels) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return x + + +class Conv3x3(nn.Module): + """3x3 convolution + bn + relu.""" + + def __init__(self, in_channels, out_channels, stride=1, groups=1): + super(Conv3x3, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, + bias=False, groups=groups) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class LightConv3x3(nn.Module): + """Lightweight 3x3 convolution. + 1x1 (linear) + dw 3x3 (nonlinear). + """ + + def __init__(self, in_channels, out_channels): + super(LightConv3x3, self).__init__() + self.conv1 = nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False) + self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False, groups=out_channels) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + x = self.bn(x) + x = self.relu(x) + return x + + +########## +# Building blocks for omni-scale feature learning +########## +class ChannelGate(nn.Module): + """A mini-network that generates channel-wise gates conditioned on input tensor.""" + + def __init__(self, in_channels, num_gates=None, return_gates=False, + gate_activation='sigmoid', reduction=16, layer_norm=False): + super(ChannelGate, self).__init__() + if num_gates is None: + num_gates = in_channels + self.return_gates = return_gates + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Conv2d(in_channels, in_channels//reduction, kernel_size=1, bias=True, padding=0) + self.norm1 = None + if layer_norm: + self.norm1 = nn.LayerNorm((in_channels//reduction, 1, 1)) + self.relu = nn.ReLU(inplace=True) + self.fc2 = nn.Conv2d(in_channels//reduction, num_gates, kernel_size=1, bias=True, padding=0) + if gate_activation == 'sigmoid': + self.gate_activation = nn.Sigmoid() + elif gate_activation == 'relu': + self.gate_activation = nn.ReLU(inplace=True) + elif gate_activation == 'linear': + self.gate_activation = None + else: + raise RuntimeError("Unknown gate activation: {}".format(gate_activation)) + + def forward(self, x): + input = x + x = self.global_avgpool(x) + x = self.fc1(x) + if self.norm1 is not None: + x = self.norm1(x) + x = self.relu(x) + x = self.fc2(x) + if self.gate_activation is not None: + x = self.gate_activation(x) + if self.return_gates: + 
return x + return input * x + + +class OSBlock(nn.Module): + """Omni-scale feature learning block.""" + + def __init__(self, in_channels, out_channels, IN=False, bottleneck_reduction=4, **kwargs): + super(OSBlock, self).__init__() + mid_channels = out_channels // bottleneck_reduction + self.conv1 = Conv1x1(in_channels, mid_channels) + self.conv2a = LightConv3x3(mid_channels, mid_channels) + self.conv2b = nn.Sequential( + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + ) + self.conv2c = nn.Sequential( + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + ) + self.conv2d = nn.Sequential( + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + ) + self.gate = ChannelGate(mid_channels) + self.conv3 = Conv1x1Linear(mid_channels, out_channels) + self.downsample = None + if in_channels != out_channels: + self.downsample = Conv1x1Linear(in_channels, out_channels) + self.IN = None + if IN: + self.IN = nn.InstanceNorm2d(out_channels, affine=True) + + def forward(self, x): + identity = x + x1 = self.conv1(x) + x2a = self.conv2a(x1) + x2b = self.conv2b(x1) + x2c = self.conv2c(x1) + x2d = self.conv2d(x1) + x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d) + x3 = self.conv3(x2) + if self.downsample is not None: + identity = self.downsample(identity) + out = x3 + identity + if self.IN is not None: + out = self.IN(out) + return F.relu(out) + + +########## +# Network architecture +########## +class OSNet(nn.Module): + """Omni-Scale Network. + + Reference: + - Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019. 
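+
+    The ``channels`` list gives the width of the stem (``channels[0]``) and of
+    the three OSBlock stages (``channels[1:]``); ``feature_dim`` is the size of
+    the fully connected embedding returned at test time. Instances are normally
+    created through the factory functions below, e.g.
+    ``osnet_x1_0(num_classes=751, pretrained=True, loss='triplet')``.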
+ """ + + def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss='softmax', IN=False, **kwargs): + super(OSNet, self).__init__() + num_blocks = len(blocks) + assert num_blocks == len(layers) + assert num_blocks == len(channels) - 1 + self.loss = loss + + # convolutional backbone + self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN) + self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) + self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1], reduce_spatial_size=True, IN=IN) + self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2], reduce_spatial_size=True) + self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3], reduce_spatial_size=False) + self.conv5 = Conv1x1(channels[3], channels[3]) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + # fully connected layer + self.fc = self._construct_fc_layer(feature_dim, channels[3], dropout_p=None) + # identity classification layer + self.classifier = nn.Linear(self.feature_dim, num_classes) + + self._init_params() + + def _make_layer(self, block, layer, in_channels, out_channels, reduce_spatial_size, IN=False): + layers = [] + + layers.append(block(in_channels, out_channels, IN=IN)) + for i in range(1, layer): + layers.append(block(out_channels, out_channels, IN=IN)) + + if reduce_spatial_size: + layers.append( + nn.Sequential( + Conv1x1(out_channels, out_channels), + nn.AvgPool2d(2, stride=2) + ) + ) + + return nn.Sequential(*layers) + + def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): + if fc_dims is None or fc_dims<0: + self.feature_dim = input_dim + return None + + if isinstance(fc_dims, int): + fc_dims = [fc_dims] + + layers = [] + for dim in fc_dims: + layers.append(nn.Linear(input_dim, dim)) + layers.append(nn.BatchNorm1d(dim)) + layers.append(nn.ReLU(inplace=True)) + if dropout_p is not None: + layers.append(nn.Dropout(p=dropout_p)) + input_dim = dim + + self.feature_dim = fc_dims[-1] + + return nn.Sequential(*layers) + + def _init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def featuremaps(self, x): + x = self.conv1(x) + x = self.maxpool(x) + x = self.conv2(x) + x = self.conv3(x) + x = self.conv4(x) + x = self.conv5(x) + return x + + def forward(self, x, return_featuremaps=False): + x = self.featuremaps(x) + if return_featuremaps: + return x + v = self.global_avgpool(x) + v = v.view(v.size(0), -1) + if self.fc is not None: + v = self.fc(v) + if not self.training: + return v + y = self.classifier(v) + if self.loss == 'softmax': + return y + elif self.loss == 'triplet': + return y, v + else: + raise KeyError("Unsupported loss: {}".format(self.loss)) + + +def init_pretrained_weights(model, key=''): + """Initializes model with pretrained weights. + + Layers that don't match with pretrained layers in name or size are kept unchanged. 
+ """ + import os + import errno + import gdown + from collections import OrderedDict + + def _get_torch_home(): + ENV_TORCH_HOME = 'TORCH_HOME' + ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' + DEFAULT_CACHE_DIR = '~/.cache' + torch_home = os.path.expanduser( + os.getenv(ENV_TORCH_HOME, + os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'))) + return torch_home + + torch_home = _get_torch_home() + model_dir = os.path.join(torch_home, 'checkpoints') + try: + os.makedirs(model_dir) + except OSError as e: + if e.errno == errno.EEXIST: + # Directory already exists, ignore. + pass + else: + # Unexpected OSError, re-raise. + raise + filename = key + '_imagenet.pth' + cached_file = os.path.join(model_dir, filename) + + if not os.path.exists(cached_file): + gdown.download(pretrained_urls[key], cached_file, quiet=False) + + state_dict = torch.load(cached_file) + model_dict = model.state_dict() + new_state_dict = OrderedDict() + matched_layers, discarded_layers = [], [] + + for k, v in state_dict.items(): + if k.startswith('module.'): + k = k[7:] # discard module. + + if k in model_dict and model_dict[k].size() == v.size(): + new_state_dict[k] = v + matched_layers.append(k) + else: + discarded_layers.append(k) + + model_dict.update(new_state_dict) + model.load_state_dict(model_dict) + + if len(matched_layers) == 0: + warnings.warn( + 'The pretrained weights from "{}" cannot be loaded, ' + 'please check the key names manually ' + '(** ignored and continue **)'.format(cached_file)) + else: + print('Successfully loaded imagenet pretrained weights from "{}"'.format(cached_file)) + if len(discarded_layers) > 0: + print('** The following layers are discarded ' + 'due to unmatched keys or layer size: {}'.format(discarded_layers)) + + +########## +# Instantiation +########## +def osnet_x1_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs): + # standard size (width x1.25) + return OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], + channels=[80, 320, 480, 640], loss=loss, **kwargs) + if pretrained: + init_pretrained_weights(model, key='osnet_x1_25') + +def osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs): + # standard size (width x1.0) + model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], + channels=[64, 256, 384, 512], loss=loss, **kwargs) + if pretrained: + init_pretrained_weights(model, key='osnet_x1_0') + return model + + +def osnet_x0_75(num_classes=1000, pretrained=True, loss='softmax', **kwargs): + # medium size (width x0.75) + model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], + channels=[48, 192, 288, 384], loss=loss, **kwargs) + if pretrained: + init_pretrained_weights(model, key='osnet_x0_75') + return model + + +def osnet_x0_5(num_classes=1000, pretrained=True, loss='softmax', **kwargs): + # tiny size (width x0.5) + model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], + channels=[32, 128, 192, 256], loss=loss, **kwargs) + if pretrained: + init_pretrained_weights(model, key='osnet_x0_5') + return model + + +def osnet_x0_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs): + # very tiny size (width x0.25) + model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], + channels=[16, 64, 96, 128], loss=loss, **kwargs) + if pretrained: + init_pretrained_weights(model, key='osnet_x0_25') + return model + + +def osnet_ibn_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs): + # standard size (width 
x1.0) + IBN layer + # Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018. + model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], + channels=[64, 256, 384, 512], loss=loss, IN=True, **kwargs) + if pretrained: + init_pretrained_weights(model, key='osnet_ibn_x1_0') + return model \ No newline at end of file diff --git a/model/pcb.py b/model/pcb.py new file mode 100644 index 000000000..5790788db --- /dev/null +++ b/model/pcb.py @@ -0,0 +1,111 @@ +import torch +import torch.nn as nn +from torchvision.models.resnet import resnet50, Bottleneck +from torch.autograd import Variable +from .bnneck import BNNeck, BNNeck3, ClassBlock + + +class PCB(nn.Module): + def __init__(self, args): + super(PCB, self).__init__() + + self.part = args.parts # We cut the pool5 to 6 parts + model_ft = resnet50(pretrained=True) + self.model = model_ft + self.avgpool = nn.AdaptiveAvgPool2d((self.part, 1)) + + self.avgpool_before_triplet = nn.AdaptiveAvgPool2d((1, 1)) + self.dropout = nn.Dropout(p=0.5) + # remove the final downsample + self.model.layer4[0].downsample[0].stride = (1, 1) + self.model.layer4[0].conv2.stride = (1, 1) + self.bnneck = args.bnneck + # define 6 classifiers + for i in range(self.part): + name = 'classifier' + str(i) + if self.bnneck: + # setattr(self, name, BNNeck(2048, args.num_classes, return_f=True)) + setattr(self, name, BNNeck3( + 2048, args.num_classes, args.feats, return_f=True)) + else: + + setattr(self, name, ClassBlock(2048, args.num_classes, droprate=0.5, + relu=False, bnorm=True, num_bottleneck=args.feats, return_f=True)) + self.global_branch = BNNeck3( + 2048, args.num_classes, args.feats, return_f=True) + print('PCB_conv divide into {} parts, using {} dims feature.'.format( + args.parts, args.feats)) + # self.global_branch = BNNeck(2048, args.num_classes, return_f=True) + + def forward(self, x): + x = self.model.conv1(x) + x = self.model.bn1(x) + x = self.model.relu(x) + x = self.model.maxpool(x) + + x = self.model.layer1(x) + x = self.model.layer2(x) + x = self.model.layer3(x) + x = self.model.layer4(x) + feat_to_global_branch = self.avgpool_before_triplet(x) + x = self.avgpool(x) + x = self.dropout(x) + # print(x.shape) + part = {} + predict = {} + # get six part feature batchsize*2048*6 + for i in range(self.part): + part[i] = x[:, :, i].unsqueeze(dim=3) + # part[i] = torch.squeeze(x[:, :,:, i]) + + name = 'classifier' + str(i) + c = getattr(self, name) + predict[i] = c(part[i]) + + global_feat = [x.view(x.size(0), x.size(1), x.size(2))] + + feat_global_branch = self.global_branch(feat_to_global_branch) + # y = [x.view(x.size(0), -1)] + + score = [] + after_neck = [] + # print(y[0].shape) + for i in range(self.part): + + score.append(predict[i][1]) + if self.bnneck: + + after_neck.append(predict[i][0]) + + if not self.training: + return torch.stack(after_neck + [feat_global_branch[0]], dim=2) + return score + [feat_global_branch[1]], feat_global_branch[-1] + + +if __name__ == '__main__': + # Here I left a simple forward function. + # Test the model, before you train it. 
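+    # Standalone smoke test: builds PCB with 3 horizontal parts, runs a random
+    # 8 x 3 x 256 x 128 batch in eval mode and prints the stacked test-time
+    # embedding: one args.feats-dim vector per part plus the global branch,
+    # stacked along dim 2 (assuming BNNeck3, defined in model/bnneck.py and not
+    # shown here, reduces the 2048-dim pooled features to args.feats dims).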
+ import argparse + + parser = argparse.ArgumentParser(description='MGN') + parser.add_argument('--num_classes', type=int, default=751, help='') + parser.add_argument('--bnneck', type=bool, default=True) + parser.add_argument('--parts', type=int, default=3) + parser.add_argument('--feats', type=int, default=256) + + args = parser.parse_args() + net = PCB(args) + # net.classifier = nn.Sequential() + # print([p for p in net.parameters()]) + # a=filter(lambda p: p.requires_grad, net.parameters()) + # print(a) + net.eval() + print(net) + input = Variable(torch.FloatTensor(8, 3, 256, 128)) + output = net(input) + print('net output size:') + print(len(output)) + print(output.shape) + # for k in output[0]: + # print(k.shape) + # print(output[-1].shape) diff --git a/model/pyramid.py b/model/pyramid.py new file mode 100755 index 000000000..97cb85306 --- /dev/null +++ b/model/pyramid.py @@ -0,0 +1,200 @@ +import torch +from torch import nn, optim +import torch.nn.init as init +import torch.nn.functional as F +from .resnet_pyramid import resnet101 +from torch.autograd import Variable + + + +class Pyramid(nn.Module): + def __init__( + self, + args, + last_conv_stride=1, + last_conv_dilation=1, + num_stripes=6, # number of sub-parts + used_levels=[1, 1, 1, 1, 1, 1], + num_conv_out_channels=128, + global_conv_out_channels=256, + ): + + super(Pyramid, self).__init__() + + print("num_stripes:{}".format(num_stripes)) + print("num_conv_out_channels:{},".format(num_conv_out_channels)) + + self.base = resnet101( + pretrained=True, + last_conv_stride=last_conv_stride, + last_conv_dilation=last_conv_dilation) + + self.dropout_layer = nn.Dropout(p=0.2) + + # ============================================================================== pyramid + self.num_classes = args.num_classes + self.num_stripes = num_stripes + self.used_levels = used_levels + + # ==============================================================================pyramid + input_size = 2048 + self.pyramid_conv_list0 = nn.ModuleList() + self.pyramid_fc_list0 = nn.ModuleList() + Pyramid.register_basic_branch(self, num_conv_out_channels, + input_size, + self.pyramid_conv_list0, + self.pyramid_fc_list0) + + # ==============================================================================pyramid + input_size1 = 1024 + self.pyramid_conv_list1 = nn.ModuleList() + self.pyramid_fc_list1 = nn.ModuleList() + Pyramid.register_basic_branch(self, num_conv_out_channels, + input_size1, + self.pyramid_conv_list1, + self.pyramid_fc_list1) + + def forward(self, x): + """ + Returns: + feat_list: each member with shape [N, C] + logits_list: each member with shape [N, num_classes] + """ + # shape [N, C, H, W] + feat = self.base(x) + # print(feat.shape) + + assert feat.size(2) % self.num_stripes == 0 + + # ============================================================================== pyramid + feat_list = [] + logits_list = [] + + Pyramid.pyramid_forward(self, feat, + self.pyramid_conv_list0, + self.pyramid_fc_list0, + feat_list, + logits_list) + + return [torch.stack(feat_list,dim=2)]+logits_list + # ============================================================================== pyramid + + @staticmethod + def register_basic_branch(self, num_conv_out_channels, + input_size, + pyramid_conv_list, + pyramid_fc_list): + # the level indexes are defined from fine to coarse, + # the branch will contain one more part than that of its previous level + # the sliding step is set to 1 + self.num_in_each_level = [i for i in range(self.num_stripes, 0, -1)] + self.num_levels = 
len(self.num_in_each_level) + self.num_branches = sum(self.num_in_each_level) + + idx_levels = 0 + for idx_branches in range(self.num_branches): + if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]): + idx_levels += 1 + + if self.used_levels[idx_levels] == 0: + continue + + pyramid_conv_list.append(nn.Sequential( + nn.Conv2d(input_size, num_conv_out_channels, 1), + nn.BatchNorm2d(num_conv_out_channels), + nn.ReLU(inplace=True))) + + # ============================================================================== pyramid + idx_levels = 0 + for idx_branches in range(self.num_branches): + if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]): + idx_levels += 1 + + if self.used_levels[idx_levels] == 0: + continue + + fc = nn.Linear(num_conv_out_channels, self.num_classes) + init.normal_(fc.weight, std=0.001) + init.constant_(fc.bias, 0) + pyramid_fc_list.append(fc) + + @staticmethod + def pyramid_forward(self, feat, + pyramid_conv_list, + pyramid_fc_list, + feat_list, + logits_list): + + basic_stripe_size = int(feat.size(2) / self.num_stripes) + + idx_levels = 0 + used_branches = 0 + for idx_branches in range(self.num_branches): + + if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]): + idx_levels += 1 + + if self.used_levels[idx_levels] == 0: + continue + + idx_in_each_level = idx_branches - sum(self.num_in_each_level[0:idx_levels]) + + stripe_size_in_level = basic_stripe_size * (idx_levels + 1) + + st = idx_in_each_level * basic_stripe_size + ed = st + stripe_size_in_level + + local_feat = F.avg_pool2d(feat[:, :, st: ed, :], + (stripe_size_in_level, feat.size(-1))) + F.max_pool2d(feat[:, :, st: ed, :], + (stripe_size_in_level, + feat.size(-1))) + + local_feat = pyramid_conv_list[used_branches](local_feat) + local_feat = local_feat.view(local_feat.size(0), -1) + feat_list.append(local_feat) + + local_logits = pyramid_fc_list[used_branches](self.dropout_layer(local_feat)) + logits_list.append(local_logits) + + used_branches += 1 + + +def load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True, strict=True): + map_location = (lambda storage, loc: storage) if load_to_cpu else None + ckpt = torch.load(ckpt_file, map_location=map_location) + modules_optims[0].load_state_dict(ckpt['state_dicts'][0], strict=strict) + modules_optims[1].load_state_dict(ckpt['state_dicts'][1]) + + if verbose: + print('Resume from ckpt {}, \nepoch {}, \nscores {}'.format( + ckpt_file, ckpt['ep'], ckpt['scores'])) + return ckpt['ep'], ckpt['scores'] + +if __name__ == '__main__': + market_classes = 751 + duke_classes = 702 + cuhk_classes = 767 + + model = Pyramid(num_classes=market_classes) + # model = model.cuda() + finetuned_params = list(model.base.parameters()) + # To train from scratch + new_params = [p for n, p in model.named_parameters() + if not n.startswith('base.')] + param_groups = [{'params': finetuned_params, 'lr': 0.01}, + {'params': new_params, 'lr': 0.1}] + optimizer = optim.SGD(param_groups, momentum=0.9, weight_decay=5e-4) + + modules_optims = [model, optimizer] + input = Variable(torch.FloatTensor(8, 3, 384, 128)) + output = model(input) + print('net output size:') + print(len(output)) + print(output[0].shape) + print(output[2].shape) + + + + + # resume_ep, scores = load_ckpt(modules_optims, './market/ckpt_ep112_re02_bs64_dropout02_GPU0_mAP0.882439013042_market.pth') + print('Resume from EP: {}'.format(resume_ep)) diff --git a/model/resnet50.py b/model/resnet50.py new file mode 100644 index 000000000..05c11126c --- /dev/null +++ b/model/resnet50.py @@ 
-0,0 +1,118 @@ +import torch +import torch.nn as nn +from torchvision.models.resnet import resnet50, Bottleneck +import random +from .bnneck import BNNeck, BNNeck3, ClassBlock +from torch.autograd import Variable + + +# Defines the new fc layer and classification layer +# |--Linear--|--bn--|--relu--|--Linear--| + +class BatchDrop(nn.Module): + def __init__(self, h_ratio, w_ratio): + super(BatchDrop, self).__init__() + self.h_ratio = h_ratio + self.w_ratio = w_ratio + + def forward(self, x): + if self.training: + h, w = x.size()[-2:] + rh = round(self.h_ratio * h) + rw = round(self.w_ratio * w) + sx = random.randint(0, h - rh) + sy = random.randint(0, w - rw) + mask = x.new_ones(x.size()) + mask[:, :, sx:sx + rh, sy:sy + rw] = 0 + x = x * mask + return x + + + +# Define the ResNet50-based Model +class ResNet50(nn.Module): + + # def __init__(self, class_num, droprate=0.5, stride=2): + def __init__(self, args, droprate=0.5, stride=2): + + super(ResNet50, self).__init__() + resnet = resnet50(pretrained=True) + # avg pooling to global pooling + if stride == 1: + resnet.layer4[0].downsample[0].stride = (1, 1) + resnet.layer4[0].conv2.stride = (1, 1) + resnet.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.model = resnet + self.bnneck = args.bnneck + self.drop_block = args.drop_block + if args.drop_block: + print('Using batch drop block.') + resnet.avgpool = nn.AdaptiveMaxPool2d((1, 1)) + self.batch_drop_block = BatchDrop(h_ratio=args.h_ratio, w_ratio=args.w_ratio) + if self.bnneck: + self.classifier = BNNeck(2048, args.num_classes, return_f=True) + # self.classifier = new_BNNeck(2048, args.num_classes, 256, return_f=True) + + else: + + self.classifier = ClassBlock( + 2048, args.num_classes, droprate, return_f=True) + + def forward(self, x): + x = self.model.conv1(x) + x = self.model.bn1(x) + x = self.model.relu(x) + x = self.model.maxpool(x) + x = self.model.layer1(x) + x = self.model.layer2(x) + x = self.model.layer3(x) + x = self.model.layer4(x) + if self.drop_block: + x = self.batch_drop_block(x) + x = self.model.avgpool(x) + # x = x.view(x.size(0), x.size(1)) + # print(x.shape) + x = self.classifier(x) + # print(x[0].shape) + # print(x[1].shape) + # print(x[2].shape) + + + + if not self.training: + return x[0] + + return x[1],x[-1] +if __name__ == '__main__': + # Here I left a simple forward function. + # Test the model, before you train it. 
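+    # Standalone smoke test for the plain ResNet-50 baseline: with the defaults
+    # below the BNNeck is disabled (the ClassBlock head from model/bnneck.py is
+    # used instead) and the batch drop block is enabled. In eval mode the
+    # forward pass returns only the embedding (the first element returned by
+    # the classifier head); note that the flatten before the classifier is
+    # commented out above, so the head is fed the pooled 4-D tensor.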
+ import argparse + + parser = argparse.ArgumentParser(description='MGN') + parser.add_argument('--num_classes', type=int, default=751, help='') + parser.add_argument('--bnneck', type=bool, default=False) + parser.add_argument('--pool', type=str, default='max') + parser.add_argument('--feats', type=int, default=256) + parser.add_argument('--drop_block', type=bool, default=True) + parser.add_argument('--w_ratio', type=float, default=1.0, help='') + parser.add_argument('--h_ratio', type=float, default=0.33, help='') + + + args = parser.parse_args() + net = ResNet50(args) + # net.classifier = nn.Sequential() + # print([p for p in net.parameters()]) + # a=filter(lambda p: p.requires_grad, net.parameters()) + # print(a) + + print(net) + input = Variable(torch.FloatTensor(8, 3, 384, 128)) + net.eval() + output = net(input) + print(output.shape) + print('net output size:') + # print(len(output)) + # for k in output[0]: + # print(k.shape) + # for k in output[1]: + diff --git a/model/resnet501.py b/model/resnet501.py new file mode 100644 index 000000000..0b667ad4d --- /dev/null +++ b/model/resnet501.py @@ -0,0 +1,218 @@ +import torch +import torch.nn as nn +from torchvision.models.resnet import resnet50, Bottleneck +import random + + +# Defines the new fc layer and classification layer +# |--Linear--|--bn--|--relu--|--Linear--| + +class BatchDrop(nn.Module): + def __init__(self, h_ratio, w_ratio): + super(BatchDrop, self).__init__() + self.h_ratio = h_ratio + self.w_ratio = w_ratio + + def forward(self, x): + if self.training: + h, w = x.size()[-2:] + rh = round(self.h_ratio * h) + rw = round(self.w_ratio * w) + sx = random.randint(0, h - rh) + sy = random.randint(0, w - rw) + mask = x.new_ones(x.size()) + mask[:, :, sx:sx + rh, sy:sy + rw] = 0 + x = x * mask + return x + +class ClassBlock(nn.Module): + def __init__(self, input_dim, class_num, droprate, relu=False, bnorm=True, num_bottleneck=512, linear=True, return_f=False): + super(ClassBlock, self).__init__() + self.return_f = return_f + add_block = [] + if linear: + add_block += [nn.Linear(input_dim, num_bottleneck)] + else: + num_bottleneck = input_dim + if bnorm: + add_block += [nn.BatchNorm1d(num_bottleneck)] + if relu: + add_block += [nn.LeakyReLU(0.1)] + if droprate > 0: + add_block += [nn.Dropout(p=droprate)] + add_block = nn.Sequential(*add_block) + add_block.apply(self.weights_init_kaiming) + + classifier = [] + classifier += [nn.Linear(num_bottleneck, class_num)] + classifier = nn.Sequential(*classifier) + classifier.apply(self.weights_init_classifier) + + self.add_block = add_block + self.classifier = classifier + + def forward(self, x): + x = self.add_block(x) + if self.return_f: + f = x + x = self.classifier(x) + return f, x + else: + x = self.classifier(x) + return x + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + # print(classname) + if classname.find('Conv') != -1: + # For old pytorch, you may use kaiming_normal. 
+ nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_out') + nn.init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm1d') != -1: + nn.init.normal_(m.weight.data, 1.0, 0.02) + nn.init.constant_(m.bias.data, 0.0) + + def weights_init_classifier(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.normal_(m.weight.data, std=0.001) + nn.init.constant_(m.bias.data, 0.0) + + +class BNNeck(nn.Module): + def __init__(self, input_dim, class_num, return_f=False): + super(BNNeck, self).__init__() + self.return_f = return_f + self.bn = nn.BatchNorm1d(input_dim) + self.bn.bias.requires_grad_(False) + self.classifier = nn.Linear(input_dim, class_num, bias=False) + self.bn.apply(self.weights_init_kaiming) + self.classifier.apply(self.weights_init_classifier) + + def forward(self, x): + before_neck = x + after_neck = self.bn(before_neck) + if self.return_f: + score = self.classifier(after_neck) + return after_neck, score, before_neck + else: + x = self.classifier(x) + return x + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') + nn.init.constant_(m.bias, 0.0) + elif classname.find('Conv') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + if m.bias is not None: + nn.init.constant_(m.bias, 0.0) + elif classname.find('BatchNorm') != -1: + if m.affine: + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0.0) + + def weights_init_classifier(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.normal_(m.weight, std=0.001) + if m.bias: + nn.init.constant_(m.bias, 0.0) + + +class new_BNNeck(nn.Module): + def __init__(self, input_dim, class_num, feat_dim, return_f=False): + super(new_BNNeck, self).__init__() + self.return_f = return_f + self.reduction = nn.Linear(input_dim, feat_dim) + self.bn = nn.BatchNorm1d(feat_dim) + self.bn.bias.requires_grad_(False) + self.classifier = nn.Linear(feat_dim, class_num, bias=False) + self.bn.apply(self.weights_init_kaiming) + self.classifier.apply(self.weights_init_classifier) + + def forward(self, x): + x = self.reduction(x) + before_neck = x + after_neck = self.bn(before_neck) + if self.return_f: + score = self.classifier(after_neck) + return after_neck, score, before_neck + else: + x = self.classifier(x) + return x + + def weights_init_kaiming(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') + nn.init.constant_(m.bias, 0.0) + elif classname.find('Conv') != -1: + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + if m.bias is not None: + nn.init.constant_(m.bias, 0.0) + elif classname.find('BatchNorm') != -1: + if m.affine: + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0.0) + + def weights_init_classifier(self, m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + nn.init.normal_(m.weight, std=0.001) + if m.bias: + nn.init.constant_(m.bias, 0.0) + + +# Define the ResNet50-based Model +class ResNet501(nn.Module): + + # def __init__(self, class_num, droprate=0.5, stride=2): + def __init__(self, args, droprate=0.5, stride=2): + + super(ResNet501, self).__init__() + resnet = resnet50(pretrained=True) + # avg pooling to global pooling + if stride == 1: + resnet.layer4[0].downsample[0].stride = (1, 1) + 
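# last-stride trick: keeping stride 1 in the final stage doubles the height
+            # and width of the output feature map (e.g. 24 x 8 instead of 12 x 4 for a
+            # 384 x 128 input), a common re-ID trick used e.g. in the bag-of-tricks baseline. +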
resnet.layer4[0].conv2.stride = (1, 1) + resnet.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.model = resnet + self.bnneck = args.bnneck + self.drop_block = args.drop_block + if args.drop_block: + print('Using batch drop block.') + resnet.avgpool = nn.AdaptiveMaxPool2d((1, 1)) + self.batch_drop_block = BatchDrop(h_ratio=args.h_ratio, w_ratio=args.w_ratio) + if self.bnneck: + self.classifier = BNNeck(2048, args.num_classes, return_f=True) + # self.classifier = new_BNNeck(2048, args.num_classes, 256, return_f=True) + + else: + + self.classifier = ClassBlock( + 2048, args.num_classes, droprate, return_f=True) + + def forward(self, x): + x = self.model.conv1(x) + x = self.model.bn1(x) + x = self.model.relu(x) + x = self.model.maxpool(x) + x = self.model.layer1(x) + x = self.model.layer2(x) + x = self.model.layer3(x) + x = self.model.layer4(x) + if self.drop_block: + x = self.batch_drop_block(x) + x = self.model.avgpool(x) + x = x.view(x.size(0), x.size(1)) + + x = self.classifier(x) + if not self.training: + return x[0] + + return x[1],x[-1] + diff --git a/model/se_resnet.py b/model/se_resnet.py new file mode 100644 index 000000000..c4022e0f2 --- /dev/null +++ b/model/se_resnet.py @@ -0,0 +1,258 @@ +import torch.nn as nn +import torch +import math +import torch.utils.model_zoo as model_zoo +from torch.autograd import Variable + + + +__all__ = ['SENet', 'se_resnet_18', 'se_resnet_34', 'se_resnet_50', 'se_resnet_101', + 'se_resnet_152'] + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + if planes == 64: + self.globalAvgPool = nn.AvgPool2d(56, stride=1) + elif planes == 128: + self.globalAvgPool = nn.AvgPool2d(28, stride=1) + elif planes == 256: + self.globalAvgPool = nn.AvgPool2d(14, stride=1) + elif planes == 512: + self.globalAvgPool = nn.AvgPool2d(7, stride=1) + self.fc1 = nn.Linear(in_features=planes, out_features=round(planes / 16)) + self.fc2 = nn.Linear(in_features=round(planes / 16), out_features=planes) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + original_out = out + out = self.globalAvgPool(out) + out = out.view(out.size(0), -1) + out = self.fc1(out) + out = self.relu(out) + out = self.fc2(out) + out = self.sigmoid(out) + out = out.view(out.size(0), out.size(1), 1, 1) + out = out * original_out + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 
4) + self.relu = nn.ReLU(inplace=True) + if planes == 64: + self.globalAvgPool = nn.AvgPool2d(56, stride=1) + elif planes == 128: + self.globalAvgPool = nn.AvgPool2d(28, stride=1) + elif planes == 256: + self.globalAvgPool = nn.AvgPool2d(14, stride=1) + elif planes == 512: + self.globalAvgPool = nn.AvgPool2d(7, stride=1) + self.fc1 = nn.Linear(in_features=planes * 4, out_features=round(planes / 4)) + self.fc2 = nn.Linear(in_features=round(planes / 4), out_features=planes * 4) + self.sigmoid = nn.Sigmoid() + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + original_out = out + print(original_out.shape, 'original_out') + out = self.globalAvgPool(out) + out = out.view(out.size(0), -1) + print(out.shape,'jjjjjjj') + out = self.fc1(out) + out = self.relu(out) + out = self.fc2(out) + print(out.shape,'ddddddd') + + out = self.sigmoid(out) + out = out.view(out.size(0),out.size(1),1,1) + print(out.shape,'rrrrrrrrrrr') + + out = out * original_out + + out += residual + out = self.relu(out) + + return out + + +class SENet(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(SENet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AvgPool2d(7, stride=1) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + print(x.shape) + + x = self.layer1(x) + print(x.shape,'11111111') + x = self.layer2(x) + print(x.shape,'22222') + + x = self.layer3(x) + print(x.shape,'33333333') + + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +def se_resnet_18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = SENet(BasicBlock, [2, 2, 2, 2], **kwargs) + return model + + +def se_resnet_34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = SENet(BasicBlock, [3, 4, 6, 3], **kwargs) + return model + + +def se_resnet_50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = SENet(Bottleneck, [3, 4, 6, 3], **kwargs) + return model + + +def se_resnet_101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = SENet(Bottleneck, [3, 4, 23, 3], **kwargs) + return model + + +def se_resnet_152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = SENet(Bottleneck, [3, 8, 36, 3], **kwargs) + return model + +if __name__ == '__main__': + net = se_resnet_50(num_classes=751) + # net.classifier = nn.Sequential() + # print([p for p in net.parameters()]) + # a=filter(lambda p: p.requires_grad, net.parameters()) + # print(a) + + print(net) + input = Variable(torch.FloatTensor(8, 3, 224, 224)) + output = net(input) + print('net output size:') + print(output.shape) diff --git "a/optim/Icon\r" "b/optim/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/optim/__init__.py b/optim/__init__.py new file mode 100644 index 000000000..053e35c7c --- /dev/null +++ b/optim/__init__.py @@ -0,0 +1,121 @@ +import torch.optim as optim +import torch.optim.lr_scheduler as lrs +from .n_adam import NAdam +from .warmup_scheduler import WarmupMultiStepLR + + +def make_optimizer(args, model): + trainable = filter(lambda x: x.requires_grad, model.parameters()) + if args.model in ['PCB', 'PCB_v', 'PCB_conv']: + ignored_params = [] + for i in range(args.parts): + name = 'classifier' + str(i) + c = getattr(model.model, name) + ignored_params = ignored_params + list(map(id, c.parameters())) + + # ignored_params = (list(map(id, model.model.classifier0.parameters())) + # + list(map(id, model.model.classifier1.parameters())) + # # + list(map(id, model.model.classifier2.parameters())) + # # + list(map(id, model.model.classifier3.parameters())) + # # + list(map(id, model.model.classifier4.parameters())) + # # + list(map(id, model.model.classifier5.parameters()))) + # ) + + ignored_params = tuple(ignored_params) + + base_params = filter(lambda p: id( + p) not in ignored_params, model.model.parameters()) + + if args.pcb_different_lr: + print('PCB different lr') + if args.optimizer == 'SGD': + optimizer_pcb = optim.SGD([ + {'params': base_params, 'lr': 0.1 * args.lr}, + {'params': model.model.classifier0.parameters(), 'lr': args.lr}, + {'params': model.model.classifier1.parameters(), 'lr': args.lr}, + {'params': model.model.classifier2.parameters(), 'lr': args.lr}, + {'params': model.model.classifier3.parameters(), 'lr': args.lr}, + {'params': model.model.classifier4.parameters(), 'lr': args.lr}, + {'params': model.model.classifier5.parameters(), 'lr': args.lr}, + + ], weight_decay=5e-4, momentum=0.9, nesterov=True) + return optimizer_pcb + elif args.optimizer == 'ADAM': + params = [] + for i in range(args.parts): + name = 'classifier' + str(i) + c = getattr(model.model, name) + params.append({'params': c.parameters(), 'lr': args.lr}) + params = [{'params': base_params, + 'lr': 0.1 * args.lr}] + params + + optimizer_pcb = optim.Adam(params, weight_decay=5e-4) + + return optimizer_pcb + else: + raise('Optimizer not found, please choose adam or sgd.') + + if 
args.optimizer == 'SGD': + optimizer_function = optim.SGD + kwargs = { + 'momentum': args.momentum, + 'dampening': args.dampening, + 'nesterov': args.nesterov + } + elif args.optimizer == 'ADAM': + optimizer_function = optim.Adam + kwargs = { + 'betas': (args.beta1, args.beta2), + 'eps': args.epsilon, + 'amsgrad': args.amsgrad + } + elif args.optimizer == 'NADAM': + optimizer_function = NAdam + kwargs = { + 'betas': (args.beta1, args.beta2), + 'eps': args.epsilon + } + elif args.optimizer == 'RMSprop': + optimizer_function = optim.RMSprop + kwargs = { + 'eps': args.epsilon, + 'momentum': args.momentum + } + else: + raise Exception() + + kwargs['lr'] = args.lr + kwargs['weight_decay'] = args.weight_decay + + return optimizer_function(trainable, **kwargs) + + +def make_scheduler(args, optimizer, last_epoch): + + # if args.warmup in ['linear', 'constant'] and args.load == '' and args.pre_train == '': + milestones = args.decay_type.split('_') + milestones.pop(0) + milestones = list(map(lambda x: int(x), milestones)) + + scheduler = WarmupMultiStepLR( + optimizer, milestones, args.gamma, 0.01, 10, args.warmup, last_epoch=last_epoch) + return scheduler + + if args.decay_type == 'step': + scheduler = lrs.StepLR( + optimizer, + step_size=args.lr_decay, + gamma=args.gamma + ) + elif args.decay_type.find('step') >= 0: + milestones = args.decay_type.split('_') + milestones.pop(0) + milestones = list(map(lambda x: int(x), milestones)) + print(milestones, 'milestones') + scheduler = lrs.MultiStepLR( + optimizer, + milestones=milestones, + gamma=args.gamma + ) + + return scheduler diff --git a/optim/n_adam.py b/optim/n_adam.py new file mode 100755 index 000000000..47f21ec5c --- /dev/null +++ b/optim/n_adam.py @@ -0,0 +1,122 @@ +''' +Created on Mar 14, 2018 +@author: jyzhang +''' + +import math +import torch +from torch.optim import Optimizer + + +class NAdam(torch.optim.Optimizer): + """Implements Nesterov-accelerated Adam algorithm according to Keras. + + parameter name alias in different algorithms + NAdam Keras 054_report + exp_avg m_t m_t + exp_avg_prime prime{m}_t prime{m}_t + exp_avg_bar \\bar{m}_t bar{m}_t + exp_avg_sq v_t n_t + exp_avg_sq_prime prime{v}_t prime{n}_t + beta1 beta_1 mu + beta2 beta_2 v=0.999 + + It has been proposed in `Incorporating Nesterov Momentum into Adam`_. + Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): learning rate (default: 1e-3) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps (float, optional): term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0), + but not used in NAdam + schedule_decay (float, optional): coefficients used for computing + moment schedule (default: 0.004) + .. _Incorporating Nesterov Momentum into Adam + http://cs229.stanford.edu/proj2015/054_report.pdf + .. 
_On the importance of initialization and momentum in deep learning + http://www.cs.toronto.edu/~fritz/absps/momentum.pdf + """ + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, + weight_decay=0, schedule_decay=0.004): + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + defaults = dict(lr=lr, betas=betas, eps=eps, + weight_decay=weight_decay, schedule_decay=schedule_decay) + super(NAdam, self).__init__(params, defaults) + + def __setstate__(self, state): + super(NAdam, self).__setstate__(state) + + def step(self, closure=None): + """Performs a single optimization step. + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError('NAdam does not support sparse gradients') + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p.data) + # \mu^{t} + state['m_schedule'] = 1. + + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + + beta1, beta2 = group['betas'] + + schedule_decay = group['schedule_decay'] + + state['step'] += 1 + + if group['weight_decay'] != 0: + grad = grad.add(group['weight_decay'], p.data) + + # calculate the momentum cache \mu^{t} and \mu^{t+1} + momentum_cache_t = beta1 * ( \ + 1. - 0.5 * (pow(0.96, state['step'] * schedule_decay))) + momentum_cache_t_1 = beta1 * ( \ + 1. - 0.5 * (pow(0.96, (state['step'] + 1) * schedule_decay))) + m_schedule_new = state['m_schedule'] * momentum_cache_t + m_schedule_next = state['m_schedule'] * momentum_cache_t * momentum_cache_t_1 + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(1 - beta1, grad) + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + + g_prime = torch.div( grad, 1. - m_schedule_new) + exp_avg_prime = torch.div( exp_avg, 1. - m_schedule_next ) + exp_avg_sq_prime = torch.div(exp_avg_sq, 1. - pow(beta2, state['step'])) + + exp_avg_bar = torch.add( (1. - momentum_cache_t) * g_prime, \ + momentum_cache_t_1, exp_avg_prime ) + + denom = exp_avg_sq_prime.sqrt().add_(group['eps']) + + step_size = group['lr'] + + p.data.addcdiv_(-step_size, exp_avg_bar, denom) + + return loss \ No newline at end of file diff --git a/optim/nadam.py b/optim/nadam.py new file mode 100755 index 000000000..5a7cd3857 --- /dev/null +++ b/optim/nadam.py @@ -0,0 +1,85 @@ +import torch +from torch.optim import Optimizer + + +class Nadam(Optimizer): + """Implements Nadam algorithm (a variant of Adam based on Nesterov momentum). + It has been proposed in `Incorporating Nesterov Momentum into Adam`__. 
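+    At step t the implementation below follows the Keras-style momentum schedule
+    mu_t = beta_1 * (1 - 0.5 * 0.96**(t * schedule_decay)); the raw gradient,
+    weighted by (1 - mu_t), and the running first moment, weighted by mu_{t+1},
+    are each rescaled by the cumulative schedule product and divided by the
+    square root of the bias-corrected second moment plus eps.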
+ Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): learning rate (default: 2e-3) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square + eps (float, optional): term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + schedule_decay (float, optional): momentum schedule decay (default: 4e-3) + __ http://cs229.stanford.edu/proj2015/054_report.pdf + __ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf + """ + + def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8, + weight_decay=0, schedule_decay=4e-3): + defaults = dict(lr=lr, betas=betas, eps=eps, + weight_decay=weight_decay, schedule_decay=schedule_decay) + super(Nadam, self).__init__(params, defaults) + + def step(self, closure=None): + """Performs a single optimization step. + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + state['m_schedule'] = 1. + state['exp_avg'] = grad.new().resize_as_(grad).zero_() + state['exp_avg_sq'] = grad.new().resize_as_(grad).zero_() + + # Warming momentum schedule + m_schedule = state['m_schedule'] + schedule_decay = group['schedule_decay'] + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + beta1, beta2 = group['betas'] + eps = group['eps'] + + state['step'] += 1 + + if group['weight_decay'] != 0: + grad = grad.add(group['weight_decay'], p.data) + + momentum_cache_t = beta1 * \ + (1. - 0.5 * (0.96 ** (state['step'] * schedule_decay))) + momentum_cache_t_1 = beta1 * \ + (1. - 0.5 * + (0.96 ** ((state['step'] + 1) * schedule_decay))) + m_schedule_new = m_schedule * momentum_cache_t + m_schedule_next = m_schedule * momentum_cache_t * momentum_cache_t_1 + state['m_schedule'] = m_schedule_new + + # Decay the first and second moment running average coefficient + bias_correction2 = 1 - beta2 ** state['step'] + + exp_avg.mul_(beta1).add_(1 - beta1, grad) + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + exp_avg_sq_prime = exp_avg_sq.div(1. - bias_correction2) + + denom = exp_avg_sq_prime.sqrt_().add_(group['eps']) + + p.data.addcdiv_(-group['lr']*(1. - momentum_cache_t)/(1. - m_schedule_new), grad, denom) + p.data.addcdiv_(-group['lr']*momentum_cache_t_1/(1. 
- m_schedule_next), exp_avg, denom) + + return loss \ No newline at end of file diff --git a/optim/warmup_scheduler.py b/optim/warmup_scheduler.py new file mode 100755 index 000000000..822c22f71 --- /dev/null +++ b/optim/warmup_scheduler.py @@ -0,0 +1,62 @@ +# encoding: utf-8 +""" +@author: liaoxingyu +@contact: sherlockliao01@gmail.com +""" +from bisect import bisect_right +import torch + + +# FIXME ideally this would be achieved with a CombinedLRScheduler, +# separating MultiStepLR with WarmupLR +# but the current LRScheduler design doesn't allow it + +class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): + def __init__( + self, + optimizer, + milestones, + gamma=0.1, + warmup_factor=1.0 / 3, + warmup_iters=500, + warmup_method="linear", + last_epoch=-1, + ): + if not list(milestones) == sorted(milestones): + raise ValueError( + "Milestones should be a list of" " increasing integers. Got {}", + milestones, + ) + + if warmup_method not in ("constant", "linear", "none"): + raise ValueError( + "Only 'constant' or 'linear' warmup_method accepted" + "got {}".format(warmup_method) + ) + self.milestones = milestones + # print(self.milestones) + self.gamma = gamma + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) + + def get_lr(self): + warmup_factor = 1 + if self.last_epoch < self.warmup_iters: + if self.warmup_method == "constant": + # modified 18.02.2020 + # warmup_factor = self.warmup_factor + warmup_factor = (1 + self.last_epoch) / self.warmup_iters + + elif self.warmup_method == "linear": + alpha = self.last_epoch / self.warmup_iters + warmup_factor = self.warmup_factor * (1 - alpha) + alpha + elif self.warmup_method == "none": + warmup_factor = 1 + return [ + base_lr + * warmup_factor + * self.gamma ** bisect_right(self.milestones, self.last_epoch) + for base_lr in self.base_lrs + ] diff --git a/option.py b/option.py new file mode 100755 index 000000000..933abf2cd --- /dev/null +++ b/option.py @@ -0,0 +1,85 @@ +import argparse + +parser = argparse.ArgumentParser(description='MGN') + +parser.add_argument('--nThread', type=int, default=4, help='number of threads for data loading') +parser.add_argument('--cpu', action='store_true', help='if raise, use cpu only') +parser.add_argument('--nGPU', type=int, default=1, help='number of GPUs') + +parser.add_argument("--config", type=str, default="", help='config path') + +parser.add_argument("--datadir", type=str, default="Market-1501-v15.09.15", help='dataset directory root') +parser.add_argument('--data_train', type=str, default='Market1501', help='train dataset name') +parser.add_argument('--data_test', type=str, default='Market1501', help='test dataset name') + +parser.add_argument('--reset', action='store_true', help='reset the training') +parser.add_argument("--epochs", type=int, default=80, help='number of epochs to train') +parser.add_argument('--test_every', type=int, default=20, help='do test per every N epochs') +parser.add_argument("--batchid", type=int, default=16, help='the batch for id') +parser.add_argument("--batchimage", type=int, default=4, help='the batch of per id') +parser.add_argument("--batchtest", type=int, default=32, help='input batch size for test') +parser.add_argument('--test_only', action='store_true', help='set this option to test the model') +parser.add_argument('--sampler', type=str,default='True',help='do use sampler in dataloader') + + +parser.add_argument('--model', default='MGN', 
help='model name') +parser.add_argument('--loss', type=str, default='1*CrossEntropy+1*Triplet', help='loss function configuration') +parser.add_argument("--if_labelsmooth", action='store_true', help='Label Smooth Trick') +parser.add_argument("--bnneck", action='store_true', help='Apply bnneck before classifier, refer to BoT paper') +parser.add_argument("--feat_inference", type=str,default='after', help='Apply bnneck before classifier, refer to BoT paper') +parser.add_argument("--drop_block", action='store_true', help='Apply batch drop block') +parser.add_argument("--w_ratio", type=float, default=1.0, help='w_ratio of batch drop block') +parser.add_argument("--h_ratio", type=float, default=0.33, help='w_ratio of batch drop block') + + + +parser.add_argument('--act', type=str, default='relu', help='activation function') +parser.add_argument('--pool', type=str, default='avg', help='pool function') +parser.add_argument('--feats', type=int, default=256, help='number of feature maps') +parser.add_argument('--height', type=int, default=384, help='height of the input image') +parser.add_argument('--width', type=int, default=128, help='width of the input image') +parser.add_argument('--num_classes', type=int, default=751, help='') +parser.add_argument('--T', type=int, default=3, help='number of iterations of computing group loss') +parser.add_argument('--num_anchors', type=int, default=1, help='number of iterations of computing group loss') + + +parser.add_argument("--lr", type=float, default=2e-4, help='learning rate') +parser.add_argument('--optimizer', default='ADAM', choices=('SGD','ADAM','NADAM','RMSprop'), help='optimizer to use (SGD | ADAM | NADAM | RMSprop)') +parser.add_argument('--momentum', type=float, default=0.9, help='SGD momentum') +parser.add_argument('--dampening', type=float, default=0, help='SGD dampening') +parser.add_argument('--nesterov', action='store_true', help='SGD nesterov') +parser.add_argument('--beta1', type=float, default=0.9, help='ADAM beta1') +parser.add_argument('--beta2', type=float, default=0.999, help='ADAM beta2') +parser.add_argument('--amsgrad', action='store_true', help='ADAM amsgrad') +parser.add_argument('--epsilon', type=float, default=1e-8, help='ADAM epsilon for numerical stability') +parser.add_argument('--gamma', type=float, default=0.1, help='learning rate decay factor for step decay') +parser.add_argument('--weight_decay', type=float, default=5e-4, help='weight decay') +parser.add_argument('--decay_type', type=str, default='step', help='learning rate decay type') +parser.add_argument('--lr_decay', type=int, default=60, help='learning rate decay per N epochs') +parser.add_argument('--warmup', type=str, default='none', help='warmup iteration, option: linear, constant, none') +parser.add_argument('--pcb_different_lr', type=str,default='True', help='use different lr in pcb optimizer') + +parser.add_argument('--parts', type=int, default=6, help='parts of PCB model') +parser.add_argument("--margin", type=float, default=1.2, help='') +parser.add_argument("--re_rank", action='store_true', help='if raise, use re-ranking') +parser.add_argument("--cutout", action='store_true', help='if raise, use cutout augmentation') + +parser.add_argument("--random_erasing", action='store_true', help='') +parser.add_argument("--probability", type=float, default=0.5, help='') + +# parser.add_argument("--savedir", type=str, default='saved_models', help='directory name to save') +# parser.add_argument("--outdir", type=str, default='out', help='') +# 
parser.add_argument("--resume", action='store_true', help='whether resume training from specific checkpoint') +# parser.add_argument('--save_models', action='store_true', help='save all intermediate models') +parser.add_argument('--save', type=str, default='test', help='file name to save') +parser.add_argument('--load', type=str, default='', help='file name to load') +parser.add_argument('--pre_train', type=str, default='', help='pre-trained model directory') + +args = parser.parse_args() + +for arg in vars(args): + if vars(args)[arg] == 'True': + vars(args)[arg] = True + elif vars(args)[arg] == 'False': + vars(args)[arg] = False + diff --git a/opts.yaml b/opts.yaml new file mode 100644 index 000000000..c181d990d --- /dev/null +++ b/opts.yaml @@ -0,0 +1,51 @@ +T: 3 +act: relu +amsgrad: false +batchid: 16 +batchimage: 4 +batchtest: 32 +beta1: 0.9 +beta2: 0.999 +bnneck: false +config: '' +cpu: false +dampening: 0 +data_test: Market1501 +data_train: Market1501 +datadir: Market-1501-v15.09.15 +decay_type: step +epochs: 80 +epsilon: 1.0e-08 +feat_inference: after +feats: 256 +gamma: 0.1 +height: 384 +if_labelsmooth: false +loss: 1*CrossEntropy+1*Triplet +lr: 0.0002 +lr_decay: 60 +margin: 1.2 +model: MGN +momentum: 0.9 +nGPU: 1 +nThread: 4 +nesterov: false +num_anchors: 1 +num_classes: 751 +optimizer: ADAM +outdir: out +pool: avg +pre_train: '' +probability: 0.5 +random_erasing: false +re_rank: false +reset: false +resume: 0 +sampler: true +save_models: false +savedir: saved_models +test_every: 20 +test_only: false +warmup: none +weight_decay: 0.0005 +width: 128 diff --git "a/utils/Icon\r" "b/utils/Icon\r" new file mode 100644 index 000000000..e69de29bb diff --git a/utils/functions.py b/utils/functions.py new file mode 100755 index 000000000..10c6b2e52 --- /dev/null +++ b/utils/functions.py @@ -0,0 +1,251 @@ +from collections import defaultdict +import numpy as np +import torch +from sklearn.metrics import average_precision_score + + +def _unique_sample(ids_dict, num): + mask = np.zeros(num, dtype=np.bool) + for _, indices in ids_dict.items(): + i = np.random.choice(indices) + mask[i] = True + return mask + + +def cmc(distmat, query_ids=None, gallery_ids=None, + query_cams=None, gallery_cams=None, topk=100, + separate_camera_set=False, + single_gallery_shot=False, + first_match_break=False): + m, n = distmat.shape + # Fill up default values + if query_ids is None: + query_ids = np.arange(m) + if gallery_ids is None: + gallery_ids = np.arange(n) + if query_cams is None: + query_cams = np.zeros(m).astype(np.int32) + if gallery_cams is None: + gallery_cams = np.ones(n).astype(np.int32) + # Ensure numpy array + query_ids = np.asarray(query_ids) + gallery_ids = np.asarray(gallery_ids) + query_cams = np.asarray(query_cams) + gallery_cams = np.asarray(gallery_cams) + # Sort and find correct matches + indices = np.argsort(distmat, axis=1) + matches = (gallery_ids[indices] == query_ids[:, np.newaxis]) + # Compute CMC for each query + ret = np.zeros(topk) + num_valid_queries = 0 + for i in range(m): + # Filter out the same id and same camera + valid = ((gallery_ids[indices[i]] != query_ids[i]) | + (gallery_cams[indices[i]] != query_cams[i])) + if separate_camera_set: + # Filter out samples from same camera + valid &= (gallery_cams[indices[i]] != query_cams[i]) + if not np.any(matches[i, valid]): + continue + if single_gallery_shot: + repeat = 10 + gids = gallery_ids[indices[i][valid]] + inds = np.where(valid)[0] + ids_dict = defaultdict(list) + for j, x in zip(inds, gids): + ids_dict[x].append(j) + else: 
+ repeat = 1 + for _ in range(repeat): + if single_gallery_shot: + # Randomly choose one instance for each id + sampled = (valid & _unique_sample(ids_dict, len(valid))) + index = np.nonzero(matches[i, sampled])[0] + else: + index = np.nonzero(matches[i, valid])[0] + delta = 1. / (len(index) * repeat) + for j, k in enumerate(index): + if k - j >= topk: + break + if first_match_break: + ret[k - j] += 1 + break + ret[k - j] += delta + num_valid_queries += 1 + if num_valid_queries == 0: + raise RuntimeError("No valid query") + return ret.cumsum() / num_valid_queries + + +def mean_ap(distmat, query_ids=None, gallery_ids=None, + query_cams=None, gallery_cams=None): + m, n = distmat.shape + # Fill up default values + if query_ids is None: + query_ids = np.arange(m) + if gallery_ids is None: + gallery_ids = np.arange(n) + if query_cams is None: + query_cams = np.zeros(m).astype(np.int32) + if gallery_cams is None: + gallery_cams = np.ones(n).astype(np.int32) + # Ensure numpy array + query_ids = np.asarray(query_ids) + gallery_ids = np.asarray(gallery_ids) + query_cams = np.asarray(query_cams) + gallery_cams = np.asarray(gallery_cams) + # Sort and find correct matches + indices = np.argsort(distmat, axis=1) + matches = (gallery_ids[indices] == query_ids[:, np.newaxis]) + # Compute AP for each query + aps = [] + for i in range(m): + # Filter out the same id and same camera + valid = ((gallery_ids[indices[i]] != query_ids[i]) | + (gallery_cams[indices[i]] != query_cams[i])) + y_true = matches[i, valid] + y_score = -distmat[i][indices[i]][valid] + if not np.any(y_true): + continue + aps.append(average_precision_score(y_true, y_score)) + if len(aps) == 0: + raise RuntimeError("No valid query") + return np.mean(aps) + + +def compute_mAP_baseline(index, good_index, junk_index): + ap = 0 + cmc = torch.IntTensor(len(index)).zero_() + if good_index.size==0: # if empty + cmc[0] = -1 + return ap,cmc + + # remove junk_index + mask = np.in1d(index, junk_index, invert=True) + index = index[mask] + + # find good_index index + ngood = len(good_index) + mask = np.in1d(index, good_index) + rows_good = np.argwhere(mask==True) + rows_good = rows_good.flatten() + + cmc[rows_good[0]:] = 1 + for i in range(ngood): + d_recall = 1.0/ngood + precision = (i+1)*1.0/(rows_good[i]+1) + if rows_good[i]!=0: + old_precision = i*1.0/rows_good[i] + else: + old_precision=1.0 + ap = ap + d_recall*(old_precision + precision)/2 + + return ap, cmc + +def cmc_baseline(distmat, query_ids=None, gallery_ids=None, + query_cams=None, gallery_cams=None, topk=100, + separate_camera_set=False, + single_gallery_shot=False, + first_match_break=False): + m, n = distmat.shape + # Fill up default values + if query_ids is None: + query_ids = np.arange(m) + if gallery_ids is None: + gallery_ids = np.arange(n) + if query_cams is None: + query_cams = np.zeros(m).astype(np.int32) + if gallery_cams is None: + gallery_cams = np.ones(n).astype(np.int32) + # Ensure numpy array + query_ids = np.asarray(query_ids) + gallery_ids = np.asarray(gallery_ids) + query_cams = np.asarray(query_cams) + gallery_cams = np.asarray(gallery_cams) + + ####################################################### + ##################################################### + CMC = torch.IntTensor(len(gallery_ids)).zero_() + ap = 0.0 + for i in range(m): + # predict index + index = np.argsort(distmat[i]) #from small to large + # index = index[::-1] + # index = index[0:2000] + # good index + query_index = np.argwhere(gallery_ids==query_ids[i]) + camera_index = 
np.argwhere(gallery_cams==query_cams[i]) + + good_index = np.setdiff1d(query_index, camera_index, assume_unique=True) + junk_index1 = np.argwhere(gallery_ids==-1) + junk_index2 = np.intersect1d(query_index, camera_index) + junk_index = np.append(junk_index2, junk_index1) #.flatten()) + + ap_tmp, CMC_tmp = compute_mAP_baseline(index, good_index, junk_index) + if CMC_tmp[0]==-1: + continue + CMC = CMC + CMC_tmp + ap += ap_tmp + CMC = CMC.float() + CMC = CMC/m #average CMC + mAP = ap/m + + return CMC, mAP + + +def eval_liaoxingyu(distmat, q_pids, g_pids, q_camids, g_camids, max_rank): + """Evaluation with market1501 metric + Key: for each query identity, its gallery images from the same camera view are discarded. + """ + num_q, num_g = distmat.shape + + if num_g < max_rank: + max_rank = num_g + print("Note: number of gallery samples is quite small, got {}".format(num_g)) + + indices = np.argsort(distmat, axis=1) + matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) + + # compute cmc curve for each query + all_cmc = [] + all_AP = [] + num_valid_q = 0. # number of valid query + + for q_idx in range(num_q): + # get query pid and camid + q_pid = q_pids[q_idx] + q_camid = q_camids[q_idx] + + # remove gallery samples that have the same pid and camid with query + order = indices[q_idx] + remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) + keep = np.invert(remove) + + # compute cmc curve + raw_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches + if not np.any(raw_cmc): + # this condition is true when query identity does not appear in gallery + continue + + cmc = raw_cmc.cumsum() + cmc[cmc > 1] = 1 + + all_cmc.append(cmc[:max_rank]) + num_valid_q += 1. + + # compute average precision + # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision + num_rel = raw_cmc.sum() + tmp_cmc = raw_cmc.cumsum() + tmp_cmc = [x / (i + 1.) 
for i, x in enumerate(tmp_cmc)] + tmp_cmc = np.asarray(tmp_cmc) * raw_cmc + AP = tmp_cmc.sum() / num_rel + all_AP.append(AP) + + assert num_valid_q > 0, "Error: all query identities do not appear in gallery" + + all_cmc = np.asarray(all_cmc)#ß + all_cmc = all_cmc.sum(0) / num_valid_q + mAP = np.mean(all_AP) + + return all_cmc, mAP diff --git a/utils/model_complexity.py b/utils/model_complexity.py new file mode 100644 index 000000000..296ded4b0 --- /dev/null +++ b/utils/model_complexity.py @@ -0,0 +1,360 @@ +from __future__ import division, print_function, absolute_import +import math +import numpy as np +from itertools import repeat +from collections import namedtuple, defaultdict +import torch + +__all__ = ['compute_model_complexity'] +""" +Utility +""" + + +def _ntuple(n): + + def parse(x): + if isinstance(x, int): + return tuple(repeat(x, n)) + return x + + return parse + + +_single = _ntuple(1) +_pair = _ntuple(2) +_triple = _ntuple(3) +""" +Convolution +""" + + +def hook_convNd(m, x, y): + k = torch.prod(torch.Tensor(m.kernel_size)).item() + cin = m.in_channels + flops_per_ele = k * cin #+ (k*cin-1) + if m.bias is not None: + flops_per_ele += 1 + flops = flops_per_ele * y.numel() / m.groups + return int(flops) + + +""" +Pooling +""" + + +def hook_maxpool1d(m, x, y): + flops_per_ele = m.kernel_size - 1 + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_maxpool2d(m, x, y): + k = _pair(m.kernel_size) + k = torch.prod(torch.Tensor(k)).item() + # ops: compare + flops_per_ele = k - 1 + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_maxpool3d(m, x, y): + k = _triple(m.kernel_size) + k = torch.prod(torch.Tensor(k)).item() + flops_per_ele = k - 1 + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_avgpool1d(m, x, y): + flops_per_ele = m.kernel_size + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_avgpool2d(m, x, y): + k = _pair(m.kernel_size) + k = torch.prod(torch.Tensor(k)).item() + flops_per_ele = k + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_avgpool3d(m, x, y): + k = _triple(m.kernel_size) + k = torch.prod(torch.Tensor(k)).item() + flops_per_ele = k + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_adapmaxpool1d(m, x, y): + x = x[0] + out_size = m.output_size + k = math.ceil(x.size(2) / out_size) + flops_per_ele = k - 1 + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_adapmaxpool2d(m, x, y): + x = x[0] + out_size = _pair(m.output_size) + k = torch.Tensor(list(x.size()[2:])) / torch.Tensor(out_size) + k = torch.prod(torch.ceil(k)).item() + flops_per_ele = k - 1 + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_adapmaxpool3d(m, x, y): + x = x[0] + out_size = _triple(m.output_size) + k = torch.Tensor(list(x.size()[2:])) / torch.Tensor(out_size) + k = torch.prod(torch.ceil(k)).item() + flops_per_ele = k - 1 + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_adapavgpool1d(m, x, y): + x = x[0] + out_size = m.output_size + k = math.ceil(x.size(2) / out_size) + flops_per_ele = k + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_adapavgpool2d(m, x, y): + x = x[0] + out_size = _pair(m.output_size) + k = torch.Tensor(list(x.size()[2:])) / torch.Tensor(out_size) + k = torch.prod(torch.ceil(k)).item() + flops_per_ele = k + flops = flops_per_ele * y.numel() + return int(flops) + + +def hook_adapavgpool3d(m, x, y): + x = x[0] + out_size = _triple(m.output_size) + k = torch.Tensor(list(x.size()[2:])) / 
torch.Tensor(out_size) + k = torch.prod(torch.ceil(k)).item() + flops_per_ele = k + flops = flops_per_ele * y.numel() + return int(flops) + + +""" +Non-linear activations +""" + + +def hook_relu(m, x, y): + # eq: max(0, x) + num_ele = y.numel() + return int(num_ele) + + +def hook_leakyrelu(m, x, y): + # eq: max(0, x) + negative_slope*min(0, x) + num_ele = y.numel() + flops = 3 * num_ele + return int(flops) + + +""" +Normalization +""" + + +def hook_batchnormNd(m, x, y): + num_ele = y.numel() + flops = 2 * num_ele # mean and std + if m.affine: + flops += 2 * num_ele # gamma and beta + return int(flops) + + +def hook_instancenormNd(m, x, y): + return hook_batchnormNd(m, x, y) + + +def hook_groupnorm(m, x, y): + return hook_batchnormNd(m, x, y) + + +def hook_layernorm(m, x, y): + num_ele = y.numel() + flops = 2 * num_ele # mean and std + if m.elementwise_affine: + flops += 2 * num_ele # gamma and beta + return int(flops) + + +""" +Linear +""" + + +def hook_linear(m, x, y): + flops_per_ele = m.in_features #+ (m.in_features-1) + if m.bias is not None: + flops_per_ele += 1 + flops = flops_per_ele * y.numel() + return int(flops) + + +__generic_flops_counter = { + # Convolution + 'Conv1d': hook_convNd, + 'Conv2d': hook_convNd, + 'Conv3d': hook_convNd, + # Pooling + 'MaxPool1d': hook_maxpool1d, + 'MaxPool2d': hook_maxpool2d, + 'MaxPool3d': hook_maxpool3d, + 'AvgPool1d': hook_avgpool1d, + 'AvgPool2d': hook_avgpool2d, + 'AvgPool3d': hook_avgpool3d, + 'AdaptiveMaxPool1d': hook_adapmaxpool1d, + 'AdaptiveMaxPool2d': hook_adapmaxpool2d, + 'AdaptiveMaxPool3d': hook_adapmaxpool3d, + 'AdaptiveAvgPool1d': hook_adapavgpool1d, + 'AdaptiveAvgPool2d': hook_adapavgpool2d, + 'AdaptiveAvgPool3d': hook_adapavgpool3d, + # Non-linear activations + 'ReLU': hook_relu, + 'ReLU6': hook_relu, + 'LeakyReLU': hook_leakyrelu, + # Normalization + 'BatchNorm1d': hook_batchnormNd, + 'BatchNorm2d': hook_batchnormNd, + 'BatchNorm3d': hook_batchnormNd, + 'InstanceNorm1d': hook_instancenormNd, + 'InstanceNorm2d': hook_instancenormNd, + 'InstanceNorm3d': hook_instancenormNd, + 'GroupNorm': hook_groupnorm, + 'LayerNorm': hook_layernorm, + # Linear + 'Linear': hook_linear, +} + +__conv_linear_flops_counter = { + # Convolution + 'Conv1d': hook_convNd, + 'Conv2d': hook_convNd, + 'Conv3d': hook_convNd, + # Linear + 'Linear': hook_linear, +} + + +def _get_flops_counter(only_conv_linear): + if only_conv_linear: + return __conv_linear_flops_counter + return __generic_flops_counter + + +def compute_model_complexity( + model, input_size, verbose=False, only_conv_linear=True +): + """Returns number of parameters and FLOPs. + .. note:: + (1) this function only provides an estimate of the theoretical time complexity + rather than the actual running time which depends on implementations and hardware, + and (2) the FLOPs is only counted for layers that are used at test time. This means + that redundant layers such as person ID classification layer will be ignored as it + is discarded when doing feature extraction. Note that the inference graph depends on + how you construct the computations in ``forward()``. + Args: + model (nn.Module): network model. + input_size (tuple): input size, e.g. (1, 3, 256, 128). + verbose (bool, optional): shows detailed complexity of + each module. Default is False. + only_conv_linear (bool, optional): only considers convolution + and linear layers when counting flops. Default is True. + If set to False, flops of all layers will be counted. 
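+        Convolutions and linear layers are counted as one operation per
+        multiply (the additions of the accumulation are omitted, plus one
+        operation for the bias when present), so the reported totals are
+        effectively multiply-accumulate counts.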
+ Examples:: + >>> from torchreid import models, utils + >>> model = models.build_model(name='resnet50', num_classes=1000) + >>> num_params, flops = utils.compute_model_complexity(model, (1, 3, 256, 128), verbose=True) + """ + registered_handles = [] + layer_list = [] + layer = namedtuple('layer', ['class_name', 'params', 'flops']) + + def _add_hooks(m): + + def _has_submodule(m): + return len(list(m.children())) > 0 + + def _hook(m, x, y): + params = sum(p.numel() for p in m.parameters()) + class_name = str(m.__class__.__name__) + flops_counter = _get_flops_counter(only_conv_linear) + if class_name in flops_counter: + flops = flops_counter[class_name](m, x, y) + else: + flops = 0 + layer_list.append( + layer(class_name=class_name, params=params, flops=flops) + ) + + # only consider the very basic nn layer + if _has_submodule(m): + return + + handle = m.register_forward_hook(_hook) + registered_handles.append(handle) + + default_train_mode = model.training + + model.eval().apply(_add_hooks) + input = torch.rand(input_size) + if next(model.parameters()).is_cuda: + input = input.cuda() + model(input) # forward + + for handle in registered_handles: + handle.remove() + + model.train(default_train_mode) + + if verbose: + per_module_params = defaultdict(list) + per_module_flops = defaultdict(list) + + total_params, total_flops = 0, 0 + + for layer in layer_list: + total_params += layer.params + total_flops += layer.flops + if verbose: + per_module_params[layer.class_name].append(layer.params) + per_module_flops[layer.class_name].append(layer.flops) + + if verbose: + num_udscore = 55 + print(' {}'.format('-' * num_udscore)) + print(' Model complexity with input size {}'.format(input_size)) + print(' {}'.format('-' * num_udscore)) + for class_name in per_module_params: + params = int(np.sum(per_module_params[class_name])) + flops = int(np.sum(per_module_flops[class_name])) + print( + ' {} (params={:,}, flops={:,})'.format( + class_name, params, flops + ) + ) + print(' {}'.format('-' * num_udscore)) + print( + ' Total (params={:,}, flops={:,})'.format( + total_params, total_flops + ) + ) + print(' {}'.format('-' * num_udscore)) + + return total_params, total_flops \ No newline at end of file diff --git a/utils/random_erasing.py b/utils/random_erasing.py new file mode 100755 index 000000000..72343c9b7 --- /dev/null +++ b/utils/random_erasing.py @@ -0,0 +1,86 @@ +from __future__ import absolute_import + +from torchvision.transforms import * + +from PIL import Image +import random +import math +import numpy as np +import torch + + +class Cutout(object): + def __init__(self, probability=0.5, size=64, mean=[0.4914, 0.4822, 0.4465]): + self.probability = probability + self.mean = mean + self.size = size + + def __call__(self, img): + + if random.uniform(0, 1) > self.probability: + return img + + h = self.size + w = self.size + for attempt in range(100): + area = img.size()[1] * img.size()[2] + if w < img.size()[2] and h < img.size()[1]: + x1 = random.randint(0, img.size()[1] - h) + y1 = random.randint(0, img.size()[2] - w) + if img.size()[0] == 3: + img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] + img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] + img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] + else: + img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] + return img + return img + + +class RandomErasing(object): + """ Randomly selects a rectangle region in an image and erases its pixels. + 'Random Erasing Data Augmentation' by Zhong et al. 
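+         With probability ``probability`` a rectangle whose area is between sl and sh
+         of the image area, and whose aspect ratio lies in [r1, 1/r1], is filled with
+         the per-channel mean values; otherwise the image is returned unchanged.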
+ See https://arxiv.org/pdf/1708.04896.pdf + Args: + probability: The probability that the Random Erasing operation will be performed. + sl: Minimum proportion of erased area against input image. + sh: Maximum proportion of erased area against input image. + r1: Minimum aspect ratio of erased area. + mean: Erasing value. + """ + + def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0.4914, 0.4822, 0.4465]): + # def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0.4914, 0.4822, 0.4465]): + + self.probability = probability + self.mean = mean + self.sl = sl + self.sh = sh + self.r1 = r1 + + def __call__(self, img): + + if random.uniform(0, 1) > self.probability: + return img + + for attempt in range(100): + area = img.size()[1] * img.size()[2] + + target_area = random.uniform(self.sl, self.sh) * area + aspect_ratio = random.uniform(self.r1, 1 / self.r1) + + h = int(round(math.sqrt(target_area * aspect_ratio))) + w = int(round(math.sqrt(target_area / aspect_ratio))) + + if w < img.size()[2] and h < img.size()[1]: + x1 = random.randint(0, img.size()[1] - h) + y1 = random.randint(0, img.size()[2] - w) + if img.size()[0] == 3: + img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] + img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] + img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] + else: + img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] + return img + + return img diff --git a/utils/re_ranking.py b/utils/re_ranking.py new file mode 100755 index 000000000..e878476ae --- /dev/null +++ b/utils/re_ranking.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python2/python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Jun 26 14:46:56 2017 +@author: luohao +Modified by Houjing Huang, 2017-12-22. +- This version accepts distance matrix instead of raw features. +- The difference of `/` division between python 2 and 3 is handled. +- numpy.float16 is replaced by numpy.float32 for numerical precision. + +Modified by Zhedong Zheng, 2018-1-12. +- replace sort with topK, which save about 30s. +""" + +""" +CVPR2017 paper:Zhong Z, Zheng L, Cao D, et al. Re-ranking Person Re-identification with k-reciprocal Encoding[J]. 2017. +url:http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhong_Re-Ranking_Person_Re-Identification_CVPR_2017_paper.pdf +Matlab version: https://github.com/zhunzhong07/person-re-ranking +""" + +""" +API +q_g_dist: query-gallery distance matrix, numpy array, shape [num_query, num_gallery] +q_q_dist: query-query distance matrix, numpy array, shape [num_query, num_query] +g_g_dist: gallery-gallery distance matrix, numpy array, shape [num_gallery, num_gallery] +k1, k2, lambda_value: parameters, the original paper is (k1=20, k2=6, lambda_value=0.3) +Returns: + final_dist: re-ranked distance, numpy array, shape [num_query, num_gallery] +""" + + +import numpy as np + +def k_reciprocal_neigh( initial_rank, i, k1): + forward_k_neigh_index = initial_rank[i,:k1+1] + backward_k_neigh_index = initial_rank[forward_k_neigh_index,:k1+1] + fi = np.where(backward_k_neigh_index==i)[0] + return forward_k_neigh_index[fi] + +def re_ranking(q_g_dist, q_q_dist, g_g_dist, k1=20, k2=6, lambda_value=0.3): + # The following naming, e.g. gallery_num, is different from outer scope. + # Don't care about it. + original_dist = np.concatenate( + [np.concatenate([q_q_dist, q_g_dist], axis=1), + np.concatenate([q_g_dist.T, g_g_dist], axis=1)], + axis=0) + original_dist = 2. - 2 * original_dist #np.power(original_dist, 2).astype(np.float32) + original_dist = np.transpose(1. 
* original_dist/np.max(original_dist,axis = 0)) + V = np.zeros_like(original_dist).astype(np.float32) + #initial_rank = np.argsort(original_dist).astype(np.int32) + # top K1+1 + initial_rank = np.argpartition( original_dist, range(1,k1+1) ) + + query_num = q_g_dist.shape[0] + all_num = original_dist.shape[0] + + for i in range(all_num): + # k-reciprocal neighbors + k_reciprocal_index = k_reciprocal_neigh( initial_rank, i, k1) + k_reciprocal_expansion_index = k_reciprocal_index + for j in range(len(k_reciprocal_index)): + candidate = k_reciprocal_index[j] + candidate_k_reciprocal_index = k_reciprocal_neigh( initial_rank, candidate, int(np.around(k1/2))) + if len(np.intersect1d(candidate_k_reciprocal_index,k_reciprocal_index))> 2./3*len(candidate_k_reciprocal_index): + k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index,candidate_k_reciprocal_index) + + k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) + weight = np.exp(-original_dist[i,k_reciprocal_expansion_index]) + V[i,k_reciprocal_expansion_index] = 1.*weight/np.sum(weight) + + original_dist = original_dist[:query_num,] + if k2 != 1: + V_qe = np.zeros_like(V,dtype=np.float32) + for i in range(all_num): + V_qe[i,:] = np.mean(V[initial_rank[i,:k2],:],axis=0) + V = V_qe + del V_qe + del initial_rank + invIndex = [] + for i in range(all_num): + invIndex.append(np.where(V[:,i] != 0)[0]) + + jaccard_dist = np.zeros_like(original_dist,dtype = np.float32) + + for i in range(query_num): + temp_min = np.zeros(shape=[1,all_num],dtype=np.float32) + indNonZero = np.where(V[i,:] != 0)[0] + indImages = [] + indImages = [invIndex[ind] for ind in indNonZero] + for j in range(len(indNonZero)): + temp_min[0,indImages[j]] = temp_min[0,indImages[j]]+ np.minimum(V[i,indNonZero[j]],V[indImages[j],indNonZero[j]]) + jaccard_dist[i] = 1-temp_min/(2.-temp_min) + + final_dist = jaccard_dist*(1-lambda_value) + original_dist*lambda_value + del original_dist + del V + del jaccard_dist + final_dist = final_dist[:query_num,query_num:] + return final_dist diff --git a/utils/utility.py b/utils/utility.py new file mode 100755 index 000000000..72a1f657b --- /dev/null +++ b/utils/utility.py @@ -0,0 +1,333 @@ +import os +import datetime + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import torch +import numpy as np +import os.path as osp + +import yaml +from collections import OrderedDict +from shutil import copyfile, copytree +import pickle +import warnings + +ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) + + +class checkpoint(): + def __init__(self, args): + self.args = args + self.log = torch.Tensor() + self.since = datetime.datetime.now() + now = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S') + + if args.load == '': + if args.save == '': + args.save = now + self.dir = ROOT_PATH + '/experiment/' + args.save + else: + self.dir = ROOT_PATH + '/experiment/' + args.load + if not os.path.exists(self.dir): + args.load = '' + else: + # pass + # if args.resume != 0: + # self.add_log(torch.tensor( + # [args.resume, 0, 0, 0, 0, 0], dtype=torch.float32).reshape(1, 6)) + # else: + # self.log = torch.load(self.dir + '/map_log.pt') + if os.path.exists(self.dir + '/map_log.pt'): + self.log = torch.load(self.dir + '/map_log.pt') + # print('Continue from epoch {}...'.format( + # len(self.log) * args.test_every)) + + print('Experiment results will be saved in {} '.format(self.dir)) + + if args.reset: + os.system('rm -rf ' + self.dir) + args.load = '' + + def _make_dir(path): + if not 
os.path.exists(path): + os.makedirs(path) + + _make_dir(self.dir) + + if not args.test_only: + + # _make_dir(self.dir + '/model') + _make_dir(self.dir + '/scripts') + + copytree(os.path.join(ROOT_PATH, 'model'), self.dir + '/scripts/model' + + datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')) + copytree(os.path.join(ROOT_PATH, 'loss'), self.dir + '/scripts/loss' + + datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')) + # copyfile(os.path.join(ROOT_PATH, 'engine.py'), self.dir + + # '/scripts/engine{}.py'.format(datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S'))) + + open_type = 'a' if os.path.exists(self.dir + '/log.txt') else 'w' + self.log_file = open(self.dir + '/log.txt', open_type) + with open(self.dir + '/config.txt', open_type) as f: + f.write(now + '\n\n') + for arg in vars(args): + f.write('{}: {}\n'.format(arg, getattr(args, arg))) + f.write('\n') + + with open(self.dir + '/config.yaml', open_type) as fp: + dic = vars(args).copy() + del dic['load'], dic['save'], dic['pre_train'], dic['test_only'], dic['re_rank'] + yaml.dump(dic, fp, default_flow_style=False) + + # def save(self, trainer, epoch, is_best=False): + # trainer.model.save(self.dir, epoch, is_best=is_best) + # trainer.loss.save(self.dir) + # # trainer.loss.plot_loss(self.dir, epoch) + + # self.plot_map_rank(epoch) + # torch.save(self.log, os.path.join(self.dir, 'map_log.pt')) + # torch.save({'state_dict': trainer.optimizer.state_dict(), 'epoch': epoch}, + # os.path.join(self.dir, 'model', + # 'optimizer.pt') + # ) + + def add_log(self, log): + self.log = torch.cat([self.log, log]) + + def write_log(self, log, refresh=False, end='\n'): + time_elapsed = (datetime.datetime.now() - self.since).seconds + log = log + ' Time used: {} m {} s'.format( + time_elapsed // 60, time_elapsed % 60) + print(log, end=end) + if end != '': + self.log_file.write(log + end) + if refresh: + self.log_file.close() + self.log_file = open(self.dir + '/log.txt', 'a') + + def done(self): + self.log_file.close() + + def plot_map_rank(self, epoch): + axis = np.linspace(1, epoch, self.log.size(0)) + label = 'Reid on {}'.format(self.args.data_test) + labels = ['mAP', 'rank1', 'rank3', 'rank5', 'rank10'] + fig = plt.figure() + plt.title(label) + for i in range(len(labels)): + plt.plot(axis, self.log[:, i + 1].numpy(), label=labels[i]) + + plt.legend() + plt.xlabel('Epochs') + plt.ylabel('mAP/rank') + plt.grid(True) + plt.savefig('{}/test_{}.jpg'.format(self.dir, self.args.data_test)) + plt.close(fig) + + def save_results(self, filename, save_list, scale): + pass + + def save_checkpoint( + self, state, save_dir, is_best=False, remove_module_from_keys=False + ): + r"""Saves checkpoint. + + Args: + state (dict): dictionary. + save_dir (str): directory to save checkpoint. + is_best (bool, optional): if True, this checkpoint will be copied and named + ``model-best.pth.tar``. Default is False. + remove_module_from_keys (bool, optional): whether to remove "module." + from layer names. Default is False. + + Examples:: + >>> state = { + >>> 'state_dict': model.state_dict(), + >>> 'epoch': 10, + >>> 'rank1': 0.5, + >>> 'optimizer': optimizer.state_dict() + >>> } + >>> save_checkpoint(state, 'log/my_model') + """ + def mkdir_if_missing(dirname): + """Creates dirname if it is missing.""" + if not osp.exists(dirname): + try: + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + mkdir_if_missing(save_dir) + if remove_module_from_keys: + # remove 'module.' 
in state_dict's keys + state_dict = state['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('module.'): + k = k[7:] + new_state_dict[k] = v + state['state_dict'] = new_state_dict + # save + # fpath = osp.join(save_dir, 'model.pth.tar-' + str(epoch)) + fpath = osp.join(save_dir, 'model.pth.tar-latest') + torch.save(state, fpath) + self.write_log('[INFO] Checkpoint saved to "{}"'.format(fpath)) + if is_best: + # shutil.copy(fpath, osp.join(osp.dirname(fpath), 'model-best.pth.tar')) + torch.save(state['state_dict'], osp.join( + save_dir, 'model-best.pth.tar')) + if 'log' in state.keys(): + + torch.save(state['log'], os.path.join(save_dir, 'map_log.pt')) + + def load_checkpoint(self, fpath): + # """Loads checkpoint. + # ``UnicodeDecodeError`` can be well handled, which means + # python2-saved files can be read from python3. + # Args: + # fpath (str): path to checkpoint. + # Returns: + # dict + # Examples:: + # >>> from torchreid.utils import load_checkpoint + # >>> fpath = 'log/my_model/model.pth.tar-10' + # >>> checkpoint = load_checkpoint(fpath) + # """ + if fpath is None: + raise ValueError('File path is None') + if not osp.exists(fpath): + raise FileNotFoundError('File is not found at "{}"'.format(fpath)) + map_location = None if torch.cuda.is_available() else 'cpu' + try: + checkpoint = torch.load(fpath, map_location=map_location) + except UnicodeDecodeError: + pickle.load = partial(pickle.load, encoding="latin1") + pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1") + checkpoint = torch.load( + fpath, pickle_module=pickle, map_location=map_location + ) + except Exception: + print('Unable to load checkpoint from "{}"'.format(fpath)) + raise + return checkpoint + + def load_pretrained_weights(self, model, weight_path): + r"""Loads pretrianed weights to model. + Features:: + - Incompatible layers (unmatched in name or size) will be ignored. + - Can automatically deal with keys containing "module.". + Args: + model (nn.Module): network model. + weight_path (str): path to pretrained weights. + Examples:: + >>> from torchreid.utils import load_pretrained_weights + >>> weight_path = 'log/my_model/model-best.pth.tar' + >>> load_pretrained_weights(model, weight_path) + """ + checkpoint = self.load_checkpoint(weight_path) + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + model_dict = model.state_dict() + # print(model_dict.keys()) + new_state_dict = OrderedDict() + matched_layers, discarded_layers = [], [] + for k, v in state_dict.items(): + # print(k) + if k.startswith('module.'): + k = 'model.' + k[7:] # discard module. + + if k in model_dict and model_dict[k].size() == v.size(): + + new_state_dict[k] = v + matched_layers.append(k) + else: + discarded_layers.append(k) + + model_dict.update(new_state_dict) + model.load_state_dict(model_dict) + + if len(matched_layers) == 0: + warnings.warn( + 'The pretrained weights "{}" cannot be loaded, ' + 'please check the key names manually ' + '(** ignored and continue **)'.format(weight_path) + ) + else: + print( + 'Successfully loaded pretrained weights from "{}"'. + format(weight_path) + ) + if len(discarded_layers) > 0: + print( + '** The following layers are discarded ' + 'due to unmatched keys or layer size: {}'. + format(discarded_layers) + ) + + def resume_from_checkpoint(self, fpath, model, optimizer=None, scheduler=None): + r"""Resumes training from a checkpoint. 
+ + This will load (1) model weights and (2) ``state_dict`` + of optimizer if ``optimizer`` is not None. + + Args: + fpath (str): path to checkpoint. + model (nn.Module): model. + optimizer (Optimizer, optional): an Optimizer. + scheduler (LRScheduler, optional): an LRScheduler. + + Returns: + int: start_epoch. + + Examples:: + >>> from torchreid.utils import resume_from_checkpoint + >>> fpath = 'log/my_model/model.pth.tar-10' + >>> start_epoch = resume_from_checkpoint( + >>> fpath, model, optimizer, scheduler + >>> ) + """ + self.write_log('[INFO] Loading checkpoint from "{}"'.format(fpath)) + checkpoint = self.load_checkpoint(fpath) + # print(checkpoint['state_dict'].keys()) + # print('oooooooooooooo') + # print(model.state_dict().keys()) + # kk=[] + # ll=[] + # for k,l in zip(model.state_dict().keys(),checkpoint['state_dict'].keys()): + # if k == l : + # kk.append(k) + # else: + # ll.append(k) + # print('not match') + # print(kk) + # print(ll) + # for k in model.state_dict().keys(): + # print(k) + # for l in checkpoint['state_dict'].keys(): + # print(l) + # print(checkpoint['state_dict']) + # print(len(checkpoint['state_dict'].keys())) + # print(len(model.state_dict().keys())) + # print(model.state_dict()) + model.load_state_dict(checkpoint['state_dict']) + self.write_log('[INFO] Model weights loaded') + if optimizer is not None and 'optimizer' in checkpoint.keys(): + optimizer.load_state_dict(checkpoint['optimizer']) + self.write_log('[INFO] Optimizer loaded') + if scheduler is not None and 'scheduler' in checkpoint.keys(): + scheduler.load_state_dict(checkpoint['scheduler']) + self.write_log('[INFO] Scheduler loaded') + start_epoch = checkpoint['epoch'] + self.write_log('[INFO] Last epoch = {}'.format(start_epoch)) + if 'rank1' in checkpoint.keys(): + self.write_log( + '[INFO] Last rank1 = {:.1%}'.format(checkpoint['rank1'])) + if 'log' in checkpoint.keys(): + self.log = checkpoint['log'] + + return start_epoch
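
A minimal usage sketch for the warmup scheduler added above (not part of the patch): it assumes torch is installed and this repository is on PYTHONPATH, and reuses the gamma=0.1, warmup_factor=0.01, warmup_iters=10 arguments that optim.make_scheduler passes to WarmupMultiStepLR; the one-layer model is only an illustrative stand-in.

    import torch
    from optim.warmup_scheduler import WarmupMultiStepLR

    model = torch.nn.Linear(512, 751)          # stand-in for a re-id backbone
    optimizer = torch.optim.Adam(model.parameters(), lr=3.5e-4, weight_decay=5e-4)
    scheduler = WarmupMultiStepLR(optimizer, milestones=[50, 80, 110], gamma=0.1,
                                  warmup_factor=0.01, warmup_iters=10,
                                  warmup_method='linear')

    for epoch in range(120):
        # ... run one training epoch (optimizer.step() per batch) ...
        scheduler.step()
        # lr ramps up linearly over the first 10 epochs,
        # then drops by 10x at epochs 50, 80 and 110
        print(epoch, optimizer.param_groups[0]['lr'])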