diff --git a/NLOS_detr/configs/default.yaml b/NLOS_detr/configs/default.yaml new file mode 100644 index 0000000..681242d --- /dev/null +++ b/NLOS_detr/configs/default.yaml @@ -0,0 +1,67 @@ +model_configs: + model_name: PAC_Net + pretrained: true + rnn_type: gru + rnn_hdim: 128 + v_loss: true + warm_up: 32 + max_peo: 3 + +dataset_configs: + dataset_root: /mnt/petrelfs/share_data/lisibo/NLOS/data_render_mot/ # fill your dataset path here! + # dataset_root: ./dataset/real_shot_new + # dataset_root: ../dataset/render + data_type: real_shot + train_ratio: 0.8 + route_len: 128 + # total_len: 250 + noise_factor: 0 + noisy: false + max_peo: 3 + +loader_kwargs: + num_workers: 8 + pin_memory: true + prefetch_factor: 8 + persistent_workers: true + +train_configs: + project_name: my_project # fill your wandb project name here! + # resume: True + # resume_path: 2023_07_03_15_56_48/ + resume: false + resume_path: None + batch_size: 16 + seed: 1026 + device: cuda:0 + amp: true + v_loss_alpha: 500 + x_loss_alpha: 1 + m_loss_alpha: 100 + + loss_total_alpha: 1000 + +optim_kwargs: + optimizer: AdamW + lr: 3.0e-4 + weight_decay: 2.0e-3 + +schedule_configs: + schedule_type: cosine + max_epoch: 120 + cos_T: 70 + cos_iters: 1 + cos_mul: 2 + +distributed_configs: + distributed: false + gpu_ids: 0 + device_ids: 1 + world_size: 1 + local_rank: 0 + port: 6666 + +log_configs: + log_dir: log # fill your log dir here! + save_epoch_interval: 5 + snapshot_interval: 100 \ No newline at end of file diff --git a/NLOS_detr/data/__init__.py b/NLOS_detr/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/NLOS_detr/data/__pycache__/__init__.cpython-39.pyc b/NLOS_detr/data/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..f8f5835 Binary files /dev/null and b/NLOS_detr/data/__pycache__/__init__.cpython-39.pyc differ diff --git a/NLOS_detr/data/__pycache__/dataset.cpython-39.pyc b/NLOS_detr/data/__pycache__/dataset.cpython-39.pyc new file mode 100644 index 0000000..02ebd99 Binary files /dev/null and b/NLOS_detr/data/__pycache__/dataset.cpython-39.pyc differ diff --git a/NLOS_detr/data/__pycache__/loader.cpython-39.pyc b/NLOS_detr/data/__pycache__/loader.cpython-39.pyc new file mode 100644 index 0000000..a8b617a Binary files /dev/null and b/NLOS_detr/data/__pycache__/loader.cpython-39.pyc differ diff --git a/NLOS_detr/data/dataset.py b/NLOS_detr/data/dataset.py new file mode 100644 index 0000000..83bcc8a --- /dev/null +++ b/NLOS_detr/data/dataset.py @@ -0,0 +1,102 @@ +import os +import random + +import numpy as np +import torch +from torch.utils.data import Dataset +from torch.utils.data.dataset import random_split +from scipy.io import loadmat +import pdb + +from .loader import npy_loader + + +class TrackingDataset(Dataset): + def __init__(self, + dataset_root: str, + data_type: str = 'render', + route_len: int = 128, + use_fileclient: bool = False, + noisy: bool = True, + max_peo: int = 3, + **kwargs) -> None: + self.dataset_root = dataset_root + self.max_peo = max_peo + self.num_frames = route_len + if data_type == 'render': + self.total_len = 256 + self.npy_name = 'video_128_noisy.npy' if noisy else 'video_128.npy' + elif data_type == 'real_shot': + self.total_len = 256 + self.npy_name = 'video_128.npy' + + + self.dataset_dir = dataset_root + self.dirs = [] + for peo in [1,2,3]: + tmp_dir = [os.path.join(str(peo), d) for d in os.listdir(os.path.join(self.dataset_dir, str(peo)))] + self.dirs += tmp_dir[:600] + + print('dirs: ', len(self.dirs)) + # pdb.set_trace() + + if use_fileclient: +
self.npy_loader = npy_loader() + self.load_npy = self.npy_loader.get_item + else: + self.load_npy = np.load + + def __len__(self): + return len(self.dirs) + + def __getitem__(self, idx): + abs_png_dir = os.path.join(self.dataset_dir, self.dirs[idx]) + npy_file = os.path.join(abs_png_dir, self.npy_name) + video = self.load_npy(npy_file) + + start_frame = random.randint(0, self.total_len - self.num_frames) + video = video[:, start_frame:start_frame + self.num_frames] # (3, T, H, W) or (3, T-1, H, W) + + mat_file = loadmat(os.path.join(abs_png_dir, 'route.mat')) + + route = mat_file['route'][start_frame:start_frame + self.num_frames] # (T, 2n) + route = route.reshape((route.shape[0], -1)) # (T, 2n) + ## route: (T, 2n); pad the person dimension up to (T, 2 * max_peo), filling empty slots with 0.5 + ## people are sorted left to right by the sum of their x coordinate over the clip ---- route[:, 0:2] is the left-most person + npeo = route.shape[1] // 2 + avg = [] + # for p in range(npeo): + # avg.append({"st_x":route[0,2*p],"idx":p}) + + # avg.sort(key=lambda x:x["st_x"]) + # tmp = np.zeros((route.shape[0], route.shape[1])) + # for i in range(npeo): + # tmp[:,2*i:2*i+2] = route[:,2*avg[i]["idx"]:2*avg[i]["idx"]+2] + # route = tmp + # print('route', sum(route[:,0]), sum(route[:,2])) + for p in range(npeo): + avg.append({"sumx":sum(route[:,2*p]),"idx":p}) + avg.sort(key=lambda x:x["sumx"]) + tmp = np.zeros((route.shape[0], route.shape[1])) + for i in range(npeo): + tmp[:,2*i:2*i+2] = route[:,2*avg[i]["idx"]:2*avg[i]["idx"]+2] + route = tmp + + route = np.concatenate((route, np.ones((route.shape[0], self.max_peo * 2 - route.shape[1])) * 0.5), axis=1) # (T, 2 * max_peo) + assert route.shape[1] == self.max_peo * 2 and route.shape[0] == self.num_frames + map_size = mat_file['map_size'] # (1, 2) + ## map_size (1, 2) -> tiled to (1, 2 * max_peo) + map_size = np.tile(map_size, (1, self.max_peo)) # (1, 2 * max_peo) + + return torch.from_numpy(video), torch.from_numpy(route).float(), torch.from_numpy(map_size).float() + + +def split_dataset(phase: str = 'train', train_ratio: float = 0.8, **kwargs): + full_dataset = TrackingDataset(**kwargs) + + if phase == 'train': + train_size = int(len(full_dataset) * train_ratio) + val_size = len(full_dataset) - train_size + return random_split(full_dataset, [train_size, val_size]) + elif phase == 'test': + return full_dataset diff --git a/NLOS_detr/data/loader.py b/NLOS_detr/data/loader.py new file mode 100644 index 0000000..9c037a3 --- /dev/null +++ b/NLOS_detr/data/loader.py @@ -0,0 +1,47 @@ +import io +import os +from typing import Tuple, Union + +import numpy as np +import mmcv +import torch +from torchvision.io import read_image +from torchvision.transforms.functional import resize + + +def load_frames( + root: str, + frame_range: Union[None, Tuple[int, int]] = None, + output_size: Union[None, Tuple[int, int]] = None, + rgb_only=True +) -> torch.Tensor: + frame_list = sorted([f for f in os.listdir(root) if f.endswith('.png')]) + if frame_range is not None: + frame_list = frame_list[frame_range[0]: frame_range[1]] + frame_paths = [os.path.join(root, f) for f in frame_list] + + C, H, W = read_image(frame_paths[0]).shape + frame_num = len(frame_list) + if C == 4 and rgb_only: + frames = torch.zeros((frame_num, 3, H, W)) + else: + frames = torch.zeros((frame_num, C, H, W)) + for i in range(frame_num): + frame = read_image(frame_paths[i]) # (C, H, W) + if C == 4 and rgb_only: + frame = frame[:3] + frames[i] = frame + if output_size is not None: + frames = resize(frames, size=output_size) + return frames # (T, C, H, W) + + +class npy_loader(object): + def __init__(self): + self.file_client = mmcv.fileio.FileClient(backend='petrel') + + def get_item(self,
file_path: str): + npy_buffer = self.file_client.get(file_path) + # return np.frombuffer(npy_buffer) + with io.BytesIO(npy_buffer) as f: + return np.load(f) # , encoding='bytes', allow_pickle=True diff --git a/NLOS_detr/data/preprocess.py b/NLOS_detr/data/preprocess.py new file mode 100644 index 0000000..82382bb --- /dev/null +++ b/NLOS_detr/data/preprocess.py @@ -0,0 +1,29 @@ +from torch import Tensor +from torchvision.transforms.functional import resize + + +def sub_mean(frames: Tensor) -> Tensor: + mean_frame = frames.mean(axis=0, keepdim=True) + frames_sub_mean = frames.sub(mean_frame) + + return frames_sub_mean + + +def diff(frames: Tensor) -> Tensor: + return frames[1:].sub(frames[:-1]) + + +def normalize(frame: Tensor): + return (frame - frame.min()) / (frame.max() - frame.min()) + + +def resize_video(frames: Tensor, bias_ratio: float = None, output_size: tuple = (128, 128)) -> Tensor: + T, C, H, W = frames.shape + crop_idx = (W - H) // 2 + if bias_ratio is not None: + crop_idx -= int(W * bias_ratio) + output_frames = frames[:, :, :, crop_idx:crop_idx + H] + if output_size is not None: + output_frames = resize(output_frames, size=output_size) + + return output_frames diff --git a/NLOS_detr/train.py b/NLOS_detr/train.py new file mode 100644 index 0000000..dc2ecfe --- /dev/null +++ b/NLOS_detr/train.py @@ -0,0 +1,91 @@ +import argparse +import os + +import torch +import torch.multiprocessing as mp +import yaml +import pdb + + +def main(cfg): + dist_cfgs = cfg['distributed_configs'] + + os.makedirs(cfg['log_configs']['log_dir'], exist_ok=True) + # os.environ["CUDA_VISIBLE_DEVICES"] = dist_cfgs['device_ids'] + # os.environ['CUDA_LAUNCH_BLOCKING'] = '1' + + world_size = len(dist_cfgs['device_ids'].split(',')) + dist_cfgs['distributed'] = True if world_size > 1 else False + dist_cfgs['world_size'] = world_size + cfg['loader_kwargs']['batch_size'] = cfg['train_configs']['batch_size'] // world_size + + print("Allocating workers...") + if dist_cfgs['distributed']: + mp.spawn(worker, nprocs=world_size, args=(cfg,)) + else: + worker(0, cfg) + + +def worker(rank, cfg): + torch.cuda.set_device(rank) + cfg['distributed_configs']['local_rank'] = rank + + from utils.trainer import Trainer_tracking + trainer = Trainer_tracking(cfg) + + trainer.run() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('--cfg_file', type=str, default='default') + + parser.add_argument('--model_name', type=str, required=True) + parser.add_argument('--warm_up', type=int, default=32) + parser.add_argument('--pretrained', action="store_true") + parser.add_argument('--rnn_hdim', type=int, default=128) + + parser.add_argument('-b', '--batch_size', type=int, default=32) + parser.add_argument('--v_loss_alpha', type=float, default=500) + parser.add_argument('--loss_total_alpha', type=float, default=1000) + parser.add_argument('-r', '--resume', action='store_true', help='load previously saved checkpoint') + + parser.add_argument('-lr_b', '--lr_backbone', type=float, default=3e-4) + parser.add_argument('-wd', '--weight_decay', type=float, default=2.0e-3) + + parser.add_argument('-T', '--cos_T', type=int, default=70) + + parser.add_argument('-g', '--gpu_ids', type=lambda x: x.replace(" ", ""), default='0', + help='available gpu ids') + parser.add_argument('--port', type=str, default='6666', help='port number of distributed init') + + args = parser.parse_args() + + config_file = os.path.join('configs', f'{args.cfg_file}.yaml') + print(f'Reading config file: {config_file}') + with 
open(config_file, 'r') as stream: + config = yaml.load(stream, Loader=yaml.FullLoader) + + config['model_configs']['warm_up'] = args.warm_up + config['model_configs']['pretrained'] = args.pretrained + config['model_configs']['rnn_hdim'] = args.rnn_hdim + + config['dataset_configs']['route_len'] += args.warm_up + + config['train_configs']['batch_size'] = args.batch_size + config['train_configs']['v_loss_alpha'] = args.v_loss_alpha + config['train_configs']['loss_total_alpha'] = args.loss_total_alpha + config['train_configs']['resume'] = args.resume + + config['optim_kwargs']['lr'] = args.lr_backbone + config['optim_kwargs']['weight_decay'] = args.weight_decay + + config['schedule_configs']['cos_T'] = args.cos_T + + config['distributed_configs']['device_ids'] = args.gpu_ids + config['distributed_configs']['port'] = args.port + + main(config) + ## seed 1026 + # srun -p optimal --quotatype=auto --gres=gpu:1 -J NLOS_lisibo python train.py --model_name PAC-Net --pretrained --warm_up 32 -b 16 \ No newline at end of file diff --git a/NLOS_detr/utils/__init__.py b/NLOS_detr/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/NLOS_detr/utils/__pycache__/__init__.cpython-39.pyc b/NLOS_detr/utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..03f2609 Binary files /dev/null and b/NLOS_detr/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/NLOS_detr/utils/__pycache__/metric.cpython-39.pyc b/NLOS_detr/utils/__pycache__/metric.cpython-39.pyc new file mode 100644 index 0000000..5622300 Binary files /dev/null and b/NLOS_detr/utils/__pycache__/metric.cpython-39.pyc differ diff --git a/NLOS_detr/utils/__pycache__/route2.cpython-39.pyc b/NLOS_detr/utils/__pycache__/route2.cpython-39.pyc new file mode 100644 index 0000000..3d38dfb Binary files /dev/null and b/NLOS_detr/utils/__pycache__/route2.cpython-39.pyc differ diff --git a/NLOS_detr/utils/__pycache__/routes.cpython-39.pyc b/NLOS_detr/utils/__pycache__/routes.cpython-39.pyc new file mode 100644 index 0000000..e0f7c9c Binary files /dev/null and b/NLOS_detr/utils/__pycache__/routes.cpython-39.pyc differ diff --git a/NLOS_detr/utils/__pycache__/tools.cpython-39.pyc b/NLOS_detr/utils/__pycache__/tools.cpython-39.pyc new file mode 100644 index 0000000..7e93b80 Binary files /dev/null and b/NLOS_detr/utils/__pycache__/tools.cpython-39.pyc differ diff --git a/NLOS_detr/utils/__pycache__/trainer.cpython-39.pyc b/NLOS_detr/utils/__pycache__/trainer.cpython-39.pyc new file mode 100644 index 0000000..36466ef Binary files /dev/null and b/NLOS_detr/utils/__pycache__/trainer.cpython-39.pyc differ diff --git a/NLOS_detr/utils/__pycache__/vis.cpython-39.pyc b/NLOS_detr/utils/__pycache__/vis.cpython-39.pyc new file mode 100644 index 0000000..7d4e49e Binary files /dev/null and b/NLOS_detr/utils/__pycache__/vis.cpython-39.pyc differ diff --git a/NLOS_detr/utils/metric.py b/NLOS_detr/utils/metric.py new file mode 100644 index 0000000..9ddb281 --- /dev/null +++ b/NLOS_detr/utils/metric.py @@ -0,0 +1,78 @@ +import time + +from numpy import ndarray +import similaritymeasures + +from torch import Tensor, no_grad +from torch.nn import Module +from torch.cuda.amp import autocast +from tqdm import tqdm + +from .tools import get_device, AverageMeter + + +def test_metrics(model: Module, loader, warm_up: int = None): + device = get_device(model) + model.eval() # set model to evaluation mode + tic = time.time() + + pcm_recorder = AverageMeter() + area_recorder = AverageMeter() + dtw_recorder = AverageMeter() + + with no_grad(): + 
with autocast(): + for batch_idx, (X, Y) in tqdm(enumerate(loader)): + X = X.to(device) # move to device, e.g. GPU + Y = Y.to(device) + + batch_size, T = Y.shape[:2] + + _, preds = model((X, Y)) + if warm_up is not None: + T = warm_up - T # negative offset: keep only the frames after the warm-up phase + Y = Y[:, T:] + preds = preds[:, T:] + pcm, area, dtw = compute_batch_metrics(preds, Y) + pcm_recorder.update(pcm.item(), batch_size) + area_recorder.update(area.item(), batch_size) + dtw_recorder.update(dtw.item(), batch_size) + + print(f"Infer time: {time.time() - tic}") + print(f"pcm: {pcm_recorder.avg:.4f}\n" + f"area: {area_recorder.avg:.4f}\n" + f"dtw: {dtw_recorder.avg:.4f}\n") + + +def compute_track_metrics(pred: ndarray, gt: ndarray): + ## pred, gt: (T, 2 * max_peo) + T = gt.shape[0] + pcm_tot = 0 + area_tot = 0 + dtw_tot = 0 + cnt = 0 + for p in range(3): + id1 = 2*p + id2 = id1+2 + if all(gt[:,id1] == 0.5): + break + cnt += 1 + pcm = similaritymeasures.pcm(gt[:,id1:id2], pred[:,id1:id2]) + area = similaritymeasures.area_between_two_curves(gt[:,id1:id2], pred[:,id1:id2]) + dtw = similaritymeasures.dtw(gt[:,id1:id2], pred[:,id1:id2])[0] + pcm_tot += pcm + area_tot += area + dtw_tot += dtw + + return pcm_tot / (T*cnt), area_tot / (T*cnt), dtw_tot / (T*cnt) + + +def compute_batch_metrics(preds: Tensor, labels: Tensor): + pcm, area, dtw = 0, 0, 0 + B = labels.shape[0] + for gt, pred in zip(labels.cpu().numpy(), preds.detach().cpu().numpy()): + metrics = compute_track_metrics(gt, pred) + pcm += metrics[0] + area += metrics[1] + dtw += metrics[2] + return pcm/B, area/B, dtw/B diff --git a/NLOS_detr/utils/route2.py b/NLOS_detr/utils/route2.py new file mode 100644 index 0000000..baffac1 --- /dev/null +++ b/NLOS_detr/utils/route2.py @@ -0,0 +1,208 @@ +import os +import time +# from collections import namedtuple + +import numpy as np +from numpy import ndarray +from scipy.io import savemat, loadmat +from shapely.geometry import Polygon, Point + +# from utils.vis import draw_route + + +class route_generator_mot(object): + def __init__(self, map_size, forbidden_rate: float = 0.1, n_peo: int = 1): + self.n_peo = n_peo + self.forbidden_rate = forbidden_rate + self.route_length = None + self.map_size = map_size + + x_min, x_max, y_min, y_max = (map_size[0] * self.forbidden_rate, + map_size[0] * (1 - self.forbidden_rate), + map_size[1] * self.forbidden_rate, + map_size[1] * (1 - self.forbidden_rate)) + self.boundary = Polygon(((x_min, y_min), (x_min, y_max), (x_max, y_max), (x_max, y_min))) + + self.e_position = None + self.e_route = None + self.c_route = None + self.velocities = None + ## state for the multi-person case + self.e_route_all = [] + self.c_route_all = [] + self.v_all = [] + # 0.035 m per frame + self.v_range = (0.03, 0.04) + + def _init_pv(self): + bounds = self.boundary.bounds + x = bounds[0] + (bounds[2] - bounds[0]) * np.random.rand() + y = bounds[1] + (bounds[3] - bounds[1]) * np.random.rand() + e_position = np.array([x, y]) + velocity = np.random.rand(2).astype(np.float32) - 0.5 + velocity = 0.035 * velocity / np.linalg.norm(velocity) + + return e_position, velocity + + def generate_route(self, + route_length: int = 256, + turn_rate: float = 0.15, + verbose: bool = False): + self.route_length = route_length + + for i in range(self.n_peo): + ep, v = self._init_pv() + self.e_route_all.append([ep]) + self.v_all.append([v]) + + for i in range(self.route_length): + self.next_step(turn_rate) + + # print(len(self.e_route_all), len(self.e_route_all[0])) + + for i in range(self.n_peo): + c_route = [(self.e_route_all[i][j] + self.e_route_all[i][j + 1]) * 0.5 for j in
range(self.route_length)] + c_route = np.stack(c_route) + self.c_route_all.append(c_route / self.map_size) + + if verbose: + print(len(self.velocities), len(self.c_route)) + print('velocities\n', np.stack(self.velocities)) + print('route:\n', np.stack(self.c_route)) + + def next_step(self, turn_rate: float): + v_frame = [] + for i in range(self.n_peo): + e_pos = self.e_route_all[i][-1] + v = self.v_all[i][-1].copy() + e_pos, v = self.check_boundary(e_pos, v) + e_pos, v = self.check_collision(e_pos, v, i) + v_frame.append(v) + + for i in range(self.n_peo): + self.e_route_all[i].append(self.e_route_all[i][-1] + v_frame[i]) + delta_v = np.random.rand(2).astype(np.float32) - 0.5 + delta_v /= np.linalg.norm(delta_v) + v_norm = self.v_range[0] + (self.v_range[1] - self.v_range[0]) * np.random.rand() + + v_frame[i] += turn_rate * v_norm * delta_v + v_frame[i] *= v_norm / np.linalg.norm(v_frame[i]) + self.v_all[i].append(v_frame[i].copy()) + + # import pdb + # print(v_frame) + # print(self.v_all) + # pdb.set_trace() + + + def check_boundary(self, epos, v): + point = Point(epos) + if not self.boundary.contains(point): + for i in range(2): + p = epos[i] + bound = self.boundary.bounds[i::2] + if p < min(bound) or p > max(bound): + v[i] *= -1 + break + + return epos, v + + def check_collision(self,epos,v,nowidx): + try_pos = epos + v + try_point = Point(try_pos) + point_cnt = 0 + conflict_idx = [] + for i in range(self.n_peo): + if i <= nowidx: + continue + p = Point(self.e_route_all[i][-1]) + if p.distance(try_point) < 0.3: + point_cnt += 1 + conflict_idx.append(i) + if point_cnt > 0: + pass + # print("conflict at: ", epos) + + + if point_cnt == 1: + ## assume an elastic collision between the two people + v1 = np.array(self.v_all[nowidx][-1]) + v2 = np.array(self.v_all[conflict_idx[0]][-1]) + ## rotate so that the angle between v1 and v2 is larger than 45 degrees + if np.dot(v1, v2)/(np.linalg.norm(v1) * np.linalg.norm(v2)) < 0.707: + return epos, self.v_all[conflict_idx[0]][-1] + else: + x1 = self.e_route_all[nowidx][-1] + x2 = self.e_route_all[conflict_idx[0]][-1] + v1 = np.linalg.norm(v1) * (x1 - x2) / np.linalg.norm(x1 - x2) + v2 = np.linalg.norm(v2) * (x2 - x1) / np.linalg.norm(x2 - x1) + return epos, v1 + + elif point_cnt > 1: + ## three people collided at once, so everyone turns around + return epos, -1*v + else: + return epos, v + + + def draw_route(self, cmap: str = 'viridis', normalize: bool = True): + route = np.stack(self.c_route) / np.array(self.map_size) + map_size = np.array((1, 1)) if normalize else self.map_size + draw_route(map_size, route, cmap, return_mode=None) + + def save_route(self, save_root: str, verbose: bool = False): + + time.sleep(1) + save_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) + save_dir = os.path.join(save_root, save_time) + os.makedirs(save_dir, exist_ok=True) + mat_path = os.path.join(save_dir, 'route.mat') + map_size = np.array(self.map_size) + # print(map_size) + save_dict = {"map_size": map_size, + "route": np.stack(self.c_route) / map_size, # (T, 2) + "velocities": np.stack(self.velocities[:-1])} + + savemat(mat_path, save_dict) + if verbose: + print(f'Save data into {mat_path} successfully!') + + def load_route(self, + mat_name: str, + save_dir: str): + if not os.path.exists(save_dir): + print(f"{save_dir} doesn't exist!") + if mat_name is None: + mat_names = sorted([f for f in os.listdir(save_dir) if f.endswith('.mat')]) + mat_name = mat_names[-1] + mat_path = os.path.join(save_dir, mat_name) + save_dict = loadmat(mat_path) + + self.e_route = [p for p in save_dict['route']] + self.velocities = [v for v in save_dict['velocities']] + print(f'Load data from {mat_path} 
successfully!') + + +def fix_real_trajectory(route_clip: ndarray, threshold: int = 10): + bad_frames = find_miss_points(route_clip) + b_len = len(bad_frames) + counter = 1 + for i, frame in enumerate(bad_frames): + if frame != 0: + if i < b_len - 1 and bad_frames[i + 1] == frame + 1: + counter += 1 + else: + if counter <= threshold: + div = counter + 1 + start_frame, end_frame = frame - counter, frame + 1 + for j in range(1, div): + idx = start_frame + j + route_clip[idx] = route_clip[start_frame] * (j / div) \ + + route_clip[end_frame] * (1 - j / div) + counter = 1 + + return route_clip + + +def find_miss_points(route_clip: ndarray): + return np.argwhere(route_clip == 0)[::2, 0] diff --git a/NLOS_detr/utils/routes.py b/NLOS_detr/utils/routes.py new file mode 100644 index 0000000..6cc570a --- /dev/null +++ b/NLOS_detr/utils/routes.py @@ -0,0 +1,146 @@ +import os +import time +# from collections import namedtuple + +import numpy as np +from numpy import ndarray +from scipy.io import savemat, loadmat +from shapely.geometry import Polygon, Point + +# from utils.vis import draw_route + + +class route_generator(object): + def __init__(self, map_size: tuple[float] = (5, 5), forbidden_rate: float = 0.1): + self.forbidden_rate = forbidden_rate + self.route_length = None + self.map_size = map_size + + x_min, x_max, y_min, y_max = (map_size[0] * self.forbidden_rate, + map_size[0] * (1 - self.forbidden_rate), + map_size[1] * self.forbidden_rate, + map_size[1] * (1 - self.forbidden_rate)) + self.boundary = Polygon(((x_min, y_min), (x_min, y_max), (x_max, y_max), (x_max, y_min))) + + self.e_position = None + self.e_route = None + self.c_route = None + self.velocities = None + + # 0.035m pre frame + self.v_range = (0.03, 0.04) + + def _init_pv(self): + bounds = self.boundary.bounds + x = bounds[0] + (bounds[2] - bounds[0]) * np.random.rand() + y = bounds[1] + (bounds[3] - bounds[1]) * np.random.rand() + self.e_position = np.array([x, y]) + self.e_route = [self.e_position.copy()] + + self.velocity = np.random.rand(2).astype(np.float32) - 0.5 + self.velocity = 0.035 * self.velocity / np.linalg.norm(self.velocity) + self.velocities = [self.velocity.copy()] + + def generate_route(self, + route_length: int = 256, + turn_rate: float = 0.15, + verbose: bool = False): + self.route_length = route_length + + self._init_pv() + for step in range(route_length): + # print(self.velocity) + self.next_step(turn_rate=turn_rate) + self.e_route.append(self.e_position.copy()) + self.velocities.append(self.velocity.copy()) + + self.c_route = [(self.e_route[i] + self.e_route[i + 1]) / 2 for i in range(len(self.e_route) - 1)] + + if verbose: + print(len(self.velocities), len(self.c_route)) + print('velocities\n', np.stack(self.velocities)) + print('route:\n', np.stack(self.c_route)) + + def next_step(self, turn_rate: float): + self.e_position += self.velocity + self.check_boundary() + + delta_v = np.random.rand(2).astype(np.float32) - 0.5 + delta_v /= np.linalg.norm(delta_v) + + v_norm = self.v_range[0] + (self.v_range[1] - self.v_range[0]) * np.random.rand() + self.velocity += turn_rate * v_norm * delta_v + self.velocity *= v_norm / np.linalg.norm(self.velocity) + + def check_boundary(self): + point = Point(self.e_position) + if not self.boundary.contains(point): + for i in range(2): + p = self.e_position[i] + bound = self.boundary.bounds[i::2] + if p < min(bound) or p > max(bound): + self.velocity[i] *= -1 + break + self.e_position = self.e_route[-1] + self.velocity + + def draw_route(self, cmap: str = 'viridis', normalize: 
bool = True): + route = np.stack(self.c_route) / np.array(self.map_size) + map_size = np.array((1, 1)) if normalize else self.map_size + draw_route(map_size, route, cmap, return_mode=None) + + def save_route(self, save_root: str, verbose: bool = False): + + time.sleep(1) + save_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) + save_dir = os.path.join(save_root, save_time) + os.makedirs(save_dir, exist_ok=True) + mat_path = os.path.join(save_dir, 'route.mat') + map_size = np.array(self.map_size) + # print(map_size) + save_dict = {"map_size": map_size, + "route": np.stack(self.c_route) / map_size, # (T, 2) + "velocities": np.stack(self.velocities[:-1])} + + savemat(mat_path, save_dict) + if verbose: + print(f'Save data into {mat_path} successfully!') + + def load_route(self, + mat_name: str, + save_dir: str): + if not os.path.exists(save_dir): + print(f"{save_dir} doesn't exist!") + if mat_name is None: + mat_names = sorted([f for f in os.listdir(save_dir) if f.endswith('.mat')]) + mat_name = mat_names[-1] + mat_path = os.path.join(save_dir, mat_name) + save_dict = loadmat(mat_path) + + self.e_route = [p for p in save_dict['route']] + self.velocities = [v for v in save_dict['velocities']] + print(f'Load data from {mat_path} successfully!') + + +def fix_real_trajectory(route_clip: ndarray, threshold: int = 10): + bad_frames = find_miss_points(route_clip) + b_len = len(bad_frames) + counter = 1 + for i, frame in enumerate(bad_frames): + if frame != 0: + if i < b_len - 1 and bad_frames[i + 1] == frame + 1: + counter += 1 + else: + if counter <= threshold: + div = counter + 1 + start_frame, end_frame = frame - counter, frame + 1 + for j in range(1, div): + idx = start_frame + j + route_clip[idx] = route_clip[start_frame] * (j / div) \ + + route_clip[end_frame] * (1 - j / div) + counter = 1 + + return route_clip + + +def find_miss_points(route_clip: ndarray): + return np.argwhere(route_clip == 0)[::2, 0] diff --git a/NLOS_detr/utils/tools.py b/NLOS_detr/utils/tools.py new file mode 100644 index 0000000..381ee81 --- /dev/null +++ b/NLOS_detr/utils/tools.py @@ -0,0 +1,96 @@ +import os +import time + +import numpy as np +import torch +import yaml +from matplotlib import pyplot as plt + +import models + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self): + self.reset() + + def reset(self): + self.avg = -1 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def save_model(model, optimizer, scheduler, save_dir, acc=00): + model_paras = model.checkpoint() + optim_paras = optimizer.checkpoint() + scheduler_main_paras = scheduler.checkpoint() + + save_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) + save_path = os.path.join(save_dir, f'{acc:.1f}_{save_time}.pt') + torch.save({ + "model_paras": model_paras, + "optim_paras": optim_paras, + "scheduler_paras": scheduler_main_paras + }, save_path) + + print(f"\nSuccessfully saved model, optimizer and scheduler to {save_path}") + + +def get_device(model): + if next(model.parameters()).device.type == 'cuda': + index = next(model.parameters()).device.index + device = torch.device(f'cuda:{index}') + else: + device = torch.device('cpu') + return device + + +def load_model(run_name: str, + log_dir: str, + ckpt_name: str = 'best') -> torch.nn.Module: + run_dir = os.path.join(log_dir, run_name) + print(f'Loading model from {run_dir}...') + print(log_dir,run_name) + checkpoint = 
torch.load(os.path.join(run_dir, f'checkpoints/{ckpt_name}.pth')) + with open(os.path.join(run_dir, 'configs.yaml'), 'r') as stream: + run_config = yaml.load(stream, Loader=yaml.FullLoader) + + model_dict = { + 'PAC_Net': models.PAC_Net, + 'P_Net': models.P_Net, + 'C_Net': models.C_Net, + 'baseline': models.NLOS_baseline + } + + model_name = run_config['model_configs'].pop('model_name') + print('Min val loss is:', checkpoint['min_loss_total']) + model_builder = model_dict[model_name] + model = model_builder(**run_config['model_configs']) + + load_state_dict = {k.replace('module.', ''): v for k, v in checkpoint['model'].items()} + model.load_state_dict(load_state_dict) + print('Successfully load model parameters!') + + return model.eval() + + +def fig2array(fig: plt.Figure): + from PIL import Image + + fig.canvas.draw() + + w, h = fig.canvas.get_width_height() + buf = np.fromstring(fig.canvas.tostring_argb(), dtype=np.uint8) + buf.shape = (w, h, 4) + buf = np.roll(buf, 3, axis=2) + + image_array = Image.frombytes("RGBA", (w, h), buf.tostring()) + image_array = np.asarray(image_array) + plt.close(fig) + return image_array diff --git a/NLOS_detr/utils/trainer.py b/NLOS_detr/utils/trainer.py new file mode 100644 index 0000000..994175f --- /dev/null +++ b/NLOS_detr/utils/trainer.py @@ -0,0 +1,584 @@ +import os +import platform +import random +import time +from decimal import Decimal + +import numpy as np +import wandb +import yaml +from loguru import logger +from prettytable import PrettyTable +from tqdm import tqdm +from fairscale.optim.oss import OSS +import torch +from torch import optim, nn, distributed +from torch.cuda.amp import GradScaler, autocast +from torch.backends import cudnn +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data import DistributedSampler, DataLoader + +import models +from data.dataset import split_dataset +from .vis import draw_routes, draw_routes_mot +from .tools import AverageMeter, load_model +from .metric import compute_batch_metrics + +import pdb + + +# cudnn.benchmark = True + + +def seed_worker(worker_id): + # print(torch.initial_seed()) + worker_seed = torch.initial_seed() % 2 ** 32 + np.random.seed(worker_seed) + random.seed(worker_seed) + torch.manual_seed(worker_seed) + + +def _set_seed(seed, deterministic=False): + """ + seed manually to make runs reproducible + Args: + seed (int): Seed to be used. 
+ deterministic (bool): Whether to set the deterministic option + for CUDNN backend + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + if deterministic: + cudnn.deterministic = True + cudnn.benchmark = False + + +class Trainer_Base(object): + def __init__(self, cfg): + tic = time.time() + self.dist_cfgs = cfg['distributed_configs'] + if self.dist_cfgs['local_rank'] == 0: + logger.info("Loading configurations...") + self.cfg = cfg + self.model_cfgs = cfg['model_configs'] + self.train_cfgs = cfg['train_configs'] + self.dataset_cfgs = cfg['dataset_configs'] + self.loader_kwargs = cfg['loader_kwargs'] + self.optim_kwargs = cfg['optim_kwargs'] + self.schedule_cfgs = cfg['schedule_configs'] + self.log_cfgs = cfg['log_configs'] + + if self.dist_cfgs['local_rank'] == 0: + logger.info("Initializing trainer...") + if self.dist_cfgs['distributed']: + distributed.init_process_group(backend='nccl', + init_method='tcp://127.0.0.1:' + self.dist_cfgs['port'], + world_size=self.dist_cfgs['world_size'], + rank=self.dist_cfgs['local_rank']) + _set_seed(self.train_cfgs['seed'] + self.dist_cfgs['local_rank'], deterministic=True) + if torch.cuda.is_available(): + self.device = f'cuda:{self.dist_cfgs["local_rank"]}' + else: + self.device = "cpu" + self.dist_cfgs['device'] = self.device + + self.save_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) + if self.log_cfgs['log_dir'].startswith('/'): + log_root = self.log_cfgs['log_dir'] + else: + log_root = os.path.join(os.getcwd(), self.log_cfgs['log_dir']) + self.log_dir = os.path.join(log_root, self.save_time) + self.ckpt_dir = os.path.join(self.log_dir, 'checkpoints') + os.makedirs(self.ckpt_dir, exist_ok=True) + if self.dist_cfgs['local_rank'] == 0: + with open(os.path.join(self.log_dir, 'configs.yaml'), 'w', encoding="utf-8") as f: + yaml.safe_dump(self.cfg, f, default_flow_style=False, allow_unicode=True) + + if self.dist_cfgs['local_rank'] == 0: + logger.info("Loading dataset...") + (self.train_loader, self.train_sampler), (self.val_loader, self.val_sampler) = self._load_dataset() + + if self.dist_cfgs['local_rank'] == 0: + logger.info("Building model...") + self.model_name = self._build_model() + if self.dist_cfgs['distributed']: + self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) + self.model = DDP(self.model, + device_ids=[self.dist_cfgs['local_rank']], + output_device=self.dist_cfgs['local_rank'], + find_unused_parameters=True) + + if self.dist_cfgs['local_rank'] == 0: + logger.info("Loading optimizer...") + self._load_optimizer() + + if self.dist_cfgs['local_rank'] == 0: + print(f"{time.time() - tic:.2f} sec are used to initialize a Trainer.") + + self.start_epoch = 0 + self.steps = 0 + self.epoch = 0 + self.train_min_loss = float('inf') + + def _load_dataset(self): + raise NotImplementedError + + def _build_model(self): + raise NotImplementedError + + def _load_optimizer(self): + base_optimizer = None + optim_type = self.optim_kwargs.pop('optimizer') + if optim_type == 'SGD': + base_optimizer = optim.SGD + self.optim_kwargs['momentum'] = 0.9 + elif optim_type == 'Adam': + base_optimizer = optim.Adam + # self.optim_kwargs['betas'] = (0.9, 0.999) + elif optim_type == 'AdamW': + base_optimizer = optim.AdamW + # self.optim_kwargs['betas'] = (0.9, 0.999) + else: + print(f"{optim_type} not support.") + exit(0) + + if self.dist_cfgs['distributed']: + # Wrap a base optimizer into OSS + self.optimizer = OSS( + optim=base_optimizer, + 
params=self.model.parameters(), + **self.optim_kwargs, + ) + else: + self.optimizer = base_optimizer( + params=self.model.parameters(), + **self.optim_kwargs, + ) + + if self.schedule_cfgs['schedule_type'] == 'cosine_warm': + self.schedule_cfgs['max_epoch'] = \ + int((self.schedule_cfgs['cos_mul'] ** self.schedule_cfgs['cos_iters'] - 1) / \ + (self.schedule_cfgs['cos_mul'] - 1) * self.schedule_cfgs['cos_T']) + self.scheduler = \ + optim.lr_scheduler.CosineAnnealingWarmRestarts(self.optimizer, + T_0=self.schedule_cfgs['cos_T'], + T_mult=self.schedule_cfgs['cos_mul']) + elif self.schedule_cfgs['schedule_type'] == 'cosine': + self.schedule_cfgs['max_epoch'] = self.schedule_cfgs['cos_T'] + self.scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=self.schedule_cfgs['cos_T']) + + if self.train_cfgs['amp']: + self.scaler = GradScaler() + + self.optim_kwargs['optimizer'] = optim_type + + def _init_recorder(self, log_train_cfg): + wandb.init(project=self.train_cfgs['project_name'], + name=self.save_time, dir=self.log_dir, config=log_train_cfg, + settings=wandb.Settings(start_method="fork")) + wandb.watch(self.model) + + config_table = PrettyTable() + config_table.add_column('Phase', list(log_train_cfg)) + config_table.add_column('Value', list(log_train_cfg.values())) + logger.info('\n' + config_table.get_string()) + + def load_checkpoint(self, path): + raise NotImplementedError + + +class Trainer_tracking(Trainer_Base): + def __init__(self, cfg): + super(Trainer_tracking, self).__init__(cfg=cfg) + + log_train_cfg = { + "model_name": self.model_name, + **self.model_cfgs, + "batch_size": self.train_cfgs['batch_size'], + "v_loss_alpha": self.train_cfgs['v_loss_alpha'], + "x_loss_alpha": self.train_cfgs['x_loss_alpha'], + "m_loss_alpha": self.train_cfgs['m_loss_alpha'], + "loss_total_alpha": self.train_cfgs['loss_total_alpha'], + "resume": self.train_cfgs['resume'], + "route_len": self.dataset_cfgs['route_len'], + "noise_factor": self.dataset_cfgs['noise_factor'], + **self.optim_kwargs, + "epochs": self.schedule_cfgs['max_epoch'], + } + + self.model_cfgs["x_loss_alpha"] = self.train_cfgs["x_loss_alpha"] + self.model_cfgs["v_loss_alpha"] = self.train_cfgs["v_loss_alpha"] + self.model_cfgs["m_loss_alpha"] = self.train_cfgs["m_loss_alpha"] + + # self.model_cfgs["route_len"] = self.dataset_cfgs['route_len'] - + + + if self.dist_cfgs['local_rank'] == 0: + self._init_recorder(log_train_cfg) + + self.val_metrics = {'x_loss': 0.0, + 'v_loss': 0.0, + 'min_loss_total': float('inf'), + 'pcm': float('inf'), + 'area': float('inf'), + 'dtw': float('inf'), + 'best_epoch': 0} + + def _load_dataset(self): + train_dataset, val_dataset = split_dataset(**self.dataset_cfgs) + + if self.dist_cfgs['distributed']: + train_sampler = DistributedSampler(train_dataset, shuffle=True) + val_sampler = DistributedSampler(val_dataset, shuffle=True) + else: + train_sampler = None + val_sampler = None + + train_loader = DataLoader(train_dataset, **self.loader_kwargs, worker_init_fn=seed_worker, drop_last=True) + val_loader = DataLoader(val_dataset, **self.loader_kwargs, worker_init_fn=seed_worker, drop_last=False) + return (train_loader, train_sampler), (val_loader, val_sampler) + + def _build_model(self): + model_name = self.model_cfgs.pop('model_name') + model_builder = { + 'PAC_Net': models.PAC_Net, + 'P_Net': models.P_Net, + 'C_Net': models.C_Net, + 'baseline': models.NLOS_baseline, + }[model_name] + + self.model = model_builder(**self.model_cfgs) + if self.train_cfgs['resume']: + checkpoint_path = 
self.train_cfgs['resume_path'] + self.load_checkpoint(checkpoint_path) + self.model.to(self.device) + + total = sum([param.nelement() for param in self.model.parameters()]) + print("Number of parameter: %.2fM" % (total/1e6)) + + return model_name + + def run(self): + if self.dist_cfgs['local_rank'] == 0: + logger.info("--- Begin to run! ---") + for epoch in range(self.start_epoch, self.schedule_cfgs['max_epoch']): + + if self.dist_cfgs['distributed']: + self.train_sampler.set_epoch(epoch) + + # pdb.set_trace() + train_loss, train_metric = self.train(epoch) + val_loss, val_metric = self.val(epoch) + self.epoch += 1 # (1->70) + + if self.dist_cfgs['local_rank'] == 0: + for i, param_group in enumerate(self.optimizer.param_groups): + wandb.log({f"optimizer/lr_group_{i}": param_group['lr']}, step=epoch + 1) + wandb.log({ + 'Loss/train/loss_x': train_loss[0], + 'Loss/train/loss_v': train_loss[1], + 'Loss/train/loss_m': train_loss[2], + 'Loss/val/loss_x': val_loss[0], + 'Loss/val/loss_v': val_loss[1], + 'Loss/val/loss_m': val_loss[2], + 'Loss/val/min_loss_total': self.val_metrics['min_loss_total'], + }, step=epoch + 1) + wandb.log({ + 'Metric/train/pcm': train_metric[0], + 'Metric/train/area': train_metric[1], + 'Metric/train/dtw': train_metric[2], + 'Metric/train/acc': train_metric[3], + 'Metric/val/pcm': val_metric[0], + 'Metric/val/area': val_metric[1], + 'Metric/val/dtw': val_metric[2], + 'Metric/val/acc': val_metric[3], + }, step=epoch + 1) + if self.epoch % 5 == 0: + logger.info(f'Logging images...') + self.test_plot(epoch=self.epoch, phase='train') + self.test_plot(epoch=self.epoch, phase='val') + + self.scheduler.step() + + if ((epoch + 1) % self.log_cfgs['save_epoch_interval'] == 0) \ + or (epoch + 1) == self.schedule_cfgs['max_epoch']: + checkpoint_path = os.path.join(self.ckpt_dir, f"epoch_{(epoch + 1)}.pth") + self.save_checkpoint(checkpoint_path) + + if self.dist_cfgs['local_rank'] == 0: + wandb.finish() + + if self.dist_cfgs['distributed']: + distributed.destroy_process_group() + + def train(self, epoch): + self.model.train() + len_loader = len(self.train_loader) + iter_loader = iter(self.train_loader) + + # loss + x_loss_recorder = AverageMeter() + v_loss_recorder = AverageMeter() + m_loss_recorder = AverageMeter() + # metric + pcm_recorder = AverageMeter() + area_recorder = AverageMeter() + dtw_recorder = AverageMeter() + + ##counting + acc_recorder = AverageMeter() + + pbar = None + if self.dist_cfgs['local_rank'] == 0: + pbar = tqdm(total=len_loader, + dynamic_ncols=True, + ascii=(platform.version() == 'Windows')) + + for step in range(len_loader): + try: + inputs, labels, map_sizes = next(iter_loader) + except Exception as e: + logger.critical(e) + exit() + continue + + inputs = inputs.to(self.device) + labels = labels.to(self.device) + map_sizes = map_sizes.to(self.device) + + batch_size = inputs.size(0) + + if self.train_cfgs['amp']: + with autocast(): + (x_loss, v_loss, m_loss), (preds,acc) = self.model((inputs, labels)) + + loss = x_loss * self.train_cfgs['x_loss_alpha'] + \ + v_loss * self.train_cfgs['v_loss_alpha'] + \ + m_loss * self.train_cfgs['m_loss_alpha'] + + self.scaler.scale(loss).backward() + # nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=20, norm_type=2) + self.scaler.step(self.optimizer) + self.scaler.update() + else: + pass + self.optimizer.zero_grad() + + self.steps += 1 + x_loss = x_loss.detach().clone() + v_loss = v_loss.detach().clone() + m_loss = m_loss.detach().clone() + + if self.dist_cfgs['distributed']: + distributed.reduce(x_loss, 0) + 
x_loss /= self.dist_cfgs['world_size'] + v_loss /= self.dist_cfgs['world_size'] + m_loss /= self.dist_cfgs['world_size'] + + x_loss_recorder.update(x_loss.item(), batch_size) + v_loss_recorder.update(v_loss.item(), batch_size) + m_loss_recorder.update(m_loss.item(), batch_size) + + T = preds.shape[1] + preds = preds * map_sizes + labels = labels * map_sizes + pcm, area, dtw = compute_batch_metrics(preds, labels[:, -T:]) + pcm_recorder.update(pcm.item(), batch_size) + area_recorder.update(area.item(), batch_size) + dtw_recorder.update(dtw.item(), batch_size) + + acc_recorder.update(acc, batch_size) + + if self.dist_cfgs['local_rank'] == 0: + last_lr = [param_group['lr'] for param_group in self.optimizer.param_groups] + last_lr_string = "lr " + ' '.join(f"{Decimal(lr):.2E}" for lr in last_lr) + + pbar.set_description( + f"train epoch {epoch + 1}/{self.schedule_cfgs['max_epoch']} " + # f"Iter {self.steps}/{len_loader * self.schedule_cfgs['max_epoch']} " + f"{last_lr_string} " + f"---- " + f"x_loss: {x_loss_recorder.avg:.3E} " + f"v_loss: {v_loss_recorder.avg:.3E} " + f"m_loss: {m_loss_recorder.avg:.3E} " + f"---- " + f"area: {area_recorder.avg:.4f} " + f"dtw: {dtw_recorder.avg:.3f} " + f"pcm: {pcm_recorder.avg:.3f} " + ) + pbar.update() + + if self.steps % self.log_cfgs['snapshot_interval'] == 0: + checkpoint_path = os.path.join(self.ckpt_dir, "latest.pth") + self.save_checkpoint(checkpoint_path) + + if self.dist_cfgs['local_rank'] == 0: + pbar.close() + + return (x_loss_recorder.avg, v_loss_recorder.avg, m_loss_recorder.avg), \ + (pcm_recorder.avg, area_recorder.avg, dtw_recorder.avg, acc_recorder.avg) + + def val(self, epoch): + self.model.eval() + len_loader = len(self.val_loader) + iter_loader = iter(self.val_loader) + + # loss + x_loss_recorder = AverageMeter() + v_loss_recorder = AverageMeter() + m_loss_recorder = AverageMeter() + + # metric + pcm_recorder = AverageMeter() + area_recorder = AverageMeter() + dtw_recorder = AverageMeter() + + acc_recorder = AverageMeter() + + pbar = None + if self.dist_cfgs['local_rank'] == 0: + pbar = tqdm(total=len_loader, + dynamic_ncols=True, + ascii=(platform.version() == 'Windows')) + + for step in range(len_loader): + try: + inputs, labels, map_sizes = next(iter_loader) + except Exception as e: + logger.critical(e) + continue + + inputs = inputs.to(self.device) + labels = labels.to(self.device) + map_sizes = map_sizes.to(self.device) + + batch_size = inputs.size(0) + + with torch.no_grad(): + if self.train_cfgs['amp']: + with autocast(): + (x_loss, v_loss, m_loss), (preds,acc) = self.model((inputs, labels)) + else: + (x_loss, v_loss, m_loss), preds = self.model((inputs, labels)) + + x_loss = x_loss.detach().clone() + v_loss = v_loss.detach().clone() + m_loss = m_loss.detach().clone() + + if self.dist_cfgs['distributed']: + distributed.reduce(x_loss, 0) + x_loss /= self.dist_cfgs['world_size'] + v_loss /= self.dist_cfgs['world_size'] + m_loss /= self.dist_cfgs['world_size'] + x_loss_recorder.update(x_loss.item(), batch_size) + v_loss_recorder.update(v_loss.item(), batch_size) + m_loss_recorder.update(m_loss.item(), batch_size) + + T = preds.shape[1] + preds = preds * map_sizes + labels = labels * map_sizes + + pcm, area, dtw = compute_batch_metrics(preds, labels[:, -T:]) + pcm_recorder.update(pcm.item(), batch_size) + area_recorder.update(area.item(), batch_size) + dtw_recorder.update(dtw.item(), batch_size) + acc_recorder.update(acc, batch_size) + + if self.dist_cfgs['local_rank'] == 0: + pbar.set_description( + f"val epoch {epoch + 
1}/{self.schedule_cfgs['max_epoch']} " + f"Step {step}/{len_loader} " + f"------ " + f"x_loss: {x_loss_recorder.avg:.3E} " + f"v_loss: {v_loss_recorder.avg:.3E} " + f"m_loss: {m_loss_recorder.avg:.3E} " + f"---- " + f"area: {area_recorder.avg:.4f} " + f"dtw: {dtw_recorder.avg:.4f} " + f"pcm: {pcm_recorder.avg:.4f} ") + pbar.update() + + if self.dist_cfgs['local_rank'] == 0: + pbar.close() + + self.val_metrics['x_loss'] = x_loss_recorder.avg + self.val_metrics['v_loss'] = v_loss_recorder.avg + self.val_metrics["m_loss"] = m_loss_recorder.avg + + loss_total = x_loss_recorder.avg + v_loss_recorder.avg + m_loss_recorder.avg + if loss_total < self.val_metrics['min_loss_total']: + self.val_metrics['min_loss_total'] = loss_total + self.val_metrics['best_epoch'] = epoch + 1 + + checkpoint_path = os.path.join(self.ckpt_dir, "best.pth") + self.save_checkpoint(checkpoint_path) + + self.val_metrics['pcm'] = pcm_recorder.avg + self.val_metrics['area'] = area_recorder.avg + self.val_metrics['dtw'] = dtw_recorder.avg + self.val_metrics['acc'] = acc_recorder.avg + + names = ['x_loss', 'v_loss',"m_loss",'pcm', 'area', 'dtw', 'min_loss_total', 'best_epoch'] + res_table = PrettyTable(names) + metrics = [self.val_metrics[name] for name in names] + res_table.add_row([f"{m:.4}" if type(m) is float else m for m in metrics[:-1]] + [metrics[-1]]) + + logger.info(f'Performance on validation set at epoch: {epoch + 1}\n' + res_table.get_string()) + + return (self.val_metrics['x_loss'], self.val_metrics['v_loss'], self.val_metrics['m_loss']), \ + (self.val_metrics['pcm'], self.val_metrics['area'], self.val_metrics['dtw'], self.val_metrics['acc']) + + def test_plot(self, epoch, phase: str): + assert phase in ['train', 'val'] + self.model.eval() + iter_loader = iter(self.val_loader) if phase == 'val' else iter(self.train_loader) + frames, gt_routes, map_sizes = next(iter_loader) + frames = frames[:6].to(self.device) + gt_routes = gt_routes[:6].to(self.device) + map_sizes = map_sizes[:6].to(self.device) + with torch.no_grad(): + if self.train_cfgs['amp']: + with autocast(): + loss, (pred_routes,_) = self.model((frames, gt_routes)) + else: + loss, (pred_routes,_) = self.model((frames, gt_routes)) + + for idx, (gt, pred, map_size) in enumerate(zip( + gt_routes.cpu().numpy(), pred_routes.cpu().numpy(), map_sizes.cpu().numpy())): + mark_T = None if 'warmup' not in self.model_name else self.model_cfgs['warm_up'] + # fig = draw_routes(routes=(gt, pred), return_mode='fig_array') + fig = draw_routes_mot(routes=(gt, pred), return_mode='fig_array') + # pcm, area, dtw = compute_track_metrics(gt, pred) + wandb.log({f'{phase} route:{idx}': wandb.Image(fig, caption=f"map_size: {map_size}")}, + step=epoch) + + def save_checkpoint(self, path): + # self.optimizer.consolidate_state_dict() + if not os.path.exists(os.path.split(path)[0]): + os.makedirs(os.path.split(path)[0]) + + if self.dist_cfgs['local_rank'] == 0: + save_dict = { + 'model': self.model.state_dict(), + # 'optimizer': self.optimizer.state_dict(), + 'epoch': self.epoch, + 'iteration': self.steps, + **self.val_metrics + } + torch.save(save_dict, path) + + def load_checkpoint(self, path): + # ckpt = None + # if self.dist_cfgs['local_rank'] == 0: + # ckpt = torch.load(path, map_location={'cuda:0': f'cuda:{self.dist_cfgs["local_rank"]}'}) + # self.model.load_state_dict(ckpt['model']) + # self.optimizer.load_state_dict(ckpt['optimizer']) + # self.start_epoch = ckpt['epoch'] + # self.steps = ckpt['iteration'] + # self.val_metrics['best_epoch'] = ckpt['best_epoch'] + # 
self.val_metrics['min_loss'] = ckpt['min_val_loss'] + self.model = load_model(run_name=path, + log_dir=self.log_cfgs['log_dir']) diff --git a/NLOS_detr/utils/vis.py b/NLOS_detr/utils/vis.py new file mode 100644 index 0000000..8369eed --- /dev/null +++ b/NLOS_detr/utils/vis.py @@ -0,0 +1,129 @@ +import numpy as np +from numpy import ndarray +from matplotlib import pyplot as plt +from matplotlib.collections import LineCollection + +from utils.tools import fig2array +import pdb + +def draw_route(map_size: ndarray, route: ndarray, + cmap: str = 'viridis', return_mode: str = None): + route = route * map_size + route = route.reshape((-1, 1, 2)) + + idxs = np.array(range(route.shape[0])) + fig = plt.figure() + fig.patch.set_facecolor('none') + ax = fig.add_subplot(111) + ax.plot(route[:, 0, 0], route[:, 0, 1], '--', ms=5) + + norm = plt.Normalize(idxs[0], idxs[-1]) + segments = np.concatenate([route[:-1], route[1:]], axis=1) + lc = LineCollection(segments, cmap=cmap, norm=norm) + lc.set_array(idxs) + lc.set_linewidth(3) + line = ax.add_collection(lc) + fig.colorbar(line, ax=ax, ticks=idxs[::int(len(idxs) / 10)], label='step') + + ax.set_xlim(0, map_size[0]) + ax.set_xlabel('x') + ax.set_ylim(0, map_size[1]) + ax.set_ylabel('y') + ax.set_aspect(1) + + ax.grid(visible=False) + + if return_mode is not None: + return fig + else: + fig.show() + + +def draw_routes(routes: tuple[ndarray, ndarray], return_mode: str = None): + assert return_mode in ['plt_fig', 'fig_array', None] + titles = ('GT', 'pred') + cmaps = ('viridis', 'plasma') + + fig, axes = plt.subplots(1, 2, figsize=(10, 5), constrained_layout=True) + fig.patch.set_facecolor('white') + axes = axes.flatten() + + for i, route in enumerate(routes): + route = route.reshape((-1, 1, 2)) + idxs = np.array(range(route.shape[0])) + axes[i].plot(route[:, 0, 0], route[:, 0, 1], '--', ms=5) + + norm = plt.Normalize(idxs[0], idxs[-1]) + segments = np.concatenate([route[:-1], route[1:]], axis=1) + lc = LineCollection(segments, cmap=cmaps[i], norm=norm) + lc.set_array(idxs) + lc.set_linewidth(3) + line = axes[i].add_collection(lc) + fig.colorbar(line, ax=axes[i], ticks=idxs[::int(len(idxs) / 10)], label='step', fraction=0.05) + + axes[i].set_title(titles[i]) + axes[i].set_xlim(0, 1) + axes[i].set_xlabel('x') + axes[i].set_ylim(0, 1) + axes[i].set_ylabel('y') + axes[i].set_aspect(1) + axes[i].grid(visible=False) + + if return_mode is None: + fig.show() + elif return_mode == 'plt_fig': + return fig + elif return_mode == 'fig_array': + return fig2array(fig) + +def draw_routes_mot(routes: tuple[ndarray, ndarray], return_mode: str = None): + assert return_mode in ['plt_fig', 'fig_array', None] + titles = ('GT', 'pred') + cmaps = ('viridis', 'plasma') + + fig, axes = plt.subplots(1, 2, figsize=(10, 5), constrained_layout=True) + fig.patch.set_facecolor('white') + axes = axes.flatten() + + gt = routes[0] + # npeo = 0 + # for i in range(gt.shape[1]): + # if all(gt[:, i] == 0.5): + # npeo = i//2 + # break + # if npeo == 0: + # npeo = 5 + npeo = 3 + # pdb.set_trace() + # pdb.set_trace() + for i, route in enumerate(routes): + # route: t,10 + for p in range(npeo): + nowroute = route[:, 2*p:2*p+2] + nowroute = nowroute.reshape((-1, 1, 2)) + idxs = np.array(range(nowroute.shape[0])) + axes[i].plot(nowroute[:, 0, 0], nowroute[:, 0, 1], '-', ms=5) + + norm = plt.Normalize(idxs[0], idxs[-1]) + segments = np.concatenate([nowroute[:-1], nowroute[1:]], axis=1) + lc = LineCollection(segments, cmap=cmaps[i], norm=norm) + lc.set_array(idxs) + lc.set_linewidth(3) + line = 
axes[i].add_collection(lc) + fig.colorbar(line, ax=axes[i], ticks=idxs[::int(len(idxs) / 10)], label='step', fraction=0.05) + + axes[i].set_title(titles[i]) + axes[i].set_xlim(0, 1) + axes[i].set_xlabel('x') + axes[i].set_ylim(0, 1) + axes[i].set_ylabel('y') + axes[i].set_aspect(1) + axes[i].grid(visible=False) + + if return_mode is None: + fig.show() + elif return_mode == 'plt_fig': + return fig + elif return_mode == 'fig_array': + return fig2array(fig) +
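A note on the label layout produced by TrackingDataset.__getitem__ in the patch above: the per-person (x, y) columns of route are reordered so that people appear left to right (sorted by the sum of their x coordinates over the clip) and the result is padded with the constant 0.5 up to max_peo * 2 columns. The following is a minimal standalone sketch of that convention; the toy array (T = 4 frames, 2 people, max_peo = 3) is made up for illustration and is not taken from the dataset.

import numpy as np

T, max_peo = 4, 3
# toy normalized route for 2 people: columns are (x0, y0, x1, y1)
route = np.array([[0.8, 0.1, 0.2, 0.9],
                  [0.8, 0.2, 0.3, 0.9],
                  [0.7, 0.3, 0.3, 0.8],
                  [0.7, 0.4, 0.4, 0.8]])

n_peo = route.shape[1] // 2
# order people by the sum of their x coordinate over the clip (left-most first),
# mirroring the sort in TrackingDataset.__getitem__
order = sorted(range(n_peo), key=lambda p: route[:, 2 * p].sum())
sorted_route = np.concatenate([route[:, 2 * p:2 * p + 2] for p in order], axis=1)

# pad the missing person slots with 0.5 so every label is (T, 2 * max_peo)
pad = np.full((T, 2 * (max_peo - n_peo)), 0.5)
label = np.concatenate([sorted_route, pad], axis=1)
assert label.shape == (T, 2 * max_peo)
print(label[0])  # left-most person first, then the second person, then 0.5 padding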
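Similarly, a minimal usage sketch of the multi-person route generator added in utils/route2.py, assuming the NLOS_detr directory is on the Python path and numpy/shapely are installed. It only exercises generate_route and reads the normalized tracks out of c_route_all; save_route and draw_route in this class still refer to the single-person attributes (c_route, velocities) and are not used here.

import numpy as np
from utils.route2 import route_generator_mot

# assumed settings: a 5 m x 5 m map with three walkers and 256 steps per route
gen = route_generator_mot(map_size=(5, 5), n_peo=3)
gen.generate_route(route_length=256, turn_rate=0.15)

# c_route_all holds one (route_length, 2) array per person, already divided by map_size
tracks = [np.asarray(c) for c in gen.c_route_all]
print(len(tracks), tracks[0].shape)  # 3 (256, 2)

# stack the people side by side to get the (T, 2 * n_peo) layout the dataset expects
route = np.concatenate(tracks, axis=1)
print(route.shape)  # (256, 6)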