diff --git a/ape/data/dataset_mapper.py b/ape/data/dataset_mapper.py
index d92a5b4..5bb00e2 100644
--- a/ape/data/dataset_mapper.py
+++ b/ape/data/dataset_mapper.py
@@ -5,7 +5,7 @@
 from detectron2.data import transforms as T
 from detectron2.data.dataset_mapper import DatasetMapper as DatasetMapper_d2
 
-from . import detection_utils as utils_sota
+from . import detection_utils as utils_ape
 
 """
 This file contains the default mapping that's applied to "dataset dicts".
@@ -33,7 +33,7 @@ class DatasetMapper_ape(DatasetMapper_d2):
 
     def __init__(self, cfg, is_train: bool = True):
         super().__init__(cfg, is_train)
-        augmentations = utils_sota.build_augmentation(cfg, is_train)
+        augmentations = utils_ape.build_augmentation(cfg, is_train)
         self.augmentations = T.AugmentationList(augmentations)
 
         logger = logging.getLogger(__name__)
diff --git a/ape/data/dataset_mapper_copypaste.py b/ape/data/dataset_mapper_copypaste.py
index 4f862b9..a1bfc46 100644
--- a/ape/data/dataset_mapper_copypaste.py
+++ b/ape/data/dataset_mapper_copypaste.py
@@ -17,7 +17,7 @@
 from detectron2.data.detection_utils import convert_image_to_rgb
 from detectron2.structures import BitMasks, Boxes, Instances
 
-from . import detection_utils as utils_sota
+from . import detection_utils as utils_ape
 from . import mapper_utils
 
 """
@@ -124,10 +124,10 @@ def __init__(
 
     @classmethod
     def from_config(cls, cfg, is_train: bool = True):
-        augs = utils_sota.build_augmentation(cfg, is_train)
+        augs = utils_ape.build_augmentation(cfg, is_train)
         augs_d2 = utils.build_augmentation(cfg, is_train)
-        augs_aa = utils_sota.build_augmentation_aa(cfg, is_train)
-        augs_lsj = utils_sota.build_augmentation_lsj(cfg, is_train)
+        augs_aa = utils_ape.build_augmentation_aa(cfg, is_train)
+        augs_lsj = utils_ape.build_augmentation_lsj(cfg, is_train)
         if cfg.INPUT.CROP.ENABLED and is_train:
             raise NotImplementedError("cfg.INPUT.CROP.ENABLED is not supported yet")
             augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
diff --git a/ape/data/mapper_utils.py b/ape/data/mapper_utils.py
index b75ed35..dabbc84 100644
--- a/ape/data/mapper_utils.py
+++ b/ape/data/mapper_utils.py
@@ -380,7 +380,6 @@ def copypaste(dataset_dict, dataset_dict_bg, image_format, instance_mask_format)
         return None, None
 
 
-# from SotA-T/ape/data/datasets/coco.py
 def maybe_load_annotation_from_file(record, meta=None, extra_annotation_keys=None):
 
     file_name = record["file_name"]
diff --git a/ape/layers/zero_shot_fc.py b/ape/layers/zero_shot_fc.py
index e19622a..3551a63 100644
--- a/ape/layers/zero_shot_fc.py
+++ b/ape/layers/zero_shot_fc.py
@@ -6,9 +6,6 @@
 from torch import nn
 from torch.nn import functional as F
 
-# from sota.modeling.text import build_clip_text_encoder, get_clip_embeddings
-# from ..modeling.text import build_clip_text_encoder, get_clip_embeddings
-
 logger = logging.getLogger(__name__)
 
 
@@ -54,7 +51,7 @@ def __init__(
             torch.nn.init.normal_(self.linear.weight, std=0.01)
 
         if len(zs_vocabulary) > 0:
-            from sota.modeling.text import get_clip_embeddings
+            from ape.modeling.text import get_clip_embeddings
 
             logger.info("Generating weight for " + zs_vocabulary)
             zs_vocabulary = zs_vocabulary.split(",")
@@ -67,7 +64,7 @@ def __init__(
         elif zs_weight_path == "zeros":
             zs_weight = torch.zeros((zs_weight_dim, num_classes))
         elif zs_weight_path == "online":
-            from sota.modeling.text import build_clip_text_encoder
+            from ape.modeling.text import build_clip_text_encoder
 
             zs_weight = torch.zeros((zs_weight_dim, num_classes))
             self.text_encoder = build_clip_text_encoder(text_model, pretrain=True)
@@ -111,7 +108,7 @@ def forward(self, x, classifier=None):
             x = self.linear(x)
         if classifier is not None:
             if isinstance(classifier, str):
-                from sota.modeling.text import get_clip_embeddings
+                from ape.modeling.text import get_clip_embeddings
 
                 zs_weight = get_clip_embeddings(
                     self.text_encoder, classifier, prompt="", device=x.device
diff --git a/datasets/tools/odinw/convert.py b/datasets/tools/odinw/convert.py
index 9513c3d..857218c 100644
--- a/datasets/tools/odinw/convert.py
+++ b/datasets/tools/odinw/convert.py
@@ -6,7 +6,7 @@
 
 from detectron2.data import MetadataCatalog
 
-import sota_t
+import ape
 
 
 print(MetadataCatalog.keys())
diff --git a/datasets/tools/openimages2coco/utils.py b/datasets/tools/openimages2coco/utils.py
index aa762f6..7e4fe3c 100644
--- a/datasets/tools/openimages2coco/utils.py
+++ b/datasets/tools/openimages2coco/utils.py
@@ -9,7 +9,7 @@
 from tqdm import tqdm
 
 from detectron2.data.detection_utils import read_image
-from sota.data.mapper_utils import mask_to_polygons
+from ape.data.mapper_utils import mask_to_polygons
 
 
 def csvread(file):
diff --git a/demo/.gitattributes b/demo/.gitattributes
new file mode 100644
index 0000000..45dafa8
--- /dev/null
+++ b/demo/.gitattributes
@@ -0,0 +1,10 @@
+examples/094_56726435.jpg filter=lfs diff=lfs merge=lfs -text
+examples/199_3946193540.jpg filter=lfs diff=lfs merge=lfs -text
+examples/SolvayConference1927.jpg filter=lfs diff=lfs merge=lfs -text
+examples/TheGreatWall.jpg filter=lfs diff=lfs merge=lfs -text
+examples/Totoro01.png filter=lfs diff=lfs merge=lfs -text
+examples/Transformers.webp filter=lfs diff=lfs merge=lfs -text
+examples/013_438973263.jpg filter=lfs diff=lfs merge=lfs -text
+examples/Pisa.jpg filter=lfs diff=lfs merge=lfs -text
+examples/Terminator3.jpg filter=lfs diff=lfs merge=lfs -text
+examples/MatrixRevolutionForZion.jpg filter=lfs diff=lfs merge=lfs -text
diff --git a/demo/pre-requirements.txt b/demo/pre-requirements.txt
index 2bdd7b4..a68a3a5 100644
--- a/demo/pre-requirements.txt
+++ b/demo/pre-requirements.txt
@@ -1,4 +1,4 @@
 --index-url https://download.pytorch.org/whl/cu118
-pytorch==2.2.1
+torch==2.2.1
 torchvision==0.17.1
 torchaudio==2.2.1
diff --git a/tools/train_net.py b/tools/train_net.py
index 90fa29c..13d28fe 100644
--- a/tools/train_net.py
+++ b/tools/train_net.py
@@ -626,7 +626,6 @@ def main(args):
 
     default_setup(cfg, args)
 
-    setup_logger(cfg.train.output_dir, distributed_rank=comm.get_rank(), name="sota")
     setup_logger(cfg.train.output_dir, distributed_rank=comm.get_rank(), name="ape")
     setup_logger(cfg.train.output_dir, distributed_rank=comm.get_rank(), name="timm")
 
@@ -640,6 +639,7 @@ def main(args):
         logger = logging.getLogger("ape")
         logger.info("Model:\n{}".format(model))
         model.to(cfg.train.device)
+        model.to(torch.float16)
         model = create_ddp_model(model)
 
         ema.may_build_model_ema(cfg, model)
diff --git a/tools/train_net_fsdp.py b/tools/train_net_fsdp.py
index 004e54c..803c217 100644
--- a/tools/train_net_fsdp.py
+++ b/tools/train_net_fsdp.py
@@ -664,7 +664,6 @@ def main(args):
 
     default_setup(cfg, args)
 
-    setup_logger(cfg.train.output_dir, distributed_rank=comm.get_rank(), name="sota")
     setup_logger(cfg.train.output_dir, distributed_rank=comm.get_rank(), name="ape")
     setup_logger(cfg.train.output_dir, distributed_rank=comm.get_rank(), name="timm")