diff --git a/configs/COCO_InstanceSegmentation/ape_deta/ape_deta_vite_eva02_clip_lsj1024_cp_12ep_fsdp.py b/configs/COCO_InstanceSegmentation/ape_deta/ape_deta_vite_eva02_clip_lsj1024_cp_12ep_fsdp.py index 9a377fe..405dc00 100644 --- a/configs/COCO_InstanceSegmentation/ape_deta/ape_deta_vite_eva02_clip_lsj1024_cp_12ep_fsdp.py +++ b/configs/COCO_InstanceSegmentation/ape_deta/ape_deta_vite_eva02_clip_lsj1024_cp_12ep_fsdp.py @@ -4,7 +4,6 @@ from detectron2.model_zoo import get_config as get_config_d2 from detrex.config import get_config as get_config_detrex from ape.modeling.backbone.vit import get_vit_lr_decay_rate - from ape.modeling.text import EVA02CLIP from ...common.backbone.vite_eva02_clip_1024 import backbone diff --git a/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vite_eva02_clip_vlf_lsj1024_cp_16x4_1080k.py b/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vite_eva02_clip_vlf_lsj1024_cp_16x4_1080k.py index 7b90022..defcede 100644 --- a/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vite_eva02_clip_vlf_lsj1024_cp_16x4_1080k.py +++ b/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vite_eva02_clip_vlf_lsj1024_cp_16x4_1080k.py @@ -95,12 +95,12 @@ clamp_min_for_underflow=True, clamp_max_for_overflow=True, use_checkpoint=True, - use_attention_mask_v=True, + # use_attention_mask_v=True, ) model.model_vision.transformer.encoder.use_act_checkpoint = True model.model_vision.text_feature_bank = True -model.model_vision.text_feature_bank_random_size = True +# model.model_vision.text_feature_bank_random_size = True model.model_vision.text_feature_reduce_before_fusion = True model.model_vision.text_feature_batch_repeat = True model.model_vision.expression_cumulative_gt_class = True diff --git a/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vite_eva02_clip_vlf_lsj1024_cp_32x2_540k_mdl_fsdp.py b/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vite_eva02_clip_vlf_lsj1024_cp_32x2_540k_mdl_fsdp.py index 607e129..bf20ea8 100644 --- a/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vite_eva02_clip_vlf_lsj1024_cp_32x2_540k_mdl_fsdp.py +++ b/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vite_eva02_clip_vlf_lsj1024_cp_32x2_540k_mdl_fsdp.py @@ -95,12 +95,12 @@ clamp_min_for_underflow=True, clamp_max_for_overflow=True, use_checkpoint=True, - use_attention_mask_v=True, + # use_attention_mask_v=True, ) model.model_vision.transformer.encoder.use_act_checkpoint = True model.model_vision.text_feature_bank = True -model.model_vision.text_feature_bank_random_size = True +# model.model_vision.text_feature_bank_random_size = True model.model_vision.text_feature_reduce_before_fusion = True model.model_vision.text_feature_batch_repeat = True model.model_vision.expression_cumulative_gt_class = True diff --git a/configs/LVIS_InstanceSegmentation/ape_deta/ape_deta_vite_eva02_clip_lsj1024_cp_24ep_fsdp.py b/configs/LVIS_InstanceSegmentation/ape_deta/ape_deta_vite_eva02_clip_lsj1024_cp_24ep_fsdp.py index 0106f87..97d0959 100644 --- a/configs/LVIS_InstanceSegmentation/ape_deta/ape_deta_vite_eva02_clip_lsj1024_cp_24ep_fsdp.py +++ b/configs/LVIS_InstanceSegmentation/ape_deta/ape_deta_vite_eva02_clip_lsj1024_cp_24ep_fsdp.py @@ -10,7 +10,6 @@ model.model_vision.num_classes = 1203 model.model_vision.select_box_nums_for_evaluation = 300 -model.model_vision.test_score_thresh = 0.0 model.model_vision.criterion[0].num_classes = 1203 model.model_vision.criterion[0].use_fed_loss = True model.model_vision.criterion[0].get_fed_loss_cls_weights = lambda: get_fed_loss_cls_weights(