diff --git a/README.md b/README.md
index b4843b2..d9da543 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,7 @@ Put them under the dir of this project (or any other position if you use absolut
 
 Refer to this repository [VidVRD-tracklets](https://github.com/Dawn-LX/VidVRD-tracklets) (last Section of README.md) for more details about extracting features based on the given bbox positions.
 
+As for the classeme feature: for VidOR, we use a weighted average of the category word embeddings, weighted by the classification probability vector predicted by the detector (a "soft" classeme); for VidVRD, we directly use the word embedding of the predicted category (a "hard" classeme). Refer to `tools_draft/extract_classeme.py` for more details.
 
 # Evaluation:
 **First, make sure you run `tools/dataloader_demo.py` successfully**
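To make the soft/hard distinction described in the README paragraph above concrete, here is a minimal sketch (toy sizes and random vectors of my choosing; the real pipeline uses 300-d GloVe embeddings and the 80 VidOR / 35 VidVRD foreground categories):

```python
import numpy as np

num_cls, dim = 4, 5                       # toy sizes; real code: 80 or 35 classes, 300-d GloVe
word_emb = np.random.rand(num_cls, dim)   # row c = embedding of category c

# "hard" classeme (VidVRD): the embedding of the single predicted category
cat_id = 2
hard_classeme = word_emb[cat_id]          # shape (5,)

# "soft" classeme (VidOR): probability-weighted average over all categories
logits = np.random.rand(num_cls)
probs = np.exp(logits) / np.exp(logits).sum()   # softmax
soft_classeme = probs @ word_emb          # shape (5,)

print(hard_classeme.shape, soft_classeme.shape)
```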
diff --git a/tools_draft/construct_CatName2vec.py b/tools_draft/construct_CatName2vec.py
new file mode 100644
index 0000000..a9b7a11
--- /dev/null
+++ b/tools_draft/construct_CatName2vec.py
@@ -0,0 +1,157 @@
+import root_path
+import numpy as np
+from tqdm import tqdm
+
+from utils.categories_v2 import vidvrd_CatId2name,vidvrd_PredId2name,vidor_CatId2name,vidor_PredId2name,PKU_vidvrd_CatId2name
+
+file_path = "/home/gkf/project/2D-TAN/.vector_cache/glove.6B.300d.txt"  # "6B" = trained on 6 billion tokens
+with open(file_path,'r') as f:
+    glove6B_300d = f.readlines()
+glove6B_300d = [line.strip().split(" ") for line in glove6B_300d]
+print(len(glove6B_300d))
+
+# build a word --> 300-d vector dict
+glove6B_300d_dict = {}
+for word2vec in tqdm(glove6B_300d):
+    assert len(word2vec) == 301, "len(word2vec)={}".format(len(word2vec))
+    word = word2vec[0]
+    vector = [float(x) for x in word2vec[1:]]
+    glove6B_300d_dict[word] = np.array(vector)  # shape == (300,)
+
+def get_wordvec(word):
+    # multi-word names are joined by '_' (e.g., "baby_seat");
+    # embed each sub-word and take the mean
+    words = word.split('_')
+    return np.mean([glove6B_300d_dict[w] for w in words],axis=0)
+
+def get_name_emb(name):
+    # some category names list synonyms separated by '/'; average their embeddings
+    return np.mean([get_wordvec(n) for n in name.split('/')],axis=0)
+
+def construct_vidvrd_entity():
+    num_enti = len(vidvrd_CatId2name)
+    num_pred = len(vidvrd_PredId2name)
+    assert num_enti == 36 and num_pred == 133
+
+    ## entity name2vec
+    enti_matrix = np.zeros(shape=(num_enti,300))
+    for idx,name in vidvrd_CatId2name.items():
+        if name == "__background__":
+            continue  # keep the all-zero row for background
+        enti_matrix[idx] = get_name_emb(name)
+    np.save("tools/vidvrd_EntiNameEmb.npy",enti_matrix)
+
+    ## predicate name2vec
+    pred_matrix = np.zeros(shape=(num_pred,300))
+    for idx,name in vidvrd_PredId2name.items():
+        if name == "__background__":
+            continue
+        pred_matrix[idx] = get_name_emb(name)
+    np.save("tools/vidvrd_PredNameEmb.npy",pred_matrix)
+
+def construct_vidvrd_entity_pku():
+    num_enti = len(PKU_vidvrd_CatId2name)
+    assert num_enti == 36
+
+    ## entity name2vec
+    enti_matrix = np.zeros(shape=(num_enti,300))
+    for idx,name in PKU_vidvrd_CatId2name.items():  # NOTE: iterate the PKU mapping, not vidvrd_CatId2name
+        if name == "__background__":
+            continue
+        enti_matrix[idx] = get_name_emb(name)
+    np.save("tools/vidvrd_EntiNameEmb_pku.npy",enti_matrix)
+
+def construct_vidor_NameEmb():
+    num_enti = len(vidor_CatId2name)
+    num_pred = len(vidor_PredId2name)
+    assert num_enti == 81 and num_pred == 51
+
+    ## entity name2vec
+    enti_matrix = np.zeros(shape=(num_enti,300))
+    for idx,name in vidor_CatId2name.items():
+        if name == "__background__":
+            continue
+        enti_matrix[idx] = get_name_emb(name)
+    np.save("tools/vidor_EntiNameEmb.npy",enti_matrix)
+
+    ## predicate name2vec
+    pred_matrix = np.zeros(shape=(num_pred,300))
+    for idx,name in vidor_PredId2name.items():
+        if name == "__background__":
+            continue
+        if name == "play(instrument)":
+            name = "play_instrument"  # normalize so get_wordvec can split it
+        pred_matrix[idx] = get_wordvec(name)
+    np.save("tools/vidor_PredNameEmb.npy",pred_matrix)
+
+if __name__ == "__main__":
+    construct_vidvrd_entity_pku()
diff --git a/tools_draft/extract_classeme.py b/tools_draft/extract_classeme.py
new file mode 100644
index 0000000..83bfc35
--- /dev/null
+++ b/tools_draft/extract_classeme.py
@@ -0,0 +1,45 @@
+import root_path
+import os
+import torch
+import numpy as np
+import pickle
+from tqdm import tqdm
+np.set_printoptions(precision=4,linewidth=500)
+
+from utils.categories_v2 import vidor_categories
+
+loadpath = "tools/vidor_CatName2vec_dict.pkl"  # a dict version of `vidor_EntiNameEmb.npy`, refer to `tools_draft/construct_CatName2vec.py`
+with open(loadpath,'rb') as f:
+    vidor_CatName2Vec = pickle.load(f)
+
+vidor_CatNames = [v["name"] for v in vidor_categories]
+word_emb = [vidor_CatName2Vec[name] for name in vidor_CatNames]
+word_emb = np.stack(word_emb,axis=0)
+word_emb = word_emb[1:,:]  # drop __background__, shape == (80,300)
+
+print(word_emb.shape)
+
+# load_dir = "/home/gkf/project/deepSORT/tracking_results/miss60_minscore0p3/VidORval_freq1_logits/"
+# save_dir = "/home/gkf/project/deepSORT/tracking_results/miss60_minscore0p3/VidORval_freq1_classeme/"
+
+load_dir = "/home/gkf/project/MEGA_Pytorch/mega_boxfeatures/GT_boxfeatures/VidORtrain_freq1_logits/"
+save_dir = "/home/gkf/project/MEGA_Pytorch/mega_boxfeatures/GT_boxfeatures/VidORtrain_freq1_classeme/"
+
+if not os.path.exists(save_dir):
+    os.makedirs(save_dir)
+
+filename_list = sorted(os.listdir(load_dir))
+for filename in tqdm(filename_list):
+    loadpath = os.path.join(load_dir,filename)
+    logits = np.load(loadpath)
+    logits = logits[:,1:]  # drop __background__, shape == (N,80)
+    logits = torch.from_numpy(logits)
+    probs = torch.softmax(logits,dim=-1).numpy()  # shape == (N,80)
+    classeme = np.dot(probs,word_emb)  # "soft" classeme, shape == (N,300)
+
+    # e.g., "0001_2793806282_logits.npy" --> "0001_2793806282_clsme.npy"
+    save_name = filename.split('.')[0].split('logits')[0] + "clsme.npy"
+    save_path = os.path.join(save_dir,save_name)
+    np.save(save_path,classeme)
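`extract_classeme.py` above expects `tools/vidor_CatName2vec_dict.pkl`, described only as a dict version of `vidor_EntiNameEmb.npy`. A minimal sketch of how such a dict could be rebuilt, under my assumption (matching the construction order above) that row i of the `.npy` corresponds to `vidor_categories[i]["name"]`:

```python
import pickle
import numpy as np
from utils.categories_v2 import vidor_categories

# assumption: row i of vidor_EntiNameEmb.npy <-> vidor_categories[i]["name"]
enti_emb = np.load("tools/vidor_EntiNameEmb.npy")   # shape (81,300), row 0 = __background__
names = [v["name"] for v in vidor_categories]
assert len(names) == enti_emb.shape[0]

vidor_CatName2vec = {name: enti_emb[i] for i, name in enumerate(names)}
with open("tools/vidor_CatName2vec_dict.pkl", "wb") as f:
    pickle.dump(vidor_CatName2vec, f)
```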
diff --git a/tools_draft/extract_classeme_demo.py b/tools_draft/extract_classeme_demo.py
new file mode 100644
index 0000000..1a02bdc
--- /dev/null
+++ b/tools_draft/extract_classeme_demo.py
@@ -0,0 +1,54 @@
+import root_path
+import os
+import numpy as np
+import torch
+import pickle
+from tqdm import tqdm
+np.set_printoptions(suppress=True,precision=4,linewidth=500)
+
+from utils.categories_v2 import vidor_categories
+
+loadpath = "tools/vidor_CatName2vec_dict.pkl"  # a dict version of `vidor_EntiNameEmb.npy`, refer to `tools_draft/construct_CatName2vec.py`
+with open(loadpath,'rb') as f:
+    vidor_CatName2Vec = pickle.load(f)
+
+vidor_CatNames = [v["name"] for v in vidor_categories]
+
+loadpath = "/home/gkf/project/deepSORT/tracking_results/miss60_minscore0p3/VidORval_freq1_logits/0001_2793806282_logits.npy"
+logits = np.load(loadpath)
+print(logits.shape)
+
+word_emb = [vidor_CatName2Vec[name] for name in vidor_CatNames]
+word_emb = np.stack(word_emb,axis=0)
+print(word_emb.shape)
+
+def demo():
+    # the weighted average of per-category embeddings is just a matrix multiplication
+    logits = np.random.rand(3,5)
+    embs = np.random.randint(1,9,size=(5,7))
+
+    print(logits,logits.shape)
+    print(embs,embs.shape)
+    res = np.dot(logits,embs)
+    print(res,res.shape)
+
+    res2 = []
+    for lo in logits:
+        assert lo.shape == (5,)
+        xx = embs * lo[:,np.newaxis]
+        xx = np.sum(xx,axis=0)  # shape == (7,)
+        res2.append(xx)
+    res2 = np.stack(res2,axis=0)
+    print(res2,res2.shape)
+    assert np.allclose(res,res2)  # matmul == explicit per-row weighted sum
+
+logits = logits[:,1:]  # drop __background__
+logits = torch.from_numpy(logits)
+probs = torch.softmax(logits,dim=-1).numpy()  # shape == (N,80)
+word_emb = word_emb[1:,:]  # shape == (80,300)
+print(probs.shape,word_emb.shape)
+print(probs[1,:])
+print(logits[1,:].numpy())
+
+weighted_emb = np.dot(probs,word_emb)  # (N,80) x (80,300)
+print(weighted_emb.shape)
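A worked toy instance of what `demo()` verifies: since each row of `probs` sums to 1, `np.dot(probs, word_emb)` is a convex combination of the embedding rows (toy numbers of my choosing):

```python
import numpy as np

probs = np.array([[0.75, 0.25]])   # one box, two foreground classes
emb = np.array([[1.0, 0.0],        # embedding of class 1
                [0.0, 1.0]])       # embedding of class 2
classeme = np.dot(probs, emb)      # [[0.75 0.25]]
assert np.allclose(classeme, [[0.75, 0.25]])
```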
diff --git a/tools_draft/extract_logits.py b/tools_draft/extract_logits.py
new file mode 100644
index 0000000..f629794
--- /dev/null
+++ b/tools_draft/extract_logits.py
@@ -0,0 +1,95 @@
+import numpy as np
+import os
+from tqdm import tqdm
+import torch
+import torch.nn as nn
+torch.set_printoptions(sci_mode=False,precision=4)
+
+class ClsFC(nn.Module):
+    # the detector's final classification layer, run standalone on cached RoI features
+    def __init__(self,num_cls,in_dim):
+        super(ClsFC,self).__init__()
+        self.fc = nn.Linear(in_dim,num_cls)
+
+    @torch.no_grad()
+    def forward(self,x):
+        return self.fc(x)
+
+def create_model():
+    weight = "training_dir/COCO34ORfreq32_4gpu/model_0180000.pth"
+    # the weight has been released,
+    # refer to https://github.com/Dawn-LX/VidVRD-tracklets#quick-start
+
+    state_dict = torch.load(weight,map_location=torch.device('cpu'))
+    state_dict = state_dict["model"]
+    # print(state_dict.keys())
+
+    # copy only the cls_score head of the detector
+    cls_state_dict = {
+        "fc.weight":state_dict['module.roi_heads.box.predictor.cls_score.weight'].cpu(),
+        "fc.bias":state_dict['module.roi_heads.box.predictor.cls_score.bias'].cpu()
+    }
+
+    model = ClsFC(81,1024)
+    model.load_state_dict(cls_state_dict)
+
+    return model
+
+if __name__ == "__main__":
+    #NOTE originally in 10.12.86.103
+
+    dim_feature = 1024
+    num_cls = 81
+    cls_model = create_model()
+    device = torch.device("cuda:0")
+    cls_model = cls_model.cuda(device)
+
+    load_dir = "/home/gkf/deepSORT/tracking_results/miss60_minscore0p3/"
+    save_dir = "/home/gkf/deepSORT/tracking_results/miss60_minscore0p3/VidORtrain_freq1_logits"
+    if not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+
+    res_path_list = []
+    for part_id in range(1,15):
+        part_name = "VidORtrain_freq1_part{:02d}".format(part_id)
+        part_dir = os.path.join(load_dir,part_name)
+        paths = sorted(os.listdir(part_dir))
+        paths = [os.path.join(part_dir,p) for p in paths]
+        res_path_list += paths
+
+    assert len(res_path_list) == 7000  # 7000 VidOR training videos in total
+    for load_path in tqdm(res_path_list):
+        track_res = np.load(load_path,allow_pickle=True)
+        batch_features = []
+        for box_info in track_res:
+            if not isinstance(box_info,list):
+                box_info = box_info.tolist()
+            assert len(box_info) == 6 or len(box_info) == 12 + dim_feature,"len(box_info)=={}".format(len(box_info))
+
+            if len(box_info) == 12 + dim_feature:
+                cat_id = box_info[7]
+                roi_feature = box_info[12:]  # 1024-d RoI feature
+                batch_features.append(roi_feature)
+                assert cat_id > 0
+            else:
+                # this box has no RoI feature; pad with a zero vector
+                batch_features.append([0]*dim_feature)
+
+        batch_features = torch.tensor(batch_features).float()
+        assert len(track_res) == batch_features.shape[0]
+
+        cls_logits = cls_model(batch_features.to(device))  # shape == (N,81)
+        cls_logits = cls_logits.cpu().numpy()
+        save_path = os.path.join(
+            save_dir,load_path.split('/')[-1].split('.')[0] + "_logits.npy"
+        )
+        np.save(save_path,cls_logits)
+
+    print("finish")
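The 6-vs-(12+1024) branching above recurs in the demo and val scripts below, and can be read as a small parser. A hedged sketch (field indices follow these scripts: score at 6, category id at 7, RoI feature from index 12 on; the meaning of the remaining fields is not documented here):

```python
import numpy as np

DIM_FEATURE = 1024

def parse_box_info(box_info):
    """Return (score, cat_id, roi_feature) for one tracklet entry.

    Entries of length 12+1024 carry a detected box with its RoI feature;
    length-6 entries have no feature and get score/cat_id 0 and a zero vector.
    (Index layout inferred from the extract_logits*.py scripts.)
    """
    if not isinstance(box_info, list):
        box_info = box_info.tolist()
    assert len(box_info) in (6, 12 + DIM_FEATURE), "len(box_info)=={}".format(len(box_info))
    if len(box_info) == 12 + DIM_FEATURE:
        return box_info[6], box_info[7], np.array(box_info[12:])
    return 0.0, 0, np.zeros(DIM_FEATURE)
```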
cls",cat_ids[mask][index],scores[mask][index]) + + diff --git a/tools_draft/extract_logits_val.py b/tools_draft/extract_logits_val.py new file mode 100644 index 0000000..6a643b0 --- /dev/null +++ b/tools_draft/extract_logits_val.py @@ -0,0 +1,91 @@ +import numpy as np +import os +from numpy.core.fromnumeric import sort +from tqdm import tqdm +import torch +import torch.nn as nn +torch.set_printoptions(sci_mode=False,precision=4) +class ClsFC(nn.Module): + def __init__(self,num_cls,in_dim): + super(ClsFC,self).__init__() + self.fc = nn.Linear(in_dim,num_cls) + + @torch.no_grad() + def forward(self,x): + return self.fc(x) + +def create_model(): + weight = "training_dir/COCO34ORfreq32_4gpu/model_0180000.pth" + # the weight has been released, + # refer to https://github.com/Dawn-LX/VidVRD-tracklets#quick-start + + + state_dict = torch.load(weight,map_location=torch.device('cpu')) + state_dict = state_dict["model"] + # print(state_dict.keys()) + + cls_state_dict = { + "fc.weight":state_dict['module.roi_heads.box.predictor.cls_score.weight'].cpu(), + "fc.bias":state_dict['module.roi_heads.box.predictor.cls_score.bias'].cpu() + } + + model = ClsFC(81,1024) + model.load_state_dict(cls_state_dict) + + return model + +if __name__ == "__main__": + dim_feature = 1024 + num_cls = 81 + cls_model = create_model() + device = torch.device("cuda:0") + cls_model = cls_model.cuda(device) + + load_dir = "/home/gkf/project/deepSORT/tracking_results/nms0.5_miss30_score0.3/VidORval_freq1" + save_dir = "/home/gkf/project/deepSORT/tracking_results/nms0.5_miss30_score0.3/VidORval_freq1_logits" + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + # res_path_list = [] # for tr + # for part_id in range(1,15): + # part_name = "VidORtrain_freq1_part{:02d}".format(part_id) + # part_dir = os.path.join(load_dir,part_name) + # paths = sorted(os.listdir(part_dir)) + # paths = [os.path.join(part_dir,p) for p in paths] + # res_path_list += paths + # assert len(res_path_list) == 7000 + + res_path_list = sorted(os.listdir(load_dir)) + res_path_list = [os.path.join(load_dir,r) for r in res_path_list] + assert len(res_path_list) == 835 + for load_path in tqdm(res_path_list): + # print(load_path) + track_res = np.load(load_path,allow_pickle=True) + batch_features = [] + for box_info in track_res: + if not isinstance(box_info,list): + box_info = box_info.tolist() + assert len(box_info) == 6 or len(box_info) == 12 + dim_feature,"len(box_info)=={}".format(len(box_info)) + + if len(box_info) == 12 + dim_feature: + cat_id = box_info[7] + roi_feature = box_info[12:] + batch_features.append(roi_feature) + assert cat_id > 0 + else: + batch_features.append([0]*dim_feature) + + batch_features = torch.tensor(batch_features).float() + assert len(track_res) == batch_features.shape[0] + + cls_logits = cls_model(batch_features.to(device)) # shape == (N,81) + cls_logits = cls_logits.cpu().numpy() + save_path = os.path.join( + save_dir,load_path.split('/')[-1].split('.')[0] + "_logits.npy" + ) + np.save(save_path,cls_logits) + # print(save_path) + # break + + print("finish") + diff --git a/tools_draft/readme.md b/tools_draft/readme.md new file mode 100644 index 0000000..4763de5 --- /dev/null +++ b/tools_draft/readme.md @@ -0,0 +1,3 @@ +these scripts are draft + +it's good if they can benefit you \ No newline at end of file