
Commit: add sup (CE)

doxawang committed Jan 6, 2022
1 parent 379c151 commit b8b1265
Showing 11 changed files with 390 additions and 79 deletions.
5 changes: 3 additions & 2 deletions bash_files/pretrain/cifar/dino.sh
@@ -1,4 +1,4 @@
python3 ../../main_pretrain.py \
python3 ../../../main_pretrain.py \
--dataset $1 \
--backbone resnet18 \
--data_dir ./datasets \
@@ -37,4 +37,5 @@ python3 ../../main_pretrain.py \
--num_prototypes 4096 \
--base_tau_momentum 0.9995 \
--final_tau_momentum 1.0 \
--momentum_classifier
--momentum_classifier \
$2
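The new trailing `$2` lets a caller splice extra flags into the pretraining command. A minimal sketch of how a driver script might pass poisoning flags through that slot (the path below is a placeholder, not one from this commit):

import os

# Hypothetical call: "$1" receives the dataset name, "$2" the quoted extra flags.
extra_flags = '--poison_data /path/to/poison.pt --use_poison'
os.system(f'CUDA_VISIBLE_DEVICES=0 sh dino.sh cifar10 " {extra_flags} "')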
94 changes: 78 additions & 16 deletions bash_files/pretrain/cifar/exe.py
@@ -1,3 +1,8 @@
rate = '0.60'
dataset = 'cifar10'
# poison_method = 'zoo-simclr'
poison_method = 'clb'
import time

def sweep_poison_rate(args):
    i = 0
@@ -18,24 +23,78 @@ def sweep_poison_rate(args):
        if i >= len(args.gpus):
            return

def sweep_trigger(args):
def sweep_pretrain_method(args):
    i = 0
    # for dataset in ['cifar10', 'cifar100']:
    # for method in 'dino'.split(' '):
    for method in ['sup', 'supcon', 'simclr', 'mocov2plus', 'byol', 'simsiam', 'swav', 'dino', 'barlow']:
        # for rate in '0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 1.00'.split(' '):
        gpu = args.gpus[i]
        print(rate)

        os.system(f"""
for file in /data/yfwang/solo-learn/poison_datasets/{dataset}/{poison_method}/gaussian_noise/{dataset}_{poison_method}_rate_{rate}_*.pt
do
# echo ${{file}}, {method}
# CUDA_VISIBLE_DEVICES={gpu} sh {method}.sh {dataset} " --poison_data ${{file}} --use_poison --checkpoint_dir /data/yfwang/solo-learn/pretrain/{dataset} " &
done
"""
        )
        i += 1
        if i >= len(args.gpus):
            return
        time.sleep(1)


def sweep_eval(args):
    i = 0
    for dataset in ['cifar10', 'cifar100']:
        for rate in '0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 1.00'.split(' '):
            gpu = args.gpus[i]
            print(rate)
            os.system(f"""
for file in /data/yfwang/solo-learn/poison_datasets/{dataset}/zoo-simclr/gaussian_noise/{dataset}_zoo-simclr_rate_{rate}_*.pt
do
# echo ${{file}}
CUDA_VISIBLE_DEVICES={gpu} sh simclr.sh {dataset} " --poison_data ${{file}} --use_poison --checkpoint_dir /data/yfwang/solo-learn/pretrain/{dataset} " &
done
"""
            )
            i += 1
            if i >= len(args.gpus):
                return
        # for method in 'dino'.split(' '):
        for method in ['sup','simclr']:
            for poison_method in ['zoo-simclr', 'clb']:
                # for apply_method in ['use_poison', 'eval_poison']:
                for apply_method in ['eval_poison']:
                    # for rate in '0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 1.00'.split(' '):
                    gpu = args.gpus[i]
                    print(rate)

                    os.system(f"""
for file in /data/yfwang/solo-learn/poison_datasets/{dataset}/{poison_method}/gaussian_noise/{dataset}_{poison_method}_rate_{rate}_*.pt
do
# echo ${{file}}, {method}
CUDA_VISIBLE_DEVICES={gpu} sh {method}.sh {dataset} " --poison_data ${{file}} --{apply_method} --checkpoint_dir /data/yfwang/solo-learn/pretrain/{dataset} " &
done
"""
                    )
                    i += 1
                    if i >= len(args.gpus):
                        return
                    time.sleep(1)

def sweep_cifar100(args):
    i = 0
    for dataset in ['cifar100']:
        # for method in 'dino'.split(' '):
        for method in ['sup','simclr']:
            for poison_method in ['zoo-simclr', 'clb']:
                # for apply_method in ['use_poison', 'eval_poison']:
                for apply_method in ['use_poison']:
                    # for rate in '0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 1.00'.split(' '):
                    gpu = args.gpus[i]
                    print(rate)

                    os.system(f"""
for file in /data/yfwang/solo-learn/poison_datasets/{dataset}/{poison_method}/gaussian_noise/{dataset}_{poison_method}_rate_{rate}_*.pt
do
# echo ${{file}}, {method}
CUDA_VISIBLE_DEVICES={gpu} sh {method}.sh {dataset} " --poison_data ${{file}} --{apply_method} --checkpoint_dir /data/yfwang/solo-learn/pretrain/{dataset} " &
done
"""
                    )
                    i += 1
                    if i >= len(args.gpus):
                        return
                    time.sleep(1)

if __name__ == "__main__":
    import argparse
@@ -46,4 +105,7 @@ def sweep_trigger(args):
    parser.add_argument('gpus', type=int, nargs="+", help="")

    args = parser.parse_args()
    sweep_poison_rate(args)
    # sweep_pretrain_method(args)
    # sweep_pretrain_method(args)
    # sweep_eval(args)
    sweep_cifar100(args)
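exe.py is driven by a single positional `gpus` argument; each sweep assigns one configuration per listed GPU id and returns once the list is exhausted. A small sketch of the parsing contract (the GPU ids are placeholders):

import argparse

# Invoked from a shell as: python3 exe.py 0 1 2 3
parser = argparse.ArgumentParser()
parser.add_argument('gpus', type=int, nargs="+", help="")
args = parser.parse_args(['0', '1', '2', '3'])
assert args.gpus == [0, 1, 2, 3]  # sweeps stop when i reaches len(args.gpus)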
31 changes: 31 additions & 0 deletions bash_files/pretrain/cifar/sup.sh
@@ -0,0 +1,31 @@
python3 ../../../main_pretrain.py \
--dataset $1 \
--backbone resnet18 \
--data_dir ./datasets \
--max_epochs 1000 \
--gpus 0 \
--accelerator gpu \
--precision 16 \
--optimizer sgd \
--scheduler warmup_cosine \
--lr 0.1 \
--classifier_lr 0.1 \
--weight_decay 1e-5 \
--batch_size 256 \
--num_workers 4 \
--crop_size 32 \
--brightness 0.0 \
--contrast 0.0 \
--saturation 0.0 \
--hue 0.0 \
--gaussian_prob 0.0 0.0 \
--crop_size 32 \
--num_crops_per_aug 1 1 \
--zero_init_residual \
--name sup-$1 \
--project solo-learn \
--entity doxawang \
--save_checkpoint \
--method sup \
$2 \
--wandb
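The `sup` training method named by `--method sup` is added elsewhere in this commit (the solo/methods change is not loaded in this view). As a rough sketch, a supervised cross-entropy (CE) baseline of this kind reduces to the step below; the module names are assumptions, not solo-learn's actual classes:

import torch.nn.functional as F

def supervised_ce_step(backbone, classifier, images, targets):
    # Plain supervised baseline: embed, classify, cross-entropy loss.
    feats = backbone(images)
    logits = classifier(feats)
    return F.cross_entropy(logits, targets)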
104 changes: 80 additions & 24 deletions main_poison.py
@@ -42,8 +42,7 @@
from solo.utils.classification_dataloader import prepare_data_no_aug
from poisoning_utils import *

def main():
    args = parse_args_linear()
def main_lfb(args):

    assert args.backbone in BaseMethod._SUPPORTED_BACKBONES
    backbone_model = {
@@ -119,14 +118,16 @@ def main():
    # subset_indices = np.random.choice(len(train_features), 100*args.num_classes, replace=False)
    # plot_tsne(train_features.cpu()[subset_indices], train_labels[subset_indices], args.num_classes)

    # step 1: get anchor
    num_poisons = int(args.poison_rate * len(train_features) / args.num_classes)


    # step 1: get anchor
    if args.target_class is None:
        anchor_idx = untargeted_anchor_selection(train_features, num_poisons)
    else:
        all_index = torch.arange(len(train_features))
        anchor_idx = all_index[train_labels == args.target_class][args.target_index]
        anchor_idx = targeted_anchor_selection(train_features, train_labels, args.target_class, num_poisons)
        # all_index = torch.arange(len(train_features))
        # anchor_idx = all_index[train_labels == args.target_class][args.target_index]

    anchor_feature = train_features[anchor_idx]
    anchor_label = train_labels[anchor_idx]
@@ -135,6 +136,7 @@
    # step 2: get poisoning subset by selecting KNN (including anchor itself)
    poisoning_index = get_poisoning_indices(anchor_feature, train_features, num_poisons)
    poisoning_index = poisoning_index.cpu()

    # step 3: injecting triggers to the subset
    pattern, mask = generate_trigger(trigger_type=args.trigger_type)
    poison_images = add_trigger(train_images, pattern, mask, poisoning_index, args.trigger_alpha)
@@ -146,18 +148,6 @@
    print('ratio of same-class (class {%d}) samples: %.4f ' % (
        anchor_label, acc))


    args.poison_data_name = "%s_%s_rate_%.2f_target_%s_trigger_%s_alpha_%.2f_class_%d_acc_%.4f" % (
        args.dataset,
        args.pretrain_method,
        args.poison_rate,
        args.target_class,
        args.trigger_type,
        args.trigger_alpha,
        anchor_label,
        acc)


    poisoning_data = {
        'clean_data': train_images,
        'poison_data': poison_images,
@@ -167,18 +157,58 @@
        'anchor_label': anchor_label,
        'pattern': pattern,
        'mask': mask,
        'args': args,
        'acc': acc,
    }

    args.save_dir = os.path.join(args.save_dir, args.dataset, args.pretrain_method, args.trigger_type)
    return poisoning_data

    os.makedirs(args.save_dir, exist_ok=True)
    file_name = os.path.join(args.save_dir, args.poison_data_name + '.pt')

    print('saving to %s' % file_name)
def main_clb(args):

    train_loader, _, train_dataset, _ = prepare_data_no_aug(
        args.dataset,
        data_dir=args.data_dir,
        train_dir=args.train_dir,
        val_dir=args.val_dir,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
    )

    train_images, train_labels = train_dataset.data, np.array(train_dataset.targets)
    num_poisons = int(args.poison_rate * len(train_images) / args.num_classes)

    assert args.target_class is not None
    poisoning_index = torch.arange(len(train_images))[train_labels == args.target_class]
    shuffle_idx = torch.randperm(len(poisoning_index))
    poisoning_index = poisoning_index[shuffle_idx]
    poisoning_index = poisoning_index[:num_poisons].cpu()

    anchor_label = args.target_class

    # step 3: injecting triggers to the subset
    pattern, mask = generate_trigger(trigger_type=args.trigger_type)
    poison_images = add_trigger(train_images, pattern, mask, poisoning_index, args.trigger_alpha)

    poisoning_labels = np.array(train_labels)[poisoning_index]

    torch.save(poisoning_data, file_name)
    acc = (poisoning_labels == anchor_label).astype(np.float).mean()

    print('ratio of same-class (class {%d}) samples: %.4f ' % (
        anchor_label, acc))

    poisoning_data = {
        'clean_data': train_images,
        'poison_data': poison_images,
        'targets': train_labels,
        'poisoning_index': poisoning_index,
        'anchor_data': None,
        'anchor_label': anchor_label,
        'pattern': pattern,
        'mask': mask,
        'acc': acc,
    }

    return poisoning_data


def test(model, data_loader):
@@ -197,5 +227,31 @@ def test(model, data_loader):
    acc = float(total_correct) / len(data_loader.dataset)
    return loss, acc


if __name__ == "__main__":
    main()
    args = parse_args_linear()

    if args.pretrain_method == 'clb':
        poison_data = main_clb(args)
    else:
        poison_data = main_lfb(args)

    args.poison_data_name = "%s_%s_rate_%.2f_target_%s_trigger_%s_alpha_%.2f_class_%d_acc_%.4f" % (
        args.dataset,
        args.pretrain_method,
        args.poison_rate,
        args.target_class,
        args.trigger_type,
        args.trigger_alpha,
        poison_data['anchor_label'],
        poison_data['acc'])

    args.save_dir = os.path.join(args.save_dir, args.dataset, args.pretrain_method, args.trigger_type)

    os.makedirs(args.save_dir, exist_ok=True)
    file_name = os.path.join(args.save_dir, args.poison_data_name + '.pt')
    print('saving to %s' % file_name)

    poison_data['args'] = args

    torch.save(poison_data, file_name)
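`generate_trigger` and `add_trigger` live in poisoning_utils.py and their bodies are not part of this diff. Judging from the call sites above, a plausible sketch of the trigger injection is an alpha blend of a patterned patch into the selected images (shapes and semantics here are assumptions, not the committed implementation):

import numpy as np

def add_trigger_sketch(images, pattern, mask, poisoning_index, alpha):
    # Alpha-blend the trigger pattern into the chosen images where the mask is set.
    poisoned = images.astype(np.float32)
    for i in poisoning_index:
        poisoned[i] = (1 - alpha * mask) * poisoned[i] + alpha * mask * pattern
    return poisoned.clip(0, 255).astype(images.dtype)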
1 change: 0 additions & 1 deletion main_pretrain.py
@@ -75,7 +75,6 @@ def main():
    else:
        poison_data = None
        poison_suffix = ''

    if args.num_large_crops != 2:
        assert args.method == "wmse"

44 changes: 30 additions & 14 deletions poisoning_utils.py
@@ -28,20 +28,36 @@ def untargeted_anchor_selection(train_features, num_poisons):
    idx = torch.argmax(mean_top_sim)
    return idx

def targeted_anchor_selection(train_features, train_labels, target_class, num_poisons, selection='first', budget=-1):
    all_index = torch.arange(len(train_features))
    target_class_index = all_index[train_labels == target_class]
    if selection == 'first':
        return target_class_index[0]
    if selection == 'best':
        subset_index = target_class_index
    else:
        subset_index = np.random.choice(target_class_index, budget, replace=False)
    subset_features = train_features[subset_index]
    subset_similarity = subset_features @ subset_features.T
    mean_top_sim = torch.topk(subset_similarity, num_poisons, dim=1)[0].mean(dim=1)
    idx = torch.argmax(mean_top_sim)
    return subset_index[idx]

def targeted_anchor_selection(train_features, train_labels, target_class, num_poisons):
    similarity = train_features @ train_features.T
    mean_top_sim = torch.topk(similarity, num_poisons, dim=1)[0].mean(dim=1)
    # for target_class in range(10):
    from copy import deepcopy
    tgt_sim = deepcopy(mean_top_sim)

    tgt_sim[train_labels!=target_class] = -1
    idx = torch.argmax(tgt_sim)
    val = tgt_sim[idx]
    print(target_class, (mean_top_sim > val).float().sum())
    # import pdb; pdb.set_trace()
    return idx


# def targeted_anchor_selection(train_features, train_labels, target_class, num_poisons, selection='first', budget=-1):
#     all_index = torch.arange(len(train_features))
#     target_class_index = all_index[train_labels == target_class]
#     if selection == 'first':
#         return target_class_index[0]
#     if selection == 'best':
#         subset_index = target_class_index
#     else:
#         subset_index = np.random.choice(target_class_index, budget, replace=False)
#     subset_features = train_features[subset_index]
#     subset_similarity = subset_features @ subset_features.T
#     mean_top_sim = torch.topk(subset_similarity, num_poisons, dim=1)[0].mean(dim=1)
#     idx = torch.argmax(mean_top_sim)
#     return subset_index[idx]


def get_poisoning_indices(anchor_feature, train_features, num_poisons):
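The body of `get_poisoning_indices` is collapsed in this view. Consistent with how it is called in main_poison.py (a KNN neighborhood around the anchor, anchor included), a sketch under that assumption:

import torch

def get_poisoning_indices_sketch(anchor_feature, train_features, num_poisons):
    # Rank all training samples by feature similarity to the anchor and keep
    # the top num_poisons; the anchor scores highest, so it is included.
    similarity = train_features @ anchor_feature
    return torch.topk(similarity, num_poisons).indices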
3 changes: 2 additions & 1 deletion solo/args/dataset.py
@@ -58,8 +58,9 @@ def dataset_args(parser: ArgumentParser):
parser.add_argument("--target_class", type=int, default=None)
parser.add_argument("--save_dir", default=Path("datasets"), type=Path)
parser.add_argument("--poison_data", default=None, type=Path)
parser.add_argument("--pretrain_method", default=None, type=Path)
parser.add_argument("--pretrain_method", default=None, type=str)
parser.add_argument("--target_index", default=0, type=int)
parser.add_argument("--clb", action="store_true")


def augmentations_args(parser: ArgumentParser):
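With `--pretrain_method` now parsed as a plain string rather than a `Path`, the string comparison in main_poison.py's `__main__` block works as intended. A hypothetical invocation of the clean-label (clb) branch; the flag spellings for rate, trigger type, and alpha are assumed from the args.* attributes used in main_poison.py, and all values are placeholders:

import subprocess

# 'clb' routes main_poison.py to main_clb, which asserts --target_class is set.
subprocess.run([
    "python3", "main_poison.py",
    "--dataset", "cifar10",
    "--pretrain_method", "clb",
    "--target_class", "0",
    "--poison_rate", "0.60",
    "--trigger_type", "gaussian_noise",
    "--trigger_alpha", "0.20",
])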