Skip to content

Commit

Permalink
20240511
Browse files Browse the repository at this point in the history
  • Loading branch information
CuteNPC committed May 10, 2024
1 parent 435ddf3 commit 2bef911
Show file tree
Hide file tree
Showing 37 changed files with 718 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -180,5 +180,5 @@ tests/**
originfile
.vscode
exchange_git.sh
data**
data
script copy
38 changes: 36 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## <strong><span style="color: red;">Note: Code for ImageNet-100 is still incomplete, please wait a moment, thank you!</span></strong>
## <strong><span style="color: red;">Note: Code is still incomplete, please wait a moment, thank you!</span></strong>

# No Label Backdoor

Expand Down Expand Up @@ -46,7 +46,41 @@ Train the classifier. Run the script in `script/cifar10_linear`

## Run on ImageNet-100

To be finished.
### Step 1:

Split the training set into a pretraining set and a downstream set. Run

`python ./misc/imagnet_script/in100_split.py`

Create a copy with poison. Run

`python ./misc/imagnet_script/in100_add_trigger.py`

Create a folder of soft links to the clean dataset. Run

`python misc/imagnet_script/in100_link.py`

### Step 2:

Pretrain the clean encoder on pretraining set. Run the script in `script/cifar10_encoder`

### Step 3:

Select the poison subset. Run the script in `script/cifar10_poison`

### Step 4:

Create a folder of soft links to the clean dataset and the poison subset. Run

`python misc/imagnet_script/in100_link_poison.py`

### Step 5:

Pretrain the backdoor encoder on pretraining set with poison subset. Run the script in `script/cifar10_pretrain`

### Step 6:

Train the classifier. Run the script in `script/cifar10_linear`

## Citation

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
72 changes: 72 additions & 0 deletions misc/imagnet_script/in100_add_trigger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import argparse
from torchvision.datasets import ImageFolder
from PIL import Image
from pathlib import Path
import numpy as np
import os
import random
import tqdm

# Trigger image that add_trigger() resizes and pastes onto samples. The path is
# relative, so it is resolved against the current working directory at import.
patch = Image.open("misc/trigger_imnet.png")

# Fixed seed for the `random` module, which add_trigger() uses to draw the
# paste position.
random.seed(42)


def add_trigger(
    sample: Image.Image,
    trigger: str,
    K: float
) -> Image.Image:
    """Return a copy of ``sample`` with the trigger patch pasted at a random spot.

    The module-level ``patch`` image is resized to ``1/K`` of the sample's
    height and width. The top-left corner of the paste region is drawn
    independently per axis with ``random.randint`` (both ends inclusive) from
    ``[size / K, size / K * (K - 2)]``, using the module-level ``random``
    state.

    Args:
        sample: Image to stamp. It is converted to a new array, so the input
            image itself is left untouched.
        trigger: Kind of trigger to apply; only ``"patch"`` is supported.
        K: Ratio between the sample size and the patch size. Values below 3
            can produce an empty range for ``random.randint``, which raises
            ``ValueError``.

    Returns:
        A new image built from the modified pixel array.

    Raises:
        ValueError: If ``trigger`` is not ``"patch"``.
    """
    if trigger != "patch":
        # Explicit raise instead of `assert`, which is stripped under `-O`
        # and would let the function silently return None.
        raise ValueError(f"unsupported trigger: {trigger!r}")

    sample_array = np.array(sample)
    patch_height = int(sample_array.shape[0] / K)
    patch_width = int(sample_array.shape[1] / K)

    # Draw the row first, then the column, so the sequence of random numbers
    # consumed stays the same for a given seed.
    top = random.randint(patch_height, int(patch_height * (K - 2)))
    left = random.randint(patch_width, int(patch_width * (K - 2)))

    # Match the sample's mode so the channel layout of the patch agrees with
    # the region it overwrites (e.g. an RGBA trigger on an RGB sample).
    resized_patch = patch.convert(sample.mode).resize(
        (patch_width, patch_height)
    )
    sample_array[top:top + patch_height,
                 left:left + patch_width] = np.array(resized_patch)
    return Image.fromarray(sample_array)


def posion(normal_dir, poison_dir, K):
    """Write a triggered copy of every image under ``normal_dir``.

    ``normal_dir`` is read through ``ImageFolder``; each image is passed to
    ``add_trigger`` with the ``"patch"`` trigger and saved under
    ``poison_dir`` at the same path relative to ``normal_dir``.

    Args:
        normal_dir: Root of the clean dataset (``pathlib.Path``).
        poison_dir: Root of the output tree (``pathlib.Path``); created if
            missing, along with any sub-directories needed.
        K: Sample-to-patch size ratio forwarded to ``add_trigger``.

    Raises:
        ValueError: If a path reported by the dataset does not lie under
            ``normal_dir``.
    """
    os.makedirs(poison_dir, exist_ok=True)

    dataset = ImageFolder(normal_dir)

    for index, (normal_path, _) in enumerate(tqdm.tqdm(dataset.imgs)):
        img = dataset[index][0]
        # Derive the relative location from the path itself rather than by
        # slicing at a string offset, so a mismatch between the dataset's
        # root and `normal_dir` raises instead of yielding a mangled path.
        poison_path = poison_dir / Path(normal_path).relative_to(normal_dir)
        newimg = add_trigger(img, "patch", K)
        os.makedirs(poison_path.parent, exist_ok=True)
        newimg.save(poison_path)


def main():
    """Create poisoned copies of the train, linear and val splits.

    For each split ``<imagenet_dir>/<split>`` a triggered copy is written to
    ``<imagenet_dir>/<split>_poison`` via ``posion``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--imagenet_dir", type=Path,
                        default="./data/imagenet100")
    # `posion` needs the size ratio as its third argument; it was previously
    # never supplied, so every run failed with a TypeError.
    parser.add_argument("--K", type=float, required=True,
                        help="ratio between the image size and the trigger "
                             "patch size (the patch spans 1/K of each side)")
    args = parser.parse_args()

    for split, poisoned_split in (
        ("train", "train_poison"),
        ("linear", "linear_poison"),
        ("val", "val_poison"),
    ):
        posion(args.imagenet_dir / split,
               args.imagenet_dir / poisoned_split,
               args.K)


if __name__ == "__main__":
    main()
37 changes: 37 additions & 0 deletions misc/imagnet_script/in100_link.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import argparse
from pathlib import Path
import os
import tqdm


def link(target_dir: Path, link_dir: Path):
    """Mirror ``target_dir`` as a two-level tree of symbolic links.

    For every entry ``folder`` in ``target_dir`` a real directory
    ``link_dir/folder`` is created, and every entry inside
    ``target_dir/folder`` gets a symbolic link of the same name pointing at
    the absolute path of the original.

    Args:
        target_dir: Existing directory whose sub-folders hold the files.
        link_dir: Directory to create; must not exist yet.

    Raises:
        FileExistsError: If ``link_dir`` or one of its sub-folders already
            exists.
        OSError: If a symbolic link cannot be created.
    """
    os.makedirs(link_dir, exist_ok=False)

    for folder in tqdm.tqdm(os.listdir(target_dir)):
        source_folder = target_dir / folder
        link_folder = link_dir / folder
        os.makedirs(link_folder, exist_ok=False)

        for filename in tqdm.tqdm(os.listdir(source_folder)):
            # os.symlink avoids spawning a shell per file, copes with spaces
            # and shell metacharacters in names, and reports failures.
            os.symlink((source_folder / filename).absolute(),
                       link_folder / filename)


def main():
    """Build the symlink mirrors of the train and linear splits.

    ``<imagenet_dir>/train`` is mirrored into ``<imagenet_dir>/train_link``
    and ``<imagenet_dir>/linear`` into ``<imagenet_dir>/linear_link``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--imagenet_dir", type=Path,
                     default="./data/imagenet100")
    dataset_root = cli.parse_args().imagenet_dir

    # (source split, name of the link tree), processed in this order.
    split_pairs = (
        ("train", "train_link"),
        ("linear", "linear_link"),
    )
    for source_name, mirror_name in split_pairs:
        link(dataset_root / source_name, dataset_root / mirror_name)


if __name__ == "__main__":
    main()
41 changes: 41 additions & 0 deletions misc/imagnet_script/in100_link_poison.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import argparse
from torchvision.datasets import ImageFolder
from pathlib import Path
import os
import torch
import tqdm


def relink(imagenet_dir: Path, poison_name: Path):
    """Build a link tree in which the selected samples point at poisoned files.

    ``<imagenet_dir>/train_link`` is copied (links stay links) to
    ``<imagenet_dir>/poison/<poison_name>``. Then, for every index stored
    under ``"poisoning_index"`` in ``<imagenet_dir>/poison/<poison_name>.pt``,
    the corresponding entry of the copy (in ``ImageFolder`` order) is replaced
    by a symbolic link to the file with the same relative path under
    ``<imagenet_dir>/train_poison``.

    Args:
        imagenet_dir: Dataset root containing ``train_link``, ``train_poison``
            and ``poison``.
        poison_name: Name of the poison configuration; used both as the output
            directory name and as the stem of the ``.pt`` file.

    Raises:
        FileExistsError: If the output directory already exists and is not
            empty.
        OSError: If copying, removing or linking a file fails.
    """
    import shutil

    orilink_dir: Path = imagenet_dir / "train_link"
    newlink_dir: Path = imagenet_dir / "poison" / poison_name
    poison_dir: Path = imagenet_dir / "train_poison"
    pt_file: Path = imagenet_dir / "poison" / (str(poison_name) + ".pt")

    # Make sure the parent exists and the destination itself does not. Only an
    # empty leftover directory is removed; unlike os.removedirs this never
    # prunes empty parent directories.
    os.makedirs(newlink_dir.parent, exist_ok=True)
    if newlink_dir.is_dir() and not any(newlink_dir.iterdir()):
        newlink_dir.rmdir()

    # symlinks=True copies the links themselves rather than the images they
    # point at.
    shutil.copytree(orilink_dir, newlink_dir, symlinks=True)

    dataloader = ImageFolder(newlink_dir)
    # NOTE(review): torch.load unpickles the file; only load poison files from
    # a trusted source.
    poison_pt = torch.load(pt_file)
    for i in tqdm.tqdm(poison_pt["poisoning_index"]):
        path = dataloader.imgs[i][0]
        sub_file_name = Path(path).relative_to(newlink_dir)
        poi_path = poison_dir / sub_file_name
        # Swap the clean link for one that targets the poisoned image.
        os.remove(path)
        os.symlink(poi_path.absolute(), path)


def main():
    """Parse the dataset root and poison name, then rebuild the link tree."""
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--imagenet_dir",
        type=Path,
        default="./data/imagenet100",
    )
    cli.add_argument(
        "--poison_info",
        type=Path,
        default="imagenet100-resnet18-clb-None-11-0.600-1-1.0000",
    )
    options = cli.parse_args()

    dataset_root = options.imagenet_dir
    poison_name = options.poison_info
    relink(dataset_root, poison_name)


if __name__ == "__main__":
    main()
41 changes: 41 additions & 0 deletions misc/imagnet_script/in100_split.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import argparse
from pathlib import Path
import os
import random
import shutil
import tqdm

# Fixed seed for the `random` module, which main() uses to sample the files
# moved into the linear split.
random.seed(42)


def main():
    """Move a random fraction of each class folder from train to linear.

    For every sub-folder of ``--train`` a folder of the same name is created
    under ``--linear``, and ``int(len(files) * ratio)`` randomly chosen files
    are moved into it.

    Raises:
        FileExistsError: If ``--linear`` or one of its class folders already
            exists.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--train", type=Path,
                        default="./data/imagenet100/train")
    parser.add_argument("--linear", type=Path,
                        default="./data/imagenet100/linear")
    parser.add_argument("--ratio", type=float, default=0.1)
    args = parser.parse_args()
    train_dir = args.train
    linear_dir = args.linear
    ratio = args.ratio

    os.makedirs(linear_dir, exist_ok=False)

    # os.listdir returns entries in arbitrary, filesystem-dependent order.
    # Sorting both listings makes the seeded sample select the same files on
    # every machine.
    for subfolder in tqdm.tqdm(sorted(os.listdir(train_dir))):

        train_folder = train_dir / subfolder
        linear_folder = linear_dir / subfolder
        os.makedirs(linear_folder, exist_ok=False)

        file_list = sorted(os.listdir(train_folder))
        subset_len = int(len(file_list) * ratio)
        subset_file_list = random.sample(file_list, subset_len)

        for file_name in subset_file_list:
            shutil.move(train_folder / file_name, linear_folder / file_name)


if __name__ == "__main__":
    main()
4 changes: 2 additions & 2 deletions script/cifar10_encoder/barlow.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_6pretrain.py \
python3 main_pretrain.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down Expand Up @@ -32,4 +32,4 @@ python3 main_6pretrain.py \
--proj_hidden_dim 2048 \
--proj_output_dim 2048 \
--scale_loss 0.1 \
--random_seed 43
--random_seed 42
4 changes: 2 additions & 2 deletions script/cifar10_encoder/byol.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_6pretrain.py \
python3 main_pretrain.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down Expand Up @@ -36,4 +36,4 @@ python3 main_6pretrain.py \
--base_tau_momentum 0.99 \
--final_tau_momentum 1.0 \
--momentum_classifier \
--random_seed 43
--random_seed 42
4 changes: 2 additions & 2 deletions script/cifar10_encoder/mocov2plus.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_6pretrain.py \
python3 main_pretrain.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down Expand Up @@ -31,4 +31,4 @@ python3 main_6pretrain.py \
--base_tau_momentum 0.99 \
--final_tau_momentum 0.999 \
--momentum_classifier \
--random_seed 43
--random_seed 42
4 changes: 2 additions & 2 deletions script/cifar10_encoder/simclr.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_6pretrain.py \
python3 main_pretrain.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down Expand Up @@ -33,4 +33,4 @@ python3 main_6pretrain.py \
--temperature 0.2 \
--proj_hidden_dim 2048 \
--proj_output_dim 256 \
--random_seed 43
--random_seed 42
2 changes: 1 addition & 1 deletion script/cifar10_linear/barlow.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_7linear.py \
python3 main_linear.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down
2 changes: 1 addition & 1 deletion script/cifar10_linear/byol.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_7linear.py \
python3 main_linear.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down
2 changes: 1 addition & 1 deletion script/cifar10_linear/mocov2plus.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_7linear.py \
python3 main_linear.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down
2 changes: 1 addition & 1 deletion script/cifar10_linear/simclr.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_7linear.py \
python3 main_linear.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down
2 changes: 1 addition & 1 deletion script/cifar10_poison/poison.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python main_4poison.py \
python main_poison.py \
--dataset \
cifar10 \
--backbone \
Expand Down
2 changes: 1 addition & 1 deletion script/cifar10_pretrain/barlow.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_6pretrain.py \
python3 main_pretrain.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down
2 changes: 1 addition & 1 deletion script/cifar10_pretrain/byol.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_6pretrain.py \
python3 main_pretrain.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down
2 changes: 1 addition & 1 deletion script/cifar10_pretrain/mocov2plus.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_6pretrain.py \
python3 main_pretrain.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down
1 change: 0 additions & 1 deletion script/cifar10_pretrain/note.md

This file was deleted.

2 changes: 1 addition & 1 deletion script/cifar10_pretrain/simclr.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python3 main_6pretrain.py \
python3 main_pretrain.py \
--dataset cifar10 \
--backbone resnet18 \
--data_dir ./data/cifar10 \
Expand Down
Loading

0 comments on commit 2bef911

Please sign in to comment.