From 441b47c443c163040536511f76a5ba2a086e4803 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 22 Oct 2021 20:02:19 +0200 Subject: [PATCH 01/47] More informative `EarlyStopping()` message (#5303) --- utils/torch_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/utils/torch_utils.py b/utils/torch_utils.py index 352ecf572c9f..d1c48f73ea72 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -299,7 +299,10 @@ def __call__(self, epoch, fitness): self.possible_stop = delta >= (self.patience - 1) # possible stop may occur next epoch stop = delta >= self.patience # stop training if patience exceeded if stop: - LOGGER.info(f'EarlyStopping patience {self.patience} exceeded, stopping training.') + LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. ' + f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n' + f'To update EarlyStopping(patience={self.patience}) pass a new patience value, ' + f'i.e. `python train.py --patience 300` or use `--patience 0` to disable EarlyStopping.') return stop From b760acec11d28063ccfe30f704bac9aceaa0b854 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 22 Oct 2021 21:11:29 +0200 Subject: [PATCH 02/47] Created using Colaboratory --- tutorial.ipynb | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tutorial.ipynb b/tutorial.ipynb index 421ddbeaa15f..ee2e81998553 100644 --- a/tutorial.ipynb +++ b/tutorial.ipynb @@ -505,7 +505,7 @@ "id": "eyTZYGgRjnMc" }, "source": [ - "## COCO val2017\n", + "## COCO val\n", "Download [COCO val 2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L14) dataset (1GB - 5000 images), and test model accuracy." ] }, @@ -533,8 +533,8 @@ "outputId": "7e6f5c96-c819-43e1-cd03-d3b9878cf8de" }, "source": [ - "# Download COCO val2017\n", - "torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017val.zip', 'tmp.zip')\n", + "# Download COCO val\n", + "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip')\n", "!unzip -q tmp.zip -d ../datasets && rm tmp.zip" ], "execution_count": null, @@ -567,7 +567,7 @@ "outputId": "3dd0e2fc-aecf-4108-91b1-6392da1863cb" }, "source": [ - "# Run YOLOv5x on COCO val2017\n", + "# Run YOLOv5x on COCO val\n", "!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half" ], "execution_count": null, @@ -627,7 +627,7 @@ "id": "rc_KbFk0juX2" }, "source": [ - "## COCO test-dev2017\n", + "## COCO test\n", "Download [COCO test2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L15) dataset (7GB - 40,000 images), to test model accuracy on test-dev set (**20,000 images, no labels**). Results are saved to a `*.json` file which should be **zipped** and submitted to the evaluation server at https://competitions.codalab.org/competitions/20794." ] }, @@ -638,10 +638,9 @@ }, "source": [ "# Download COCO test-dev2017\n", - "torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels.zip', 'tmp.zip')\n", - "!unzip -q tmp.zip -d ../ && rm tmp.zip # unzip labels\n", - "!f=\"test2017.zip\" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 7GB, 41k images\n", - "%mv ./test2017 ../coco/images # move to /coco" + "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017labels.zip', 'tmp.zip')\n", + "!unzip -q tmp.zip -d ../datasets && rm tmp.zip # unzip labels\n", + "!f=\"test2017.zip\" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d ../datasets/coco/images # 7GB 41k images" ], "execution_count": null, "outputs": [] @@ -652,7 +651,7 @@ "id": "29GJXAP_lPrt" }, "source": [ - "# Run YOLOv5s on COCO test-dev2017 using --task test\n", + "# Run YOLOv5s on COCO test\n", "!python val.py --weights yolov5s.pt --data coco.yaml --task test" ], "execution_count": null, From 79d8f1f6785dcaabc018b40a6ffd1d0a4c1d25bf Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 22 Oct 2021 21:19:23 +0200 Subject: [PATCH 03/47] Created using Colaboratory --- tutorial.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tutorial.ipynb b/tutorial.ipynb index ee2e81998553..47c44251b5ab 100644 --- a/tutorial.ipynb +++ b/tutorial.ipynb @@ -639,8 +639,8 @@ "source": [ "# Download COCO test-dev2017\n", "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017labels.zip', 'tmp.zip')\n", - "!unzip -q tmp.zip -d ../datasets && rm tmp.zip # unzip labels\n", - "!f=\"test2017.zip\" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d ../datasets/coco/images # 7GB 41k images" + "!unzip -q tmp.zip -d ../datasets && rm tmp.zip\n", + "!f=\"test2017.zip\" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d ../datasets/coco/images" ], "execution_count": null, "outputs": [] @@ -651,8 +651,8 @@ "id": "29GJXAP_lPrt" }, "source": [ - "# Run YOLOv5s on COCO test\n", - "!python val.py --weights yolov5s.pt --data coco.yaml --task test" + "# Run YOLOv5x on COCO test\n", + "!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half --task test" ], "execution_count": null, "outputs": [] From e0c3f42de7aa204d078a0402592acf5a58811fce Mon Sep 17 00:00:00 2001 From: Jebastin Nadar Date: Sat, 23 Oct 2021 17:10:34 +0530 Subject: [PATCH 04/47] Uncomment OpenCV 4.5.4 requirement in detect.py (#5305) --- detect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect.py b/detect.py index ff8e32acbaed..70c52dc5214b 100644 --- a/detect.py +++ b/detect.py @@ -89,7 +89,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval() elif onnx: if dnn: - # check_requirements(('opencv-python>=4.5.4',)) + check_requirements(('opencv-python>=4.5.4',)) net = cv2.dnn.readNetFromONNX(w) else: check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) From e57378c35b186ea38f91099746b7e54dd0141108 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 19:39:58 -0700 Subject: [PATCH 05/47] Autobatch --- utils/autobatch.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 utils/autobatch.py diff --git a/utils/autobatch.py b/utils/autobatch.py new file mode 100644 index 000000000000..993e731980fb --- /dev/null +++ b/utils/autobatch.py @@ -0,0 +1,37 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Auto-batch utils +""" + +import random + +import numpy as np +import torch +import yaml +from tqdm import tqdm + +from utils.general import colorstr + + +def autobatch(model, imgsz=640, fraction=0.8): + # Automatically compute optimal batch size to use `fraction` of available CUDA memory + prefix = colorstr('autobatch: ') + print(f'\n{prefix} Computing optimal batch size') + + t = torch.cuda.get_device_properties(0).total_memory / 1E9 # (GB) + r = torch.cuda.memory_reserved(0) / 1E9 # (GB) + a = torch.cuda.memory_allocated(0) / 1E9 # (GB) + f = r - a # free inside reserved + + try: + batch_sizes = [1, 2, 4, 8] + print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') + except Exception as e: + print() + + + #x, y = zip(*x) + #p = np.polyfit(x, y) + + + return None From 4b963ec944c2265b89c6834abb6771cb989231a4 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 19:46:08 -0700 Subject: [PATCH 06/47] fix mem --- utils/autobatch.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 993e731980fb..39b8d37c6bae 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -13,14 +13,14 @@ from utils.general import colorstr -def autobatch(model, imgsz=640, fraction=0.8): +def autobatch(model, imgsz=640, fraction=0.8, device=0): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'\n{prefix} Computing optimal batch size') - t = torch.cuda.get_device_properties(0).total_memory / 1E9 # (GB) - r = torch.cuda.memory_reserved(0) / 1E9 # (GB) - a = torch.cuda.memory_allocated(0) / 1E9 # (GB) + t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) + r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) + a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = r - a # free inside reserved try: From 0890e0eb8c1ce1a2213bd8a03e5e2483539c1476 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 19:48:26 -0700 Subject: [PATCH 07/47] fix mem2 --- utils/autobatch.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 39b8d37c6bae..4ad204caa1fd 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -21,17 +21,16 @@ def autobatch(model, imgsz=640, fraction=0.8, device=0): t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) - f = r - a # free inside reserved + f = t - (r + a) # free inside reserved + batch_sizes = [1, 2, 4, 8] + x = [] try: - batch_sizes = [1, 2, 4, 8] print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') except Exception as e: print() - - #x, y = zip(*x) - #p = np.polyfit(x, y) - + # x, y = zip(*x) + # p = np.polyfit(x, y) return None From b25616ffe8e0db0eb89bb3d8c44879d6375faa3f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:03:45 -0700 Subject: [PATCH 08/47] Update --- utils/autobatch.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 4ad204caa1fd..6892b8332a42 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -3,17 +3,16 @@ Auto-batch utils """ -import random +from copy import deepcopy import numpy as np import torch -import yaml -from tqdm import tqdm from utils.general import colorstr +from utils.torch_utils import de_parallel, profile -def autobatch(model, imgsz=640, fraction=0.8, device=0): +def autobatch(model, imgsz=64, fraction=0.8, device='cpu'): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'\n{prefix} Computing optimal batch size') @@ -22,15 +21,23 @@ def autobatch(model, imgsz=640, fraction=0.8, device=0): r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = t - (r + a) # free inside reserved + # f = 15.8 + print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') batch_sizes = [1, 2, 4, 8] - x = [] + model = deepcopy(de_parallel(model)).train() try: - print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') + img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] + y = profile(img, model, n=3, device=device) + y = [x[2] for x in y] # memory [2] except Exception as e: print() - # x, y = zip(*x) - # p = np.polyfit(x, y) + p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit + f_intercept = int((f - p[0]) / p[1]) # optimal batch size + return f_intercept - return None + +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) + +autobatch(model) From ab9b602db4e5596f162a100de6c28b833d47d8a1 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:05:19 -0700 Subject: [PATCH 09/47] Update --- utils/autobatch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 6892b8332a42..66e07f133578 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -12,11 +12,12 @@ from utils.torch_utils import de_parallel, profile -def autobatch(model, imgsz=64, fraction=0.8, device='cpu'): +def autobatch(model, imgsz=64, fraction=0.9): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'\n{prefix} Computing optimal batch size') + device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) @@ -34,7 +35,7 @@ def autobatch(model, imgsz=64, fraction=0.8, device='cpu'): print() p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - f_intercept = int((f - p[0]) / p[1]) # optimal batch size + f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept From 57c39fbe2c27f3a1fc6f5ffa5d469781effe32ad Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:07:24 -0700 Subject: [PATCH 10/47] Update --- utils/autobatch.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 66e07f133578..a10d71a87c88 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -12,10 +12,10 @@ from utils.torch_utils import de_parallel, profile -def autobatch(model, imgsz=64, fraction=0.9): +def autobatch(model, imgsz=640, fraction=0.9): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') - print(f'\n{prefix} Computing optimal batch size') + print(f'\n{prefix} Computing optimal batch size for --imgsz {imgsz}') device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) @@ -38,7 +38,4 @@ def autobatch(model, imgsz=64, fraction=0.9): f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept - -model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) - -autobatch(model) +# autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 9f67809d80897837fc143e419d32bf2610167cd5 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:14:22 -0700 Subject: [PATCH 11/47] Update --- utils/autobatch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index a10d71a87c88..9e013602d26e 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -15,7 +15,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') - print(f'\n{prefix} Computing optimal batch size for --imgsz {imgsz}') + print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) @@ -23,7 +23,7 @@ def autobatch(model, imgsz=640, fraction=0.9): a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = t - (r + a) # free inside reserved # f = 15.8 - print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') + print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') batch_sizes = [1, 2, 4, 8] model = deepcopy(de_parallel(model)).train() @@ -35,6 +35,7 @@ def autobatch(model, imgsz=640, fraction=0.9): print() p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit + print(batch_sizes, y, p) f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept From 554a962edd152ff440c2d50b1569b6f94347d450 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:16:26 -0700 Subject: [PATCH 12/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 9e013602d26e..603d2930d203 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -34,7 +34,7 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print() - p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit + p = np.polyfit(batch_sizes, y, w=batch_sizes, deg=1) # first degree polynomial fit print(batch_sizes, y, p) f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept From b1c7d341325382a812bb26f2e77342f7f9127d2f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:18:17 -0700 Subject: [PATCH 13/47] Update --- utils/autobatch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/autobatch.py b/utils/autobatch.py index 603d2930d203..b91000c951da 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -36,6 +36,7 @@ def autobatch(model, imgsz=640, fraction=0.9): p = np.polyfit(batch_sizes, y, w=batch_sizes, deg=1) # first degree polynomial fit print(batch_sizes, y, p) + print(np.polyval(p, batch_sizes)) f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept From d6a210e8232ac61ffd23b77cf264c80313cae6df Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:19:45 -0700 Subject: [PATCH 14/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index b91000c951da..3dc08616b13d 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -25,7 +25,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8] + batch_sizes = [1, 2, 4, 8, 16] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] From b86b3072c00449d62494ed505b8465bd594820e9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:21:34 -0700 Subject: [PATCH 15/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 3dc08616b13d..e209a7a1cb67 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -25,7 +25,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16] + batch_sizes = [1, 2, 4, 8, 16, 32, 64] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] From ce4110757715ea557b691a8c18a0d4880eedf244 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:22:41 -0700 Subject: [PATCH 16/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index e209a7a1cb67..2db2781b0a58 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -25,7 +25,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 32, 64] + batch_sizes = [1, 2, 4, 8, 16, 32] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] From d18d3921e36c5287113c259ae69241ab0d0f5cca Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:25:33 -0700 Subject: [PATCH 17/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 2db2781b0a58..35296542219a 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -34,7 +34,7 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print() - p = np.polyfit(batch_sizes, y, w=batch_sizes, deg=1) # first degree polynomial fit + p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit print(batch_sizes, y, p) print(np.polyval(p, batch_sizes)) f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size From 5d960f51b263be674fc1a52a2e9e8f425e4d198a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:28:35 -0700 Subject: [PATCH 18/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 35296542219a..df8e4abc6cff 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -37,7 +37,7 @@ def autobatch(model, imgsz=640, fraction=0.9): p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit print(batch_sizes, y, p) print(np.polyval(p, batch_sizes)) - f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size + f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 59510d71e4c7c9ad38115f03ea51c571ce9cbad4 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:29:55 -0700 Subject: [PATCH 19/47] Update --- utils/autobatch.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index df8e4abc6cff..e95f6e75802a 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -34,10 +34,12 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print() - p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - print(batch_sizes, y, p) - print(np.polyval(p, batch_sizes)) - f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size + for i in range(2, 7): + p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit + f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size + print(f_intercept) + + return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From dfc248f837eddb76a5e5803ba3d7458c2ac58257 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:32:14 -0700 Subject: [PATCH 20/47] Update --- utils/autobatch.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index e95f6e75802a..dbe6a64d273a 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -25,7 +25,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 32] + batch_sizes = [1, 2, 4, 8, 16, 64] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] @@ -34,12 +34,13 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print() - for i in range(2, 7): + + print(y) + for i in range(2, len(batch_sizes)): p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size print(f_intercept) - return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 6877a06292ddec1071c828c0e4f63beed828b3bf Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:34:55 -0700 Subject: [PATCH 21/47] Update --- utils/autobatch.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index dbe6a64d273a..7845bd44cf18 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -30,11 +30,11 @@ def autobatch(model, imgsz=640, fraction=0.9): try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) - y = [x[2] for x in y] # memory [2] + y = [x[2] for x in y if y] # memory [2] except Exception as e: - print() - + print((f'{prefix}{e}) + batch_sizes = batch_sizes[:len(y)] print(y) for i in range(2, len(batch_sizes)): p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit From bef55101e47eaf46f47e82bcf2a2b6fe39862a4f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:35:53 -0700 Subject: [PATCH 22/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 7845bd44cf18..1dcdccfdb65e 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -32,7 +32,7 @@ def autobatch(model, imgsz=640, fraction=0.9): y = profile(img, model, n=3, device=device) y = [x[2] for x in y if y] # memory [2] except Exception as e: - print((f'{prefix}{e}) + print(f'{prefix}{e}') batch_sizes = batch_sizes[:len(y)] print(y) From 3cecfd1290b0b0656870501fa41980dc1b8967f3 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:37:33 -0700 Subject: [PATCH 23/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 1dcdccfdb65e..fa8df008ab37 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -30,10 +30,10 @@ def autobatch(model, imgsz=640, fraction=0.9): try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) - y = [x[2] for x in y if y] # memory [2] except Exception as e: print(f'{prefix}{e}') + y = [x[2] for x in y if y] # memory [2] batch_sizes = batch_sizes[:len(y)] print(y) for i in range(2, len(batch_sizes)): From 06ceddf4ed3bf30fc16a24db375042768df7dda9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:38:43 -0700 Subject: [PATCH 24/47] Update --- utils/autobatch.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index fa8df008ab37..2b8152e47ed6 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -33,9 +33,8 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print(f'{prefix}{e}') - y = [x[2] for x in y if y] # memory [2] + y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] - print(y) for i in range(2, len(batch_sizes)): p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size From 30cdf72da74a152fbd81dcf99cbbc59500b9b309 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:44:58 -0700 Subject: [PATCH 25/47] Update --- utils/autobatch.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 2b8152e47ed6..bcf6332b7547 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -12,7 +12,7 @@ from utils.torch_utils import de_parallel, profile -def autobatch(model, imgsz=640, fraction=0.9): +def autobatch(model, imgsz=640, fraction=0.95): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') @@ -22,10 +22,9 @@ def autobatch(model, imgsz=640, fraction=0.9): r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = t - (r + a) # free inside reserved - # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 64] + batch_sizes = [1, 2, 4, 8, 16] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] @@ -35,11 +34,8 @@ def autobatch(model, imgsz=640, fraction=0.9): y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] - for i in range(2, len(batch_sizes)): - p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit - f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f_intercept) - + p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit + f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 29bcbd5b7117d9ef17812e3ee75eb146df136c74 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:45:22 -0700 Subject: [PATCH 26/47] Update --- utils/autobatch.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index bcf6332b7547..b9f722869158 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -16,7 +16,7 @@ def autobatch(model, imgsz=640, fraction=0.95): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') - + model = deepcopy(de_parallel(model)).train() device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) @@ -25,7 +25,6 @@ def autobatch(model, imgsz=640, fraction=0.95): print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') batch_sizes = [1, 2, 4, 8, 16] - model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) From 9426f6e2fad70c665feb0b053790288f4d76dd59 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:46:31 -0700 Subject: [PATCH 27/47] Update --- utils/autobatch.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/utils/autobatch.py b/utils/autobatch.py index b9f722869158..b0c89311ffc1 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -14,6 +14,12 @@ def autobatch(model, imgsz=640, fraction=0.95): # Automatically compute optimal batch size to use `fraction` of available CUDA memory + # Usage: + # import torch + # from utils.autobatch import autobatch + # model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) + # print(autobatch(model)) + prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') model = deepcopy(de_parallel(model)).train() From c08463790592ef6031d2f3bdd66b2a6e83410a09 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 22:33:18 -0700 Subject: [PATCH 28/47] Update --- utils/autobatch.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/utils/autobatch.py b/utils/autobatch.py index b0c89311ffc1..f344156409bb 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -12,6 +12,14 @@ from utils.torch_utils import de_parallel, profile +def check_batch_size(model, imgsz=640, b=16): + # Check YOLOv5 batch size + assert isinstance(b, int), f'batch-size {b} must be integer' + if b < 1: + b = autobatch(model, imgsz) # compute optimal batch size + return b + + def autobatch(model, imgsz=640, fraction=0.95): # Automatically compute optimal batch size to use `fraction` of available CUDA memory # Usage: From ed38add5797786fe14aff716dcaa5d7f1f3bc61e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:01:15 -0700 Subject: [PATCH 29/47] Update train.py --- train.py | 15 ++++++++++----- utils/autobatch.py | 3 +-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/train.py b/train.py index da7346be77ab..ed33f506c17d 100644 --- a/train.py +++ b/train.py @@ -36,6 +36,7 @@ from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors +from utils.autobatch import check_batch_size from utils.datasets import create_dataloader from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \ @@ -131,6 +132,14 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary print(f'freezing {k}') v.requires_grad = False + # Image size + gs = max(int(model.stride.max()), 32) # grid size (max stride) + imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple + + # Batch size + if cuda and RANK == -1: # single-GPU only + batch_size = check_batch_size(model, batch_size, imgsz) + # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing @@ -190,11 +199,6 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary del ckpt, csd - # Image sizes - gs = max(int(model.stride.max()), 32) # grid size (max stride) - nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj']) - imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple - # DP mode if cuda and RANK == -1 and torch.cuda.device_count() > 1: logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n' @@ -242,6 +246,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) # Model parameters + nl = model.model[-1].nl # number of detection layers (to scale hyps) hyp['box'] *= 3. / nl # scale to layers hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers diff --git a/utils/autobatch.py b/utils/autobatch.py index f344156409bb..89cd5b53c40c 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -14,8 +14,7 @@ def check_batch_size(model, imgsz=640, b=16): # Check YOLOv5 batch size - assert isinstance(b, int), f'batch-size {b} must be integer' - if b < 1: + if b < 1 or b == 'auto': b = autobatch(model, imgsz) # compute optimal batch size return b From d9e2463c3588f6a0eee65e0fb2244f77192caba3 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:15:34 -0700 Subject: [PATCH 30/47] print result --- utils/autobatch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/autobatch.py b/utils/autobatch.py index 89cd5b53c40c..2834fb035020 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,6 +48,7 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size + print(f'{prefix} batch-size {f_intercept} estimated to use f{fraction * 100}%% of CUDA device {device} memory') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 1a57a3570099e2c3d671ba288da98264361d612d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:17:55 -0700 Subject: [PATCH 31/47] Cleanup print result --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 2834fb035020..8e5ce7ddd238 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,7 +48,7 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix} batch-size {f_intercept} estimated to use f{fraction * 100}%% of CUDA device {device} memory') + print(f'{prefix}batch-size {f_intercept} estimated to utilize {fraction * 100}% of CUDA:{device} memory') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 9f4f2a5c6c3be00f8104f627adffe154a4f0cfe9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:34:05 -0700 Subject: [PATCH 32/47] swap fix in call --- train.py | 2 +- utils/autobatch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index ed33f506c17d..b7b542a38b46 100644 --- a/train.py +++ b/train.py @@ -138,7 +138,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only - batch_size = check_batch_size(model, batch_size, imgsz) + batch_size = check_batch_size(model, imgsz, batch_size) # Optimizer nbs = 64 # nominal batch size diff --git a/utils/autobatch.py b/utils/autobatch.py index 8e5ce7ddd238..5ea1978fd999 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,7 +48,7 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {fraction * 100}% of CUDA:{device} memory') + print(f'{prefix}batch-size {f_intercept} estimated to utilize {fraction * 100}% of {str(device).upper()} memory') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 796bd69e29c839b9d3930fb526208a9f9ee181b7 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:36:06 -0700 Subject: [PATCH 33/47] to 64 --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 5ea1978fd999..03484969f988 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -37,7 +37,7 @@ def autobatch(model, imgsz=640, fraction=0.95): f = t - (r + a) # free inside reserved print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16] + batch_sizes = [1, 2, 4, 8, 16, 32, 64] try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) From 46c68c1ea9738d8c5855f90512745e8706740e7e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:41:40 -0700 Subject: [PATCH 34/47] use total --- utils/autobatch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 03484969f988..404f81af3dee 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -47,8 +47,9 @@ def autobatch(model, imgsz=640, fraction=0.95): y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {fraction * 100}% of {str(device).upper()} memory') + f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size + print(f'{prefix}batch-size {f_intercept} estimated to utilize {f_intercept:3}G of ' + f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0g})') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 7a07b812ea278834b05fc9ca55547403c92910c2 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:44:34 -0700 Subject: [PATCH 35/47] fix --- utils/autobatch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 404f81af3dee..6838eeea45ee 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,8 +48,8 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {f_intercept:3}G of ' - f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0g})') + print(f'{prefix}batch-size {f_intercept} estimated to utilize {t * fraction:3}G of ' + f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0f}%)') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 2f9f5dbb739354cbdc733ffbfaa5d0e9b86d4e8e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:50:28 -0700 Subject: [PATCH 36/47] fix --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 6838eeea45ee..5f3f2a781287 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,7 +48,7 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {t * fraction:3}G of ' + print(f'{prefix}batch-size {f_intercept} estimated to utilize {t * fraction:.3g}G of ' f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0f}%)') return f_intercept From 85d01e19727e122902f08f7c12fd36d0ef61217b Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:53:55 -0700 Subject: [PATCH 37/47] fix --- utils/autobatch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 5f3f2a781287..b8676a0544a6 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,8 +48,8 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {t * fraction:.3g}G of ' - f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0f}%)') + print(f'{prefix}batch-size {f_intercept} estimated to utilize ' + f'{str(device).upper()} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 0faff8ed640593ae6ae32134d1db10d8b12aa25f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:55:41 -0700 Subject: [PATCH 38/47] fix --- train.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index b7b542a38b46..f2465366f62d 100644 --- a/train.py +++ b/train.py @@ -138,7 +138,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only - batch_size = check_batch_size(model, imgsz, batch_size) + with amp.autocast(): + batch_size = check_batch_size(model, imgsz, batch_size) # Optimizer nbs = 64 # nominal batch size From 66fe7b2a6ec2ac8dbc36229afff490cfa4e5452f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:57:34 -0700 Subject: [PATCH 39/47] fix --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index b8676a0544a6..03b4f1f62d1a 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -47,7 +47,7 @@ def autobatch(model, imgsz=640, fraction=0.95): y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size + f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size print(f'{prefix}batch-size {f_intercept} estimated to utilize ' f'{str(device).upper()} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') return f_intercept From f864fb5a5147ecaad9a970a310459d95afdb6332 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:10:49 -0700 Subject: [PATCH 40/47] Update --- train.py | 3 ++- utils/autobatch.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index f2465366f62d..f4f54cf7ec7b 100644 --- a/train.py +++ b/train.py @@ -139,7 +139,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only with amp.autocast(): - batch_size = check_batch_size(model, imgsz, batch_size) + batch_size = check_batch_size(deepcopy(model).eval(), imgsz, batch_size) + batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) # Optimizer nbs = 64 # nominal batch size diff --git a/utils/autobatch.py b/utils/autobatch.py index 03b4f1f62d1a..33c0d2d9a68c 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -29,7 +29,6 @@ def autobatch(model, imgsz=640, fraction=0.95): prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') - model = deepcopy(de_parallel(model)).train() device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) From f3434ff0e7cbc88e2951ca313590aff163194ba6 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:16:36 -0700 Subject: [PATCH 41/47] Update --- train.py | 2 +- utils/torch_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index f4f54cf7ec7b..01b6aacb16ee 100644 --- a/train.py +++ b/train.py @@ -139,7 +139,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only with amp.autocast(): - batch_size = check_batch_size(deepcopy(model).eval(), imgsz, batch_size) + check_batch_size(deepcopy(model).eval(), imgsz, batch_size) batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) # Optimizer diff --git a/utils/torch_utils.py b/utils/torch_utils.py index d1c48f73ea72..6f52f9a3728d 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -126,7 +126,7 @@ def profile(input, ops, n=10, device=None): _ = (sum([yi.sum() for yi in y]) if isinstance(y, list) else y).sum().backward() t[2] = time_sync() except Exception as e: # no backward method - print(e) + # print(e) # for debug t[2] = float('nan') tf += (t[1] - t[0]) * 1000 / n # ms per op forward tb += (t[2] - t[1]) * 1000 / n # ms per op backward From 1f7aa2e1b03e291f01e49969140468fa8f35a153 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:17:46 -0700 Subject: [PATCH 42/47] Update --- train.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index 01b6aacb16ee..7760c826cf7b 100644 --- a/train.py +++ b/train.py @@ -138,8 +138,14 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only + + model2 = deepcopy(model).eval() + for k, v in model2.named_parameters(): + v.requires_grad = True # train all layers + check_batch_size(model2.eval(), imgsz, batch_size) + with amp.autocast(): - check_batch_size(deepcopy(model).eval(), imgsz, batch_size) + check_batch_size(model2.eval(), imgsz, batch_size) batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) # Optimizer From e8ccd55ee91f2183499152aa611424545c137d56 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:23:36 -0700 Subject: [PATCH 43/47] Update --- train.py | 7 ------- utils/autobatch.py | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/train.py b/train.py index 7760c826cf7b..dfe997b1bde2 100644 --- a/train.py +++ b/train.py @@ -138,14 +138,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only - - model2 = deepcopy(model).eval() - for k, v in model2.named_parameters(): - v.requires_grad = True # train all layers - check_batch_size(model2.eval(), imgsz, batch_size) - with amp.autocast(): - check_batch_size(model2.eval(), imgsz, batch_size) batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) # Optimizer diff --git a/utils/autobatch.py b/utils/autobatch.py index 33c0d2d9a68c..d94dabc47981 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -36,7 +36,7 @@ def autobatch(model, imgsz=640, fraction=0.95): f = t - (r + a) # free inside reserved print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 32, 64] + batch_sizes = [1, 2, 4, 8, 16, 32] try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) From 7537daab678e35eab6c5a71b8e070be50c534fc2 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:25:24 -0700 Subject: [PATCH 44/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index d94dabc47981..73ffe68b628f 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -19,7 +19,7 @@ def check_batch_size(model, imgsz=640, b=16): return b -def autobatch(model, imgsz=640, fraction=0.95): +def autobatch(model, imgsz=640, fraction=0.9): # Automatically compute optimal batch size to use `fraction` of available CUDA memory # Usage: # import torch From 1e6d2d84b26c56c56be3cac946ac7134ee149668 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:35:13 -0700 Subject: [PATCH 45/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 73ffe68b628f..3f97140c1a29 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -36,7 +36,7 @@ def autobatch(model, imgsz=640, fraction=0.9): f = t - (r + a) # free inside reserved print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 32] + batch_sizes = [1, 2, 4, 8, 16] try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) From a39de1e91fb2563bb1fe6f5308f37d79706e0098 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:54:23 -0700 Subject: [PATCH 46/47] Update --- train.py | 9 ++++----- utils/autobatch.py | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/train.py b/train.py index dfe997b1bde2..d83f3cd1863c 100644 --- a/train.py +++ b/train.py @@ -36,7 +36,7 @@ from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors -from utils.autobatch import check_batch_size +from utils.autobatch import check_train_batch_size from utils.datasets import create_dataloader from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \ @@ -137,9 +137,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple # Batch size - if cuda and RANK == -1: # single-GPU only - with amp.autocast(): - batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) + if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size + batch_size = check_train_batch_size(model, imgsz) # Optimizer nbs = 64 # nominal batch size @@ -446,7 +445,7 @@ def parse_opt(known=False): parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path') parser.add_argument('--epochs', type=int, default=300) - parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') + parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') diff --git a/utils/autobatch.py b/utils/autobatch.py index 3f97140c1a29..22a8c59040c8 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -7,20 +7,20 @@ import numpy as np import torch +from torch.cuda import amp from utils.general import colorstr -from utils.torch_utils import de_parallel, profile +from utils.torch_utils import profile -def check_batch_size(model, imgsz=640, b=16): - # Check YOLOv5 batch size - if b < 1 or b == 'auto': - b = autobatch(model, imgsz) # compute optimal batch size - return b +def check_train_batch_size(model, imgsz=640): + # Check YOLOv5 training batch size + with amp.autocast(): + return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size -def autobatch(model, imgsz=640, fraction=0.9): - # Automatically compute optimal batch size to use `fraction` of available CUDA memory +def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): + # Automatically estimate best batch size to use `fraction` of available CUDA memory # Usage: # import torch # from utils.autobatch import autobatch @@ -30,6 +30,10 @@ def autobatch(model, imgsz=640, fraction=0.9): prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') device = next(model.parameters()).device # get model device + if device.type == 'cpu': + print(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') + return batch_size + t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) From f4141339201c0e4eb247d1334d735dfae72a1e38 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 21:10:04 -0700 Subject: [PATCH 47/47] Cleanup printing --- utils/autobatch.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 22a8c59040c8..cf65502d5608 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -34,11 +34,12 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): print(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') return batch_size + d = str(device).upper() # 'CUDA:0' t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = t - (r + a) # free inside reserved - print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') + print(f'{prefix}{d} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') batch_sizes = [1, 2, 4, 8, 16] try: @@ -50,9 +51,8 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize ' - f'{str(device).upper()} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') - return f_intercept + b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) + print(f'{prefix}batch-size {b} estimated to utilize {d} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') + return b # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False))