From f970e1044878c449ffb5a9d18529270bf642307c Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Thu, 9 May 2019 23:12:50 +0800
Subject: [PATCH 1/4] fix distribute doc test=develop

---
 python/paddle/fluid/initializer.py                 |  1 +
 .../fluid/transpiler/distribute_transpiler.py      | 28 +++++++++++++++----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index 58819efea0421..d73d794854755 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -824,6 +824,7 @@ class NumpyArrayInitializer(Initializer):
     Examples:
         .. code-block:: python

+            x = fluid.layers.data(name="x", shape=[5], dtype='float32')
             fc = fluid.layers.fc(input=x, size=10,
                 param_attr=fluid.initializer.NumpyArrayInitializer(numpy.array([1,2])))
     """
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index 60f74bb62646e..826def313dca1 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -146,6 +146,11 @@ class DistributeTranspilerConfig(object):
     We can use bandwidth efficiently when data size is larger than 2MB. If you
     want to change it, please be sure you have read the slice_variable function.

+    Examples:
+        .. code-block:: python
+
+            config = fluid.DistributeTranspilerConfig()
+            config.slice_var_up = True
     """

    slice_var_up = True
@@ -187,7 +192,7 @@ class DistributeTranspiler(object):
            current_endpoint = "192.168.0.1:6174"
            trainer_id = 0
            trainers = 4
-           role = os.getenv("PADDLE_TRAINING_ROLE")
+           role = "PSERVER"
            t = fluid.DistributeTranspiler()
            t.transpile(
                trainer_id, pservers=pserver_endpoints, trainers=trainers)
            if role == "PSERVER":
                pserver_program = t.get_pserver_program(current_endpoint)
                pserver_startup_program = t.get_startup_program(current_endpoint,
                                                                pserver_program)
            elif role == "TRAINER":
                trainer_program = t.get_trainer_program()

            # for nccl2 mode
+           trainer_num = 2
+           trainer_id = 0
            config = fluid.DistributeTranspilerConfig()
            config.mode = "nccl2"
            t = fluid.DistributeTranspiler(config=config)
-           t.transpile(trainer_id, workers=workers, current_endpoint=curr_ep)
+           t.transpile(trainer_id, trainers=workers, current_endpoint=current_endpoint)
            exe = fluid.ParallelExecutor(
-              use_cuda,
+              use_cuda=True,
               loss_name=loss_var.name,
-              num_trainers=len(trainers.split(",)),
+              num_trainers=trainer_num,
               trainer_id=trainer_id
            )
    """
@@ -289,7 +296,7 @@ def transpile(self,
                   startup_program=None,
                   current_endpoint="127.0.0.1:6174"):
         """
-        Run the transpiler.
+        Run the transpiler. Transpile the input program.

         Args:
             trainer_id (int): id for current trainer worker, if you have
@@ -309,6 +316,17 @@ def transpile(self,
             current_endpoint (str): need pass current endpoint when
                 transpile as nccl2 distributed mode. In pserver mode
                 this argument is not used.
+
+        Examples:
+            .. code-block:: python
+
+                t = fluid.DistributeTranspiler()
+                t.transpile(
+                    trainer_id=0,
+                    pservers="127.0.0.1:7000,127.0.0.1:7001",
+                    trainers=2,
+                    sync_mode=False,
+                    current_endpoint="127.0.0.1:7000")
         """
         if program is None:
             program = default_main_program()
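For context, a minimal end-to-end sketch of the pserver-mode calls documented by this patch. The endpoints, role, and trainer count below are illustrative assumptions, not values taken from the patch:

.. code-block:: python

    import paddle.fluid as fluid

    # Assumed single-host endpoints; in a real job these usually come
    # from environment variables set by the cluster launcher.
    pserver_endpoints = "127.0.0.1:6174"
    current_endpoint = "127.0.0.1:6174"
    role = "PSERVER"

    t = fluid.DistributeTranspiler()
    t.transpile(trainer_id=0, pservers=pserver_endpoints, trainers=1)

    if role == "PSERVER":
        # Program executed by the parameter server at current_endpoint.
        pserver_program = t.get_pserver_program(current_endpoint)
        # Matching startup program for that pserver.
        pserver_startup = t.get_startup_program(current_endpoint,
                                                pserver_program)
    elif role == "TRAINER":
        # Program executed by each trainer after transpilation.
        trainer_program = t.get_trainer_program()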
From b078c456788133593eba1184bb42a5e195ba9951 Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Fri, 10 May 2019 09:23:01 +0800
Subject: [PATCH 2/4] update doc test=develop

---
 paddle/fluid/pybind/pybind.cc                      | 16 ++++++++++++----
 .../fluid/transpiler/distribute_transpiler.py      | 15 +++++++++++++--
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 1b595b010ff82..cebbf9abda9ca 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -1191,15 +1191,23 @@ All parameter, weight, gradient are variables in Paddle.
         Examples:
             .. code-block:: python

+              x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+              y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+              y_predict = fluid.layers.fc(input=x, size=1, act=None)
+
+              cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+              avg_loss = fluid.layers.mean(cost)
+
+              sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+              sgd_optimizer.minimize(avg_loss)
+
               exec_strategy = fluid.ExecutionStrategy()
               exec_strategy.num_threads = 4
-              train_exe = fluid.ParallelExecutor(use_cuda=True,
-                                                 loss_name=loss.name,
+              train_exe = fluid.ParallelExecutor(use_cuda=False,
+                                                 loss_name=avg_loss.name,
                                                  exec_strategy=exec_strategy)
-              train_loss, = train_exe.run([loss.name], feed=feed_dict)
-
         )DOC");

 exec_strategy.def(py::init())
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index 826def313dca1..c9ad3340a2cd4 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -186,6 +186,16 @@ class DistributeTranspiler(object):
     Examples:
         .. code-block:: python

+            x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+            y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+
+            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            avg_loss = fluid.layers.mean(cost)
+
+            sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+            sgd_optimizer.minimize(avg_loss)
+
            # for pserver mode
            pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
            trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
            current_endpoint = "192.168.0.1:6174"
            trainer_id = 0
            trainers = 4
            role = "PSERVER"
            t = fluid.DistributeTranspiler()
            t.transpile(
                trainer_id, pservers=pserver_endpoints, trainers=trainers)
            if role == "PSERVER":
                pserver_program = t.get_pserver_program(current_endpoint)
                pserver_startup_program = t.get_startup_program(current_endpoint,
                                                                pserver_program)
            elif role == "TRAINER":
                trainer_program = t.get_trainer_program()

            # for nccl2 mode
            trainer_num = 2
            trainer_id = 0
            config = fluid.DistributeTranspilerConfig()
            config.mode = "nccl2"
+           trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
            t = fluid.DistributeTranspiler(config=config)
-           t.transpile(trainer_id, trainers=workers, current_endpoint=current_endpoint)
+           t.transpile(trainer_id=trainer_id, trainers=trainer_endpoints, current_endpoint="192.168.0.1:6174")
            exe = fluid.ParallelExecutor(
               use_cuda=True,
-              loss_name=loss_var.name,
+              loss_name=avg_loss.name,
               num_trainers=trainer_num,
               trainer_id=trainer_id
            )
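The docstrings above stop at constructing the ParallelExecutor. A runnable sketch of the same flow, assuming a CPU-only setup and random input data (neither is prescribed by the patch), would look roughly like this:

.. code-block:: python

    import numpy
    import paddle.fluid as fluid

    # Same toy regression network as in the docstring examples above.
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_loss = fluid.layers.mean(cost)
    fluid.optimizer.SGD(learning_rate=0.001).minimize(avg_loss)

    # Parameters must be initialized before the ParallelExecutor is built.
    place = fluid.CPUPlace()
    fluid.Executor(place).run(fluid.default_startup_program())

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 4
    train_exe = fluid.ParallelExecutor(use_cuda=False,
                                       loss_name=avg_loss.name,
                                       exec_strategy=exec_strategy)

    # One training step on random data; shapes match the layers above.
    feed = {'x': numpy.random.random((4, 13)).astype('float32'),
            'y': numpy.random.random((4, 1)).astype('float32')}
    loss_val, = train_exe.run(fetch_list=[avg_loss.name], feed=feed)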
From 5fdd186787caf67d100ccb55ed21eb79195d7403 Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Fri, 10 May 2019 11:33:54 +0800
Subject: [PATCH 3/4] change api spec test=develop test=document_preview

---
 paddle/fluid/API.spec | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index bb6addf226fe1..e34c21c509fbb 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -26,7 +26,7 @@ paddle.fluid.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'en
 paddle.fluid.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
 paddle.fluid.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
 paddle.fluid.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
-paddle.fluid.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4'))
+paddle.fluid.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '418c7e8b268e9be4104f2809e654c2f7'))
 paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
 paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
 paddle.fluid.DistributeTranspilerConfig.__init__
@@ -422,7 +422,7 @@ paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program (ArgSpec(args=[
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
 paddle.fluid.transpiler.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
 paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
-paddle.fluid.transpiler.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4'))
+paddle.fluid.transpiler.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '418c7e8b268e9be4104f2809e654c2f7'))
 paddle.fluid.transpiler.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
 paddle.fluid.transpiler.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
 paddle.fluid.transpiler.HashName.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
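As a reading aid for the ArgSpec entries above: every `transpile` argument after `trainer_id` carries a default, so the minimal call recorded by this spec reduces to a sketch like the following (single-trainer defaults; not taken from the patch):

.. code-block:: python

    import paddle.fluid as fluid

    t = fluid.DistributeTranspiler()
    # Defaults per the ArgSpec: program=None, pservers='127.0.0.1:6174',
    # trainers=1, sync_mode=True, startup_program=None,
    # current_endpoint='127.0.0.1:6174'.
    t.transpile(trainer_id=0)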
From b9e0f7bbf3d5e71106810c49ddda9b6b2a3fbcae Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Wed, 22 May 2019 13:53:36 +0800
Subject: [PATCH 4/4] fix dist doc test=develop

---
 paddle/fluid/API.spec | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index d5bfd904b4315..52054cd880e29 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -27,8 +27,8 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'e
 paddle.fluid.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
 paddle.fluid.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
 paddle.fluid.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '418c7e8b268e9be4104f2809e654c2f7'))
-paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
-paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
+paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', '97fdf61cf1ed4fb8f6a4b58aebce26a2'))
+paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b524b73e10f9ccdfa6d189e2e535fc17'))
 paddle.fluid.DistributeTranspilerConfig.__init__
 paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.ParallelExecutor.drop_local_exe_scopes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '80d857dc626612e2b2460d0154551e95'))
@@ -428,8 +428,8 @@ paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=
 paddle.fluid.transpiler.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
 paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
 paddle.fluid.transpiler.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '418c7e8b268e9be4104f2809e654c2f7'))
-paddle.fluid.transpiler.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
-paddle.fluid.transpiler.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
+paddle.fluid.transpiler.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', '97fdf61cf1ed4fb8f6a4b58aebce26a2'))
+paddle.fluid.transpiler.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b524b73e10f9ccdfa6d189e2e535fc17'))
 paddle.fluid.transpiler.HashName.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.transpiler.HashName.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.transpiler.HashName.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
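Finally, the two transpiler utilities re-hashed in this patch take a Program as their first argument; a minimal sketch consistent with the ArgSpec lines above, assuming a main program already built elsewhere:

.. code-block:: python

    import paddle.fluid as fluid

    main_program = fluid.default_main_program()

    # memory_optimize: remaining arguments default to skip_opt_set=None,
    # print_log=False, level=0, skip_grads=False per the ArgSpec.
    fluid.memory_optimize(main_program)

    # release_memory: skip_opt_set defaults to None per the ArgSpec.
    fluid.release_memory(main_program)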