代码拉取完成,页面将自动刷新
设置layout后并行切分策略文件保存不支持,导致pipeline无法正常推理
fact.mindspore_semi_parallel_impl(parallel_net, dataset=parallel_dataset, epoch=1,
> device_num=8)
../test_parallel_shard_layout.py:2695:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../pipeline_split/test_pipeline.py:297: in mindspore_semi_parallel_impl
dataset_strategy=dataset_strategy, **kwargs)
../../pipeline_split/test_pipeline.py:278: in __mindspore_impl
eval_network=eval_network)
../../pipeline_split/test_pipeline.py:238: in _model_train_and_save_ckpt
dataset_sink_mode=self.dataset_sink_mode)
/root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/train/model.py:1074: in train
initial_epoch=initial_epoch)
/root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/train/model.py:114: in wrapper
func(self, *args, **kwargs)
/root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/train/model.py:617: in _train
self._train_process(epoch, train_dataset, list_callback, cb_params, initial_epoch, valid_infos)
/root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/train/model.py:919: in _train_process
outputs = self._train_network(*next_element)
/root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/cell.py:662: in __call__
out = self.compile_and_run(*args, **kwargs)
/root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/cell.py:980: in compile_and_run
self.compile(*args, **kwargs)
/root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/cell.py:964: in compile
jit_config_dict=self._jit_config_dict, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <mindspore.common.api._CellGraphExecutor object at 0xfffea3397610>
obj = TrainOneStepCell<
(network): WithLossCell<
(_backbone): TrainNet<
(block): CellList<
(0): MatMulNe... (_loss_fn): SoftmaxCrossEntropyWithLogits<>
>
(optimizer): CustomOptimizer<>
(grad_reducer): Identity<>
>
phase = 'train.1710836617257453824.281467729116912.0', do_convert = True
jit_config_dict = {'exc_mode': 'auto', 'jit_level': 'O1', 'jit_syntax_level': ''}
args = (Tensor(shape=[128, 96], dtype=Float32, value=
[[ 1.76405239e+00, 4.00157213e-01, 9.78738010e-01 ... 9.76639032e-01...000e+00],
[ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00 ... 0.00000000e+00, 0.00000000e+00, 0.00000000e+00]]))
kwargs = {}, key_id = '2814677291169121710836617257453824', key = 0
def compile(self, obj, *args, phase='predict', do_convert=True, jit_config_dict=None, **kwargs):
"""
Compiles graph.
Args:
obj (Function/Cell): The function or cell instance need compile.
phase (str): The name of compile phase. Default: 'predict'.
do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
jit_config_dict (dict): Jit config for compile. Default: ``None``.
args (tuple): Args of the Cell object.
kwargs (dict): Kwargs of the Cell object.
Return:
Str, the full phase of the cell.
Bool, if the graph has been compiled before, return False, else return True.
"""
obj.__parse_method__ = 'construct'
if not hasattr(obj, obj.__parse_method__):
raise AttributeError(
'The class {} dose not have method {}'.format(obj.__class__.__name__, obj.__parse_method__))
key_id = str(id(obj)) + str(obj.create_time)
args = get_auto_dynamic_shape_args(args, key_id)
self.enable_tuple_broaden = False
if hasattr(obj, "enable_tuple_broaden"):
self.enable_tuple_broaden = obj.enable_tuple_broaden
logger.debug(f"Convert the network: {do_convert}.")
self._graph_executor.set_enable_tuple_broaden(self.enable_tuple_broaden)
key = self._graph_executor.generate_arguments_key(obj, args, kwargs, self.enable_tuple_broaden)
obj.arguments_key = str(key)
phase = phase + '.' + str(obj.create_time) + '.' + str(id(obj)) + '.' + obj.arguments_key
update_auto_dynamic_shape_phase(args, key_id, phase)
if phase in obj.compile_cache and self.has_compiled(phase):
logger.debug("%r graph has existed.", phase)
# Release resource should be released when CompileInner won't be executed, such as cur_convert_input_
# generated in generate_arguments_key.
self._graph_executor.clear_compile_arguments_resource()
return phase, False
obj.check_names()
_check_full_batch()
self._set_dataset_mode(obj)
self._set_compile_cache_dep_files(phase)
self._graph_executor.set_weights_values(obj.parameters_dict())
if jit_config_dict:
self._graph_executor.set_jit_config(jit_config_dict)
else:
jit_config_dict = JitConfig().jit_config_dict
self._graph_executor.set_jit_config(jit_config_dict)
> result = self._graph_executor.compile(obj, args, kwargs, phase, self._use_vm_mode())
E RuntimeError: The pointer[node_stra.second] is null.
E
E ----------------------------------------------------
E - Framework Unexpected Exception Raised:
E ----------------------------------------------------
E This exception is caused by framework's unexpected error. Please create an issue at https://gitee.com/mindspore/mindspore/issues to get help.
E
E ----------------------------------------------------
E - C++ Call Stack: (For framework developers)
E ----------------------------------------------------
E mindspore/ccsrc/frontend/parallel/strategy_checkpoint/strategy_checkpoint_info.cc:135 to_protobuf
E
E ----------------------------------------------------
E - The Traceback of Net Construct Code:
E ----------------------------------------------------
E
E # In file /root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/wrap/cell_wrapper.py:416
E def construct(self, *inputs):
E ^
E
E # In file /root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/wrap/cell_wrapper.py:418
E return self._no_sens_impl(*inputs)
E ^
E
E # In file /root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/wrap/cell_wrapper.py:431
E def _no_sens_impl(self, *inputs):
E ^
E
E # In file /root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/wrap/cell_wrapper.py:433
E loss = self.network(*inputs)
E ^
E
E # In file /root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/wrap/cell_wrapper.py:120
E def construct(self, data, label):
E
E # In file /root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/wrap/cell_wrapper.py:121
E out = self._backbone(data)
E ^
E
E # In file /root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/nn/wrap/cell_wrapper.py:121
E out = self._backbone(data)
E ^
E
E # In file /home/zhanglin/MindSporeTest/parallel/pipeline_split/test_pipeline.py:87
E for i in range(self.micro_size):
E
E # In file /home/zhanglin/MindSporeTest/parallel/pipeline_split/test_pipeline.py:88
E x = self.block[i](x)
E ^
E
E # In file /home/zhanglin/MindSporeTest/parallel/pipeline_split/test_pipeline.py:64
E x = self.matmul1(inputs, self.matmul1_weight)
E ^
/root/archiconda3/envs/zhanglin3.7/lib/python3.7/site-packages/mindspore/common/api.py:1584: RuntimeError
Hardware Environment (Ascend/GPU/CPU) / 硬件环境:
device ascend
Software Environment / 软件环境 (Mandatory / 必填):
-- MindSpore version (e.g., 1.7.0.Bxxx) :
-- Python version (e.g., Python 3.7.5) :
-- OS platform and distribution (e.g., Linux Ubuntu 16.04):
-- GCC/Compiler version (if compiled from source):
Execute Mode / 执行模式 (Mandatory / 必填) (PyNative/Graph):
mode graph
test_parallel_shard_layout_and_pipeline
执行用例
Please assign maintainer to check this issue.
请为此issue分配处理人。
@zhang_lin66
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。
感谢您的提问,您可以评论//mindspore-assistant更快获取帮助:
未实现需求,转需求
ccb结论:转需求
登录 后才可以发表评论