diff --git a/llm/fastdeploy_llm/utils/launch_infer.py b/llm/fastdeploy_llm/utils/launch_infer.py index eca1198202..526da36b98 100644 --- a/llm/fastdeploy_llm/utils/launch_infer.py +++ b/llm/fastdeploy_llm/utils/launch_infer.py @@ -43,6 +43,19 @@ def launch(device_ids, **kwargs: dict): pd_cmd = "python3 -m paddle.distributed.launch --devices {} {} {}".format( device_ids, infer_script_path, ' '.join(args)) + + multicard = True + try: + ids = eval(device_ids) + if isinstance(ids, int): + multicard = False + except: + pass + + if os.getenv("DISABLE_DISTRIBUTED_LAUNCH", "OFF") == "ON" and not multicard: + logger.info("Since DISABLE_DISTRIBUTED_LAUNCH=ON and the model is running with single card, will disable paddle.distributed.launch.") + pd_cmd = "python3 {} {}".format(infer_script_path, ' '.join(args)) + logger.info("Launch model with command: {}".format(pd_cmd)) logger.info("Model is initializing...") p = subprocess.Popen(