(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936] EngineCore failed to start.
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936] Traceback (most recent call last):
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core.py", line 927, in run_engine_core
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]     engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs)
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core.py", line 692, in __init__
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]     super().__init__(
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core.py", line 113, in __init__
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]     num_gpu_blocks, num_cpu_blocks, kv_cache_config = self._initialize_kv_caches(
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core.py", line 254, in _initialize_kv_caches
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]     kv_cache_configs = get_kv_cache_configs(
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/core/kv_cache_utils.py", line 1514, in get_kv_cache_configs
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]     _check_enough_kv_cache_memory(
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/core/kv_cache_utils.py", line 634, in _check_enough_kv_cache_memory
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936]     raise ValueError(
(EngineCore_DP0 pid=111) ERROR 02-10 06:27:53 [core.py:936] ValueError: To serve at least one request with the models's max seq len (65536), (7.0 GiB KV cache is needed, which is larger than the available KV cache memory (4.3 GiB). Based on the available memory, the estimated maximum model length is 40208. Try increasing `gpu_memory_utilization` or decreasing `max_model_len` when initializing the engine. See https://docs.vllm.ai/en/latest/configuration/conserving_memory/ for more details.
(EngineCore_DP0 pid=111) Process EngineCore_DP0:
(EngineCore_DP0 pid=111) Traceback (most recent call last):
(EngineCore_DP0 pid=111)   File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore_DP0 pid=111)     self.run()
(EngineCore_DP0 pid=111)   File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
(EngineCore_DP0 pid=111)     self._target(*self._args, **self._kwargs)
(EngineCore_DP0 pid=111)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core.py", line 940, in run_engine_core
(EngineCore_DP0 pid=111)     raise e
(EngineCore_DP0 pid=111)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core.py", line 927, in run_engine_core
(EngineCore_DP0 pid=111)     engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs)
(EngineCore_DP0 pid=111)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core.py", line 692, in __init__
(EngineCore_DP0 pid=111)     super().__init__(
(EngineCore_DP0 pid=111)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core.py", line 113, in __init__
(EngineCore_DP0 pid=111)     num_gpu_blocks, num_cpu_blocks, kv_cache_config = self._initialize_kv_caches(
(EngineCore_DP0 pid=111)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core.py", line 254, in _initialize_kv_caches
(EngineCore_DP0 pid=111)     kv_cache_configs = get_kv_cache_configs(
(EngineCore_DP0 pid=111)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/core/kv_cache_utils.py", line 1514, in get_kv_cache_configs
(EngineCore_DP0 pid=111)     _check_enough_kv_cache_memory(
(EngineCore_DP0 pid=111)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/core/kv_cache_utils.py", line 634, in _check_enough_kv_cache_memory
(EngineCore_DP0 pid=111)     raise ValueError(
(EngineCore_DP0 pid=111) ValueError: To serve at least one request with the models's max seq len (65536), (7.0 GiB KV cache is needed, which is larger than the available KV cache memory (4.3 GiB). Based on the available memory, the estimated maximum model length is 40208. Try increasing `gpu_memory_utilization` or decreasing `max_model_len` when initializing the engine. See https://docs.vllm.ai/en/latest/configuration/conserving_memory/ for more details.
[rank0]:[W210 06:27:54.341555941 ProcessGroupNCCL.cpp:1524] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html
(APIServer pid=29) Traceback (most recent call last):
(APIServer pid=29)   File "/usr/local/bin/qwen-asr-serve", line 7, in <module>
(APIServer pid=29)     sys.exit(main())
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/qwen_asr/cli/serve.py", line 42, in main
(APIServer pid=29)     vllm_main()
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/cli/main.py", line 73, in main
(APIServer pid=29)     args.dispatch_function(args)
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/cli/serve.py", line 60, in cmd
(APIServer pid=29)     uvloop.run(run_server(args))
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/uvloop/__init__.py", line 69, in run
(APIServer pid=29)     return loop.run_until_complete(wrapper())
(APIServer pid=29)   File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/uvloop/__init__.py", line 48, in wrapper
(APIServer pid=29)     return await main
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 1319, in run_server
(APIServer pid=29)     await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 1338, in run_server_worker
(APIServer pid=29)     async with build_async_engine_client(
(APIServer pid=29)   File "/usr/lib/python3.10/contextlib.py", line 199, in __aenter__
(APIServer pid=29)     return await anext(self.gen)
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 173, in build_async_engine_client
(APIServer pid=29)     async with build_async_engine_client_from_engine_args(
(APIServer pid=29)   File "/usr/lib/python3.10/contextlib.py", line 199, in __aenter__
(APIServer pid=29)     return await anext(self.gen)
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 214, in build_async_engine_client_from_engine_args
(APIServer pid=29)     async_llm = AsyncLLM.from_vllm_config(
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/async_llm.py", line 205, in from_vllm_config
(APIServer pid=29)     return cls(
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/async_llm.py", line 132, in __init__
(APIServer pid=29)     self.engine_core = EngineCoreClient.make_async_mp_client(
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core_client.py", line 122, in make_async_mp_client
(APIServer pid=29)     return AsyncMPClient(*client_args)
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core_client.py", line 824, in __init__
(APIServer pid=29)     super().__init__(
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/core_client.py", line 479, in __init__
(APIServer pid=29)     with launch_core_engines(vllm_config, executor_class, log_stats) as (
(APIServer pid=29)   File "/usr/lib/python3.10/contextlib.py", line 142, in __exit__
(APIServer pid=29)     next(self.gen)
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/utils.py", line 921, in launch_core_engines
(APIServer pid=29)     wait_for_engine_startup(
(APIServer pid=29)   File "/usr/local/lib/python3.10/dist-packages/vllm/v1/engine/utils.py", line 980, in wait_for_engine_startup
(APIServer pid=29)     raise RuntimeError(
(APIServer pid=29) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}
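
The root cause is the ValueError raised in `_check_enough_kv_cache_memory`: with `max_model_len=65536`, a single request would need about 7.0 GiB of KV cache, but only 4.3 GiB of GPU memory remains after loading the weights. Since KV-cache size grows roughly linearly with sequence length, 65536 × 4.3 / 7.0 ≈ 40k, which matches the 40208 estimate in the message. Below is a minimal sketch of the two remedies the error itself suggests, written against vLLM's Python API; the model identifier and the concrete numbers are placeholders, not values taken from this deployment.

```python
# Minimal sketch of the two fixes suggested by the ValueError above, using
# vLLM's Python API. The model id and the concrete numbers are placeholders
# (assumptions), not values confirmed by the log.
from vllm import LLM

llm = LLM(
    model="your-org/your-asr-model",  # placeholder model id
    # Option 1: cap the context length below the estimated maximum
    # (40208 in the log) so one request's KV cache fits in the ~4.3 GiB
    # left over after the weights are loaded.
    max_model_len=40000,
    # Option 2 (can be combined with option 1): let vLLM claim a larger
    # fraction of GPU memory for weights + KV cache. The default is 0.9;
    # raising it only helps if nothing else is using that headroom.
    gpu_memory_utilization=0.95,
)
```

When serving from the command line, the same settings correspond to vLLM's `--max-model-len` and `--gpu-memory-utilization` flags; since `qwen-asr-serve` dispatches into `vllm_main()` (see the traceback above), it presumably forwards these flags, but that is an assumption about the wrapper rather than something the log confirms.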