{
  "Version": "1.0.0",
  "ServerConfig": {
    "ipAddress": "0.0.0.0",
    "managementIpAddress": "0.0.0.0",
    "port": 1025,
    "managementPort": 1026,
    "metricsPort": 1027,
    "allowAllZeroIpListening": true,
    "maxLinkNum": 1000,
    "httpsEnabled": false,
    "fullTextEnabled": false,
    "inferMode": "standard",
    "interCommTLSEnabled": false,
    "interCommPort": 1121,
    "openAiSupport": "vllm",
    "tokenTimeout": 600,
    "e2eTimeout": 600,
    "distDPServerEnabled": false
  },
  "BackendConfig": {
    "backendName": "mindieservice_llm_engine",
    "modelInstanceNumber": 1,
    "npuDeviceIds": [[1]],
    "tokenizerProcessNumber": 8,
    "multiNodesInferEnabled": false,
    "multiNodesInferPort": 1120,
    "interNodeTLSEnabled": false,
    "ModelDeployConfig": {
      "maxSeqLen": 8192,
      "maxInputTokenLen": 7168,
      "truncation": false,
      "ModelConfig": [
        {
          "modelInstanceType": "Standard",
          "modelName": "qwen3-8b",
          "modelWeightPath": "/data/models/Qwen3-8B",
          "worldSize": 1,
          "cpuMemSize": 0,
          "npuMemSize": -1,
          "backendType": "atb",
          "trustRemoteCode": true
        }
      ]
    },
    "ScheduleConfig": {
      "templateType": "Standard",
      "templateName": "Standard_LLM",
      "cacheBlockSize": 128,
      "maxPrefillBatchSize": 50,
      "maxPrefillTokens": 8192,
      "prefillTimeMsPerReq": 150,
      "prefillPolicyType": 0,
      "decodeTimeMsPerReq": 50,
      "decodePolicyType": 0,
      "maxBatchSize": 200,
      "maxIterTimes": 512,
      "maxPreemptCount": 0,
      "supportSelectBatch": false,
      "maxQueueDelayMicroseconds": 5000,
      "maxFirstTokenWaitTime": 2500
    }
  },
  "LogConfig": {
    "dynamicLogLevel": "",
    "dynamicLogLevelValidHours": 2,
    "dynamicLogLevelValidTime": ""
  },
  "EnableDynamicAdjustTimeoutConfig": false
}