diff --git a/bin/install_test.sh b/bin/install_test.sh index 3e4829fa510c6e33918363df03cada8ac1e82c7f..b3014b73004313823d289d3786543b3029ef76ba 100755 --- a/bin/install_test.sh +++ b/bin/install_test.sh @@ -15,7 +15,6 @@ testsuite=$1 . $TONE_ROOT/lib/common.sh security_setting - . $TONE_BM_SUITE_DIR/install.sh check_and_create_path $TONE_BM_BUILD_DIR diff --git a/conf/functional/mooncake-ci-test.conf b/conf/functional/mooncake-ci-test.conf index dbc8e9376d0297f6a087006f7b2948002d4187da..a2d8a710fc0c9a4625fc722c30251ef4208bacd1 100644 --- a/conf/functional/mooncake-ci-test.conf +++ b/conf/functional/mooncake-ci-test.conf @@ -1,2 +1,2 @@ test -1p1d-erdma-test \ No newline at end of file +default \ No newline at end of file diff --git a/tests/mooncake-ci-test/Readme.md b/tests/mooncake-ci-test/Readme.md index ffd97435c74ffda3c471ab206cf921273f363831..ae16132b88a726facc517f761112b381bb729e2d 100644 --- a/tests/mooncake-ci-test/Readme.md +++ b/tests/mooncake-ci-test/Readme.md @@ -1,7 +1,17 @@ # mooncake-ci-test ## Description -This test suite validates the Mooncake disaggregated inference system's functionality. It tests SGLang server deployment and operation in prefill and decode modes, verifying the distributed inference architecture. The test includes launching local and remote server instances, configuring a load balancer, and performing end-to-end inference requests. +This test suite provides automated end-to-end testing for Mooncake project with the following capabilities: + +1. **Code Pulling**: Automatically pulls E2E test code from GitHub +2. **Variable Replacement**: Scans scripts in the `scripts/` directory starting with `test_` and automatically replaces the following variables: + - `LOCAL_IP` - Local machine IP address + - `REMOTE_IP` - Remote machine IP address + - `ARTIFACT_ID` - GitHub Actions artifact ID + - `GIT_REPO` - Git repository URL for downloading whl packages +3. **Test Execution**: Automatically executes compliant test scripts +4. **Result Parsing**: Reads and parses test results from [test_results.json] files +5. **Log Collection**: Copies test logs to paths accessible by the T-One platform ## Homepage ## Version @@ -10,13 +20,14 @@ This test suite validates the Mooncake disaggregated inference system's function functional ## Parameters -- __ARTIFACT_ID__ : the artifact id -- __PYTHON_VERSION__ : the python version -- __REGISTRY_ADDR__ : the image used to run test -- __USE_HUGGINGFACE_MIRROR__ : whether to use huggingface mirror -- __HUGGINGFACE_MIRROR__ : the huggingface mirror -- __USE_MODELSCOPE__ : whether to use modelscope -- __REMOTE_TEST_DIR__ : the remote test directory +- __ARTIFACT_ID__: The artifact ID from GitHub Actions +- __TEST_GIT_REPO__: Repository containing test code (default: kvcache-ai/Mooncake) +- __GIT_REPO__: Repository for obtaining whl packages (default: kvcache-ai/Mooncake) +- __SERVER__ : Server IP +- __CLIENT__ : Client IP +- __BRANCH__ : Branch name of the repository for obtaining test code (default: main) +- __LOCAL_MOONCAKE_DIR__ : Local directory for Mooncake + ## Results ``` @@ -25,21 +36,6 @@ Pass/Fail ## Manual Run ``` -prefill: -python -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --disaggregation-mode prefill --port 30000 --host 192.168.0.145 --tp-size 2 --base-gpu-id=2 - -decode: -python -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --disaggregation-mode decode --port 30000 --host 192.168.0.137 --tp-size 2 --base-gpu-id=2 - -proxy: -python3 -m sglang_router.launch_router --pd-disaggregation --prefill http://192.168.0.145:30000 --decode http://192.168.0.137:30000 --host 0.0.0.0 --port 8000 - -curl -s -w "\n%{http_code}" -X POST http://127.0.0.1:8000/generate -H "Content-Type: application/json" -d '{ - "text": "Let me tell you a short story ", - "sampling_params": { - "temperature": 0 - } - }' --max-time 30 - +bash test_1p1d_erdma.sh ``` -For detailed implementation, you can examine the scripts in the ```scripts/``` directory. +For detailed implementation, please refer to https://github.com/kvcache-ai/Mooncake diff --git a/tests/mooncake-ci-test/common.sh b/tests/mooncake-ci-test/common.sh new file mode 100644 index 0000000000000000000000000000000000000000..2976a55f6c1067ed492da3e4afbd8d9161162088 --- /dev/null +++ b/tests/mooncake-ci-test/common.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# 采用git镜像加速下载并反复尝试 +# 两个参数:下载地址,目标文件夹 +# 三个参数:镜像地址,下载地址,目标文件夹 +mirror_download(){ + repo_url=$1 + if [ $# -eq 3 ]; then + if grep github.com /root/.ssh/known_hosts > /dev/null; then + repo_url=$2 + fi + name=$3 + else + name=$2 + fi + max_attempts=${max_attempts:-"5"} + max_time=${max_time:-"10m"} + attempt_count=0 + success=false + set_branch="" + CLONE_BRANCH=${CLONE_BRANCH:-""} + if [[ "X${CLONE_BRANCH}" != "X" ]]; then + set_branch="-b ${CLONE_BRANCH}" + fi + while [[ $attempt_count -lt $max_attempts ]]; do + pwd + echo "timeout $max_time git clone --depth=1 --progress $set_branch ${repo_url} $name" + timeout $max_time git clone --depth=1 --progress $set_branch ${repo_url} $name && success=true && break + + echo "Clone attempt $((attempt_count+1)) failed, cleaning up and retrying..." + rm -rf $name # Clean up failed clone directory + attempt_count=$((attempt_count+1)) + sleep 5 + done + if ! $success; then + echo "Failed to clone the repository after $max_attempts attempts." + return 1 + fi +} + +extract_ip_from_string() { + local str=$1 + if [ -z "$str" ]; then + echo "Error: Empty client string provided" >&2 + return 1 + fi + + local ip=$(echo "$str" | cut -d'-' -f2) + if [ -z "$ip" ]; then + echo "Error: Failed to extract IP from client string: $str" >&2 + return 1 + fi + + echo "$ip" + return 0 +} + +append_str() { + local original_str="$1" + local append_value="$2" + + if [ -z "$original_str" ]; then + echo "$append_value" + else + echo "${original_str}; ${append_value}" + fi +} + diff --git a/tests/mooncake-ci-test/install.sh b/tests/mooncake-ci-test/install.sh index d5a435a6fd289f69bbf8f5e492b6c10e9504e74f..4ab02284cfd610f705b3d767a44ddfda49a810a0 100644 --- a/tests/mooncake-ci-test/install.sh +++ b/tests/mooncake-ci-test/install.sh @@ -5,91 +5,47 @@ # WEB_URL= # GIT_URL= -CONTAINER_NAME=${CONTAINER_NAME:-"mooncake-ci-test"} -PYTHON_VERSION=${PYTHON_VERSION:-"3.12"} -MODEL_CACHE=${MODEL_CACHE:-"/root/.cache"} -readonly SSH_CMD="ssh -o StrictHostKeyChecking=no" -REGISTRY_ADDR=${REGISTRY_ADDR:-"lmsysorg/sglang:v0.5.5"} -# REGISTRY_ADDR : the image used to run test -# CLIENT= -# SERVER= -USE_HUGGINGFACE_MIRROR=${USE_HUGGINGFACE_MIRROR:-true} -HUGGINGFACE_MIRROR=${HUGGINGFACE_MIRROR:-"https://hf-mirror.com"} -USE_MODELSCOPE=${USE_MODELSCOPE:-true} -REMOTE_TEST_DIR=${REMOTE_TEST_DIR:-"/tmp/mooncake_ci_test"} -# PYTHON_VERSION : python version -# ARTIFACT_ID= +DEP_PKG_LIST="unzip jq" -. $TONE_BM_SUITE_DIR/scripts/common.sh +TEST_GIT_REPO=${TEST_GIT_REPO:-"kvcache-ai/Mooncake"} +BRANCH=${BRANCH:-"main"} +# LOCAL_MOONCAKE_DIR= -# fetch() +. $TONE_BM_SUITE_DIR/common.sh -build() -{ - : -} +# fetch() -install() +extract_src() { - # get client server ip - echo "===== Getting Server and Client IP =====" - client_ip=$(extract_ip_from_string $CLIENT) - server_ip=$(extract_ip_from_string $SERVER) - echo "SERVER: $server_ip, CLIENT: $client_ip" - - cat > ~/.shrc << EOF -# Mooncake CI Test Environment Variables -export CONTAINER_NAME=${CONTAINER_NAME} -export PYTHON_VERSION=${PYTHON_VERSION} -export MODEL_CACHE=${MODEL_CACHE} -export ARTIFACT_ID=${ARTIFACT_ID} -export REGISTRY_ADDR=${REGISTRY_ADDR} -export ISREMOTE=1 -export client_ip=${client_ip} -export server_ip=${server_ip} -export USE_HUGGINGFACE_MIRROR=${USE_HUGGINGFACE_MIRROR} -export HUGGINGFACE_MIRROR=${HUGGINGFACE_MIRROR} -export USE_MODELSCOPE=${USE_MODELSCOPE} -export BASE_DIR=${REMOTE_TEST_DIR} -EOF + mooncake_path=$TONE_BM_RUN_DIR/tone_tests + if [ -d "${mooncake_path}" ]; then + echo "Removing existing directory: ${mooncake_path}" + rm -rf ${mooncake_path} + fi - find $TONE_BM_SUITE_DIR/scripts -name '*.sh' -exec chmod +x {} \; + mkdir -p $(dirname ${mooncake_path}) - echo "===== Starting Remote Installation =====" - echo "SERVER: $SERVER, CLIENT: $CLIENT" - if [ -n "$CLIENT" ]; then - echo "Mooncake CI Test Environment Variables:" - cat ~/.shrc - echo "Copying test script to remote server $client_ip..." - ${SSH_CMD} $client_ip "rm -rf ${REMOTE_TEST_DIR} && mkdir -p ${REMOTE_TEST_DIR}" - scp -r $TONE_BM_SUITE_DIR/scripts $client_ip:${REMOTE_TEST_DIR} && \ - scp ~/.shrc $client_ip:~/.shrc - if [ $? -ne 0 ]; then - echo "Failed to copy files to remote server" - exit 1 - fi - - echo "Get mooncake whl on remote server $client_ip..." - ${SSH_CMD} $client_ip "source ~/.shrc && cd ${REMOTE_TEST_DIR} && source ./scripts/common.sh && get_whl" - if [ $? -ne 0 ]; then - echo "Failed to get mooncake whl on remote server" + if [ -n "${LOCAL_MOONCAKE_DIR}" ] && [ -d "${LOCAL_MOONCAKE_DIR}" ]; then + echo "Mooncake_DIR: ${LOCAL_MOONCAKE_DIR}" + cp -a ${LOCAL_MOONCAKE_DIR}/scripts/tone_tests ${mooncake_path} + else + export CLONE_BRANCH=$BRANCH + echo "===== Extracting Source Code =====" + rm -rf ${TONE_BM_BUILD_DIR}/* + mirror_download "https://kkgithub.com/$TEST_GIT_REPO.git" "https://github.com/$TEST_GIT_REPO.git" Mooncake + if [ "$?" -ne 0 ]; then exit 1 fi - echo "Remote installation completed successfully" - else - echo "No client specified, skipping remote installation" + cp -a ${TONE_BM_BUILD_DIR}/Mooncake/scripts/tone_tests ${mooncake_path} fi +} - echo "===== Starting Local Installation =====" - echo "Get mooncake whl on local machine..." - sed -i "s|^export ISREMOTE=.*$|export ISREMOTE=0|" ~/.shrc - sed -i 's|^export BASE_DIR=.*$|export BASE_DIR='"$TONE_BM_RUN_DIR"'|' ~/.shrc - source ~/.shrc && get_whl - if [ $? -ne 0 ]; then - echo "Failed to get mooncake whl on local machine" - exit 1 - fi - echo "Local installation completed successfully" +build() +{ + : +} - echo "===== Installation Completed =====" -} \ No newline at end of file +install() +{ + : +} diff --git a/tests/mooncake-ci-test/run.sh b/tests/mooncake-ci-test/run.sh index 6da1112325b38b41e78598632481d461d0bfc68a..90681f0f06b79dd1ee219f0abbae476a28ff3384 100644 --- a/tests/mooncake-ci-test/run.sh +++ b/tests/mooncake-ci-test/run.sh @@ -4,93 +4,120 @@ # Download variable: # WEB_URL= # GIT_URL= -CONTAINER_NAME=${CONTAINER_NAME:-"mooncake-ci-test"} -readonly SSH_CMD="ssh -o StrictHostKeyChecking=no" -SUPPORT_MODELS=("Qwen/Qwen3-8B" "deepseek-ai/DeepSeek-V2-Lite") +GIT_REPO=${GIT_REPO:-"kvcache-ai/Mooncake"} -run_single_model() -{ - local model_name=$1 - local model_name_clean=$(echo "$model_name" | sed 's/\//__/g') - - echo "===== Run MODEL NAME: $model_name =====" - # launch docker - echo "Launching docker container on local machine..." - $TONE_BM_SUITE_DIR/scripts/$test.sh setup $model_name_clean - echo -e "\n\nLaunching docker container on remote machine..." - ${SSH_CMD} $client_ip "source ~/.shrc; cd \$BASE_DIR/scripts && ./$test.sh setup $model_name_clean" +# GIT_REPO: The repository to fetch the wheel package from +# ARTIFACT_ID= +# CLIENT= +# SERVER= + +. $TONE_BM_SUITE_DIR/common.sh + +setup() +{ + server_ip=$(extract_ip_from_string "$SERVER") + client_ip=$(extract_ip_from_string "$CLIENT") - # Local start server - if ! $TONE_BM_SUITE_DIR/scripts/$test.sh start_server $model_name $model_name_clean; then - echo "ERROR: Failed to start local server for model $model_name" - exit 1 - fi - # Remote start server - if ! ${SSH_CMD} $client_ip "source ~/.shrc; cd \$BASE_DIR/scripts && ./$test.sh start_server $model_name $model_name_clean"; then - echo "ERROR: Failed to start remote server for model $model_name" - exit 1 - fi + echo "Server IP: $server_ip" + echo "Client IP: $client_ip" - # Local run proxy - if ! $TONE_BM_SUITE_DIR/scripts/$test.sh run_proxy $model_name_clean; then - echo "ERROR: Failed to start local proxy for model $model_name" - exit 1 - fi - sleep 5 - # Local Sending Test Request - $TONE_BM_SUITE_DIR/scripts/$test.sh run_request $model_name_clean + # Initialize tracking file + echo -n > "$TONE_BM_RUN_DIR/test_scripts.list" } run() { - source ~/.shrc - if [ -z "$client_ip" ] || [ -z "$server_ip" ]; then - echo "Please specify client and server IPs" + echo "===== Running Tests =====" + local test_scripts_dir="$TONE_BM_RUN_DIR/tone_tests/scripts" + local test_count=0 + + if [[ ! -d "$test_scripts_dir" ]]; then + echo "ERROR: Test scripts directory does not exist: $test_scripts_dir" exit 1 fi - echo "===== Running test case: $test for all supported models =====" - for model in "${SUPPORT_MODELS[@]}"; do - run_single_model "$model" - done + echo "Found test scripts:" + > "$TONE_BM_RUN_DIR/test_scripts.list" # Clear the list file + + while IFS= read -r -d '' script; do + echo "$script" >> "$TONE_BM_RUN_DIR/test_scripts.list" + echo "$script" + ((test_count++)) + done < <(find "$test_scripts_dir" -name "test_*" -type f -print0 2>/dev/null) + + if [[ "$test_count" -eq 0 ]]; then + echo "WARNING: No test scripts found in $test_scripts_dir" + return + fi + + while IFS= read -r script; do + local script_name=$(basename "$script") + echo "Running test script: $script_name" + + if grep -q "^ARTIFACT_ID=" "$script" 2>/dev/null; then + sed -i "s/^ARTIFACT_ID=.*/ARTIFACT_ID=\"$ARTIFACT_ID\"/" "$script" + fi + + if grep -q "^GIT_REPO=" "$script" 2>/dev/null; then + sed -i "s|^GIT_REPO=.*|GIT_REPO=\"$GIT_REPO\"|" "$script" + fi + + if grep -q "^LOCAL_IP=" "$script" 2>/dev/null; then + sed -i "s/^LOCAL_IP=.*/LOCAL_IP=\"$(hostname -I | awk '{print $1}')\"/" "$script" + fi + + if grep -q "^REMOTE_IP=" "$script" 2>/dev/null; then + sed -i "s/^REMOTE_IP=.*/REMOTE_IP=\"$client_ip\"/" "$script" + fi + + echo "Running test case: $script_name" + bash $script + done < "$TONE_BM_RUN_DIR/test_scripts.list" + + echo "===== Test Completed =====" } parse() { echo "===== Parsing Results =====" - local all_passed=true - - for model in "${SUPPORT_MODELS[@]}"; do - # Extract model name without path separators for log file name - local model_name_clean=$(echo "$model" | sed 's/\//__/g') - local log_file="$TONE_BM_RUN_DIR/$test/$model_name_clean/logs/curl_response.log" - - echo "Checking results for model: $model" - - if [ -f "$log_file" ]; then - curl_response=$(cat "$log_file") - - if echo "$curl_response" | grep -q "\"object\":\"error\""; then - error_message=$(echo "$curl_response" | grep -o '"message":"[^"]*"' | sed 's/"message":"//' | sed 's/"$//') - echo "ERROR for $model: $error_message" - echo "$model: Fail" - all_passed=false + local test_run_dir="$TONE_BM_RUN_DIR/tone_tests/run" + if [[ -d "$test_run_dir" ]]; then + echo "Copying test results from $test_run_dir to $TONE_CURRENT_RESULT_DIR" + cp -r "$test_run_dir" "$TONE_CURRENT_RESULT_DIR" + else + echo "WARNING: Test run directory does not exist: $test_run_dir" + return 1 + fi + + # Check if test_scripts.list exists and is not empty + if [[ ! -f "$TONE_BM_RUN_DIR/test_scripts.list" ]] || [[ ! -s "$TONE_BM_RUN_DIR/test_scripts.list" ]]; then + echo "WARNING: No tests were executed or tracking file is missing/empty" + return 1 + fi + + # Process each executed test script + while IFS= read -r script; do + local script_base_name=$(basename "$script") + local test_dir_name=${script_base_name%.sh} # Remove .sh extension + local json_file="${TONE_CURRENT_RESULT_DIR}/run/${test_dir_name}/test_results.json" + + # Check if the result file exists + if [[ -f "$json_file" ]]; then + echo "Parsing test result file: $json_file" + local test_case_name=$(jq -r '.test_case' "$json_file" 2>/dev/null) + local status=$(jq -r '.status' "$json_file" 2>/dev/null) + + if [[ "$test_case_name" != "null" && "$status" != "null" && -n "$test_case_name" && -n "$status" ]]; then + echo "$test_dir_name: $status" else - echo "$model: Pass" + echo "$test_dir_name: Fail" + echo "WARNING: Invalid test result file: $json_file" fi else - echo "ERROR: Curl response log not found for model $model at $log_file" - echo "$model: Fail" - all_passed=false + # Handle missing result file (indicates test failure) + echo "$test_dir_name: Fail" + echo "WARNING: Missing test result file: $json_file" fi - echo "" - done - - # Overall test result - if [ "$all_passed" = true ]; then - echo "$test: Pass" - else - echo "$test: Fail" - fi + done < "$TONE_BM_RUN_DIR/test_scripts.list" } \ No newline at end of file diff --git a/tests/mooncake-ci-test/scripts/1p1d-erdma-test.sh b/tests/mooncake-ci-test/scripts/1p1d-erdma-test.sh deleted file mode 100755 index 0d092b90bae9f4ff5728504d899abae6b4864ab9..0000000000000000000000000000000000000000 --- a/tests/mooncake-ci-test/scripts/1p1d-erdma-test.sh +++ /dev/null @@ -1,152 +0,0 @@ -#!/bin/bash - -source ~/.shrc - -if [ "$ISREMOTE" == "0" ]; then - . $TONE_BM_SUITE_DIR/scripts/common.sh -else - . $BASE_DIR/scripts/common.sh -fi - -# test name -test="1p1d-erdma-test" - -setup() -{ - local model_name=$1 - echo "===== Setting up Mooncake CI test =====" - echo "mkdir log path..." - setup_log_directory $test $model_name - - echo "Get the latest sglang image..." - if ! get_image; then - echo "ERROR: Failed to get the required image" - exit 1 - fi - - # qit old container - echo "Quit old container..." - if ! clean_container ${CONTAINER_NAME}; then - echo "ERROR: Failed to clean up container" - exit 1 - fi - - extra_args="" - extra_args="$extra_args --device=/dev/infiniband/uverbs0 --device=/dev/infiniband/uverbs1 --device=/dev/infiniband/rdma_cm " - if ${USE_HUGGINGFACE_MIRROR}; then - extra_args="$extra_args -e HF_ENDPOINT=${HUGGINGFACE_MIRROR}" - extra_args="$extra_args -e HF_HUB_ENABLE_HF_TRANSFER=1" - fi - if ${USE_MODELSCOPE}; then - extra_args="$extra_args -e SGLANG_USE_MODELSCOPE=true" - fi - echo "extra_args: $extra_args" - - echo "Launching docker container..." - if ! docker_launch "$extra_args"; then - echo "ERROR: Failed to launch docker container" - exit 1 - fi -} - -start_server() -{ - local host - local model_name=$1 - local model_name_clean=$2 - local sglang_server_log_path - if [ "$ISREMOTE" == "0" ]; then - host=$server_ip - sglang_server_log_path=/test_run/$test/$model_name_clean/logs/sglang_server_local.log - mode_name=prefill - else - host=$client_ip - sglang_server_log_path=/test_run/$test/$model_name_clean/logs/sglang_server_remote.log - mode_name=decode - fi - - echo "Checking SGLANG_USE_MODELSCOPE environment variable..." - ${docker_exec} "echo SGLANG_USE_MODELSCOPE=\$SGLANG_USE_MODELSCOPE" - - sglang_start_server_cmd_remote=" - ${docker_exec} \ - \"python -m sglang.launch_server --model-path ${model_name} \ - --disaggregation-mode $mode_name --port 30001 --host ${host} --tp-size 2 --base-gpu-id=6 > ${sglang_server_log_path} 2>&1 &\"" - echo "Start sglang server command:" - echo "$sglang_start_server_cmd_remote" - eval "$sglang_start_server_cmd_remote" - - exactly_sglang_server_log_path=$(echo "$sglang_server_log_path" | sed "s|/test_run/|$BASE_DIR/|") - if ! check_server_ready "$exactly_sglang_server_log_path"; then - return 1 - fi -} - -run_proxy(){ - local model_name=$1 - local proxy_log_path="/test_run/$test/$model_name/logs/load_balancer.log" - - echo "===== Proxy Run =====" - lb_cmd="${docker_exec} \"python3 -m sglang_router.launch_router --pd-disaggregation \ - --prefill http://${server_ip}:30001 --decode http://${client_ip}:30001 --host 0.0.0.0 \ - --port 8000 > $proxy_log_path 2>&1 &\"" - echo "Load balancer command:" - echo "$lb_cmd" - eval "$lb_cmd" - - exactly_proxy_log_path=$(echo "$proxy_log_path" | sed "s|/test_run/|$BASE_DIR/|") - if ! check_proxy_ready "$exactly_proxy_log_path"; then - return 1 - fi -} - -run_request(){ - local model_name=$1 - echo "===== Sending Test Request =====" - curl_response=$(curl -s -w "\n%{http_code}" -X POST http://127.0.0.1:8000/generate -H "Content-Type: application/json" -d '{ - "text": "Let me tell you a short story ", - "sampling_params": { - "temperature": 0 - } - }' --max-time 30) - response_body=$(echo "$curl_response" | head -n -1) - status_code=$(echo "$curl_response" | tail -n 1) - echo "Curl Response:" - echo "$response_body" - echo "Status Code: $status_code" - - echo "$response_body" > $TONE_BM_RUN_DIR/$test/$model_name/logs/curl_response.log - - if [ $status_code -eq 200 ]; then - echo "Test request successful!" - else - echo "Test request failed with status code $status_code" - fi -} - -case "$1" in - "setup") - shift - setup "$@" - ;; - "start_server") - shift - start_server "$@" - ;; - "run_proxy") - shift - run_proxy "$@" - ;; - "run_request") - shift - run_request "$@" - ;; - *) - echo "Usage: $0 {setup|start_server|run_proxy|run_request}" - echo " setup - Install Mooncake and dependencies" - echo " start_server - Start the SGLang server" - echo " run_proxy - Start the load balancer/proxy" - echo " run_request - Send test request" - exit 1 - ;; -esac \ No newline at end of file diff --git a/tests/mooncake-ci-test/scripts/common.sh b/tests/mooncake-ci-test/scripts/common.sh deleted file mode 100755 index 845dd126e3f04d723317fff855b4294c6c875265..0000000000000000000000000000000000000000 --- a/tests/mooncake-ci-test/scripts/common.sh +++ /dev/null @@ -1,246 +0,0 @@ -# !/bin/bash - -docker_exec="docker exec ${CONTAINER_NAME} bash -c" - -setup_log_directory(){ - local test_name=$1 - local model_name=$2 - - log_path="$BASE_DIR/$test/$model_name/logs" - [ -d $log_path ] && rm -rf $log_path - mkdir -p $log_path - - echo "Log directory set up at: $log_path" -} - -docker_launch(){ - local extra_args=$1 - image_server_id=`docker images -f "reference=${REGISTRY_ADDR}" | awk '{print $3}' | grep -v -i IMAGE` - - docker_run_cmd="docker run --name ${CONTAINER_NAME} \ - -d --ipc=host --cap-add=SYS_PTRACE --network=host --gpus all \ - --ulimit memlock=-1 --ulimit stack=67108864 --shm-size=128g \ - -v ${MODEL_CACHE}:/root/.cache $extra_args --privileged \ - -v $BASE_DIR:/test_run \ - --entrypoint bash \ - ${image_server_id} -c \"hostname;sleep 360000\"" - - echo "Executing Docker run command:" - echo "$docker_run_cmd" - if ! eval "$docker_run_cmd"; then - echo "ERROR: Failed to launch docker container" >&2 - return 1 - fi - - pip_cmd="" - erdm_dirver_cmd='wget -qO - http://mirrors.cloud.aliyuncs.com/erdma/GPGKEY | gpg --dearmour -o /etc/apt/trusted.gpg.d/erdma.gpg && \ - echo "deb [ ] http://mirrors.cloud.aliyuncs.com/erdma/apt/ubuntu jammy/erdma main" | tee /etc/apt/sources.list.d/erdma.list && \ - apt update && \ - apt install libibverbs1 ibverbs-providers ibverbs-utils librdmacm1 -y' - mooncake_whl_file=$(ls $BASE_DIR/whls/*.whl 2>/dev/null | xargs -n 1 basename | head -n 1) - if [ -z "$mooncake_whl_file" ]; then - echo "No wheel file found in $BASE_DIR/whls/" - return 1 - fi - pip_cmd=$(append_str "${pip_cmd}" "pip install /test_run/whls/$mooncake_whl_file") - - echo "Installing ERDMA drivers" - echo "Executing ERDMA driver installation command:" - echo "${erdm_dirver_cmd}" - if ! ${docker_exec} "${erdm_dirver_cmd}"; then - echo "ERROR: Failed to install ERDMA drivers" >&2 - return 1 - fi - - echo "Checking RDMA devices" - echo "Executing ibv_devinfo check command:" - echo "ibv_devinfo" - if ! ${docker_exec} "ibv_devinfo" >/dev/null 2>&1; then - echo "ibv_devinfo execution failed" >&2 - return 1 - else - echo "ibv_devinfo execution successful" - fi - - # install mooncake and upgrade sglang - echo "=== Installing Mooncake and dependencies ===" - echo "Executing pip installation commands:" - IFS=';' read -ra COMMANDS <<< "$pip_cmd" - for cmd in "${COMMANDS[@]}"; do - echo "Command: $cmd" - done - if ! ${docker_exec} "${pip_cmd}"; then - echo "ERROR: Failed to install Mooncake dependencies" >&2 - return 1 - fi - - return 0 -} - -clean_container(){ - local container_name=$1 - if [ -z "$container_name" ]; then - echo "No container name provided" - return 1 - fi - - # check if container exists - if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then - echo "Stopping and removing existing container: ${container_name}" - # stop container - docker stop ${container_name} >/dev/null 2>&1 - # remove container - docker rm ${container_name} >/dev/null 2>&1 - if [ $? -eq 0 ]; then - echo "Successfully removed container: ${container_name}" - else - echo "Failed to remove container: ${container_name}" - return 1 - fi - else - echo "No existing container named: ${container_name}" - fi - - return 0 -} - -append_str() { - local original_str="$1" - local append_value="$2" - - if [ -z "$original_str" ]; then - echo "$append_value" - else - echo "${original_str}; ${append_value}" - fi -} - -check_server_ready() { - local server_log_path=$1 - local max_attempts=${2:-60} - - if [ -z "$server_log_path" ]; then - echo "ERROR: Server log path not provided" >&2 - return 1 - fi - - echo "Waiting for server to be ready (checking: $server_log_path)..." - for i in $(seq 1 $max_attempts); do - if [ -f "$server_log_path" ]; then - if grep -q 'The server is fired up and ready to roll!' "$server_log_path" 2>/dev/null; then - echo "Server is ready!" - return 0 - fi - echo "Waiting... ($i/$max_attempts)" - sleep 2 - fi - done - - echo "ERROR: Server failed to start within timeout" - return 1 -} - -get_whl(){ - whls_path=$BASE_DIR/whls - echo "whls_path: $whls_path and mkdir..." - mkdir -p $whls_path - - echo "get whl file from github action" - rm -f mooncake.zip - rm -f *.whl - gh api -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/kvcache-ai/Mooncake/actions/artifacts/$ARTIFACT_ID/zip \ - > mooncake.zip - unzip -o mooncake.zip - - rm -f $whls_path/*.whl - mooncake_whl_file=$(basename "$(find . -name "*.whl" -type f | head -n 1)") - if [ -z "$mooncake_whl_file" ]; then - echo "No wheel file found in the extracted archive" - return 1 - fi - echo "Found wheel file: $mooncake_whl_file" - cp $mooncake_whl_file $whls_path - if [ $? -ne 0 ]; then - echo "Failed to copy wheel file to destination" - return 1 - fi - - echo "Successfully downloaded and copied wheel file" - return 0 -} - -get_image(){ - # only support run in container - if [ -z "$REGISTRY_ADDR" ]; then - if [ "$PYTHON_VERSION" == "3.12" ]; then - REGISTRY_ADDR=lmsysorg/sglang:latest - else - echo "Please provide REGISTRY_ADDR!" - return 1 - fi - fi - if ! docker inspect $REGISTRY_ADDR >/dev/null 2>&1; then - echo "Image ${REGISTRY_ADDR} not found, pulling..." - docker pull $REGISTRY_ADDR - if [ $? -ne 0 ]; then - echo "Failed to pull image ${REGISTRY_ADDR}" - return 1 - fi - else - echo "Image ${REGISTRY_ADDR} already exists, skipping pull" - fi - - return 0 -} - -extract_ip_from_string() { - local str=$1 - if [ -z "$str" ]; then - echo "Error: Empty client string provided" >&2 - return 1 - fi - - local ip=$(echo "$str" | cut -d'-' -f2) - if [ -z "$ip" ]; then - echo "Error: Failed to extract IP from client string: $str" >&2 - return 1 - fi - - echo "$ip" - return 0 -} - -check_proxy_ready() { - local proxy_log_path=$1 - local max_attempts=${2:-60} - - if [ -z "$proxy_log_path" ]; then - echo "ERROR: Proxy log path not provided" >&2 - return 1 - fi - - echo "Waiting for load balancer to be ready and workers to be activated..." - echo "Checking log file: $proxy_log_path" - for i in $(seq 1 $max_attempts); do - if [ -f "$proxy_log_path" ]; then - # Check if both workers are activated - server_activated=$(grep -c "Activated worker http://${server_ip}:30001" "$proxy_log_path" 2>/dev/null) || server_activated=0 - client_activated=$(grep -c "Activated worker http://${client_ip}:30001" "$proxy_log_path" 2>/dev/null) || client_activated=0 - - if [ "$server_activated" -gt 0 ] && [ "$client_activated" -gt 0 ]; then - echo "Load balancer is ready with both workers activated!" - echo " - Server worker (http://${server_ip}:30001): $server_activated time(s)" - echo " - Client worker (http://${client_ip}:30001): $client_activated time(s)" - return 0 - fi - fi - echo "Waiting... ($i/$max_attempts)" - sleep 2 - done - - echo "ERROR: Server failed to start within timeout" - return 1 -} - -