Skip to content

Commit a2a7961

Browse files
authored
Merge pull request #851 from FedML-AI/dev/v0.7.0
Sync the model inference.
2 parents 1b18774 + fb0d323 commit a2a7961

File tree

28 files changed

+323
-323
lines changed

28 files changed

+323
-323
lines changed

devops/Jenkinsfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ pipeline {
217217
sh 'cp -Rf ./aws/* ./devops/scripts/aws'
218218
sh 'cd ./devops/scripts/;unzip ./docker.zip;unzip ./kubectl.zip;tar -zxvf ./helm-v3.10.3-linux-amd64.tar.gz;mv linux-amd64/helm /usr/local/bin/helm;cd ../../'
219219

220-
//sh './devops/scripts/build-fedml-docker.sh'
220+
sh './devops/scripts/build-fedml-docker.sh'
221221

222222
sh 'docker build --network=host -f ./devops/dockerfile/device-image/Dockerfile-Base -t public.ecr.aws/x6k8q1x9/fedml-device-image:base .'
223223

@@ -305,7 +305,7 @@ pipeline {
305305
container('base') {
306306
withCredentials([usernamePassword(passwordVariable : 'DOCKERHUB_PASSWORD' ,usernameVariable : 'DOCKERHUB_USERNAME' ,credentialsId : "$DOCKERHUB_CREDENTIAL_ID" ,)]) {
307307
sh 'docker login --username $DOCKERHUB_USERNAME --password $DOCKERHUB_PASSWORD'
308-
//sh './devops/scripts/push-fedml-docker.sh'
308+
sh './devops/scripts/push-fedml-docker.sh'
309309
}
310310

311311
withCredentials([usernamePassword(passwordVariable : 'AWS_IAM_ACCESS_ID' ,usernameVariable : 'AWS_ECR_USERNAME' ,credentialsId : "$AWS_ECR_CREDENTIAL_ID_PRODUCTION" ,)]) {

devops/dockerfile/device-image/Dockerfile-Base

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
ARG VERSION=base
22
ARG IS_BUILDING_GPU_IMAGE=0
33
#ARG BASE_IMAGE=continuumio/miniconda3:4.7.12
4-
ARG BASE_IMAGE=fedml/fedml:latest-torch1.12.1-cuda11.3-cudnn8-devel
4+
ARG BASE_IMAGE=fedml/fedml:latest-torch1.13.1-cuda11.6-cudnn8-devel
55
FROM ${BASE_IMAGE}
66

77
ADD ./devops/scripts/aws ./fedml/aws

devops/dockerfile/model-inference-ingress/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG VERSION=dev
22
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
3-
ARG FEDML_PIP_HOME=/usr/local/lib/python3.7/dist-packages/fedml
3+
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml
44
FROM ${BASE_IMAGE}
55

66
ADD ./devops/scripts/runner.sh ./fedml/runner.sh

devops/dockerfile/model-premise-master/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG VERSION=dev
22
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
3-
ARG FEDML_PIP_HOME=/usr/local/lib/python3.7/dist-packages/fedml
3+
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml
44
FROM ${BASE_IMAGE}
55

66
ADD ./devops/scripts/runner.sh ./fedml/runner.sh

devops/dockerfile/model-premise-slave/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
ARG VERSION=dev
22
ARG IS_BUILDING_GPU_IMAGE=0
33
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
4-
ARG FEDML_PIP_HOME=/usr/local/lib/python3.7/dist-packages/fedml
4+
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml
55
FROM ${BASE_IMAGE}
66

77
ADD ./devops/scripts/runner.sh ./fedml/runner.sh
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
ARG VERSION=dev
2+
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
3+
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml
# Base image of the Triton backend stage. FROM can only expand ARGs that are
# declared before the first FROM of the file, so the default lives here.
ARG INF_BACKEND_BASE_IMAGE=nvcr.io/nvidia/tritonserver:22.01-py3
4+
# Shared build stage: installs the Python requirements and the local ./python
# tree so the runtime stages can copy the result with COPY --from.
FROM ${BASE_IMAGE} AS fedml-inference-env-base
5+
# A global ARG is not visible after FROM until it is re-declared in the stage;
# without this line ${FEDML_PIP_HOME} below expands to an empty string.
ARG FEDML_PIP_HOME
6+
# COPY instead of ADD: these are plain local files, no archive extraction needed.
COPY ./devops/scripts/runner.sh ./fedml/runner.sh
7+
8+
COPY ./devops/scripts/requirements.txt ./fedml/requirements.txt
9+
10+
RUN chmod a+x ./fedml/runner.sh
11+
RUN echo "Updating..."
12+
13+
RUN pip3 install --no-cache-dir -r ./fedml/requirements.txt
14+
15+
# The contents of ./python land directly in fedml/fedml-pip, so the package
# directory ends up at fedml/fedml-pip/fedml.
COPY ./python ./fedml/fedml-pip
16+
COPY ./python/fedml ${FEDML_PIP_HOME}
17+
# NOTE(review): relative paths resolve against the base image's WORKDIR; the
# later stages read /fedml/fedml-pip, which assumes that WORKDIR is "/" - confirm.
WORKDIR ./fedml/fedml-pip
18+
RUN pip3 install --no-cache-dir -e ./
19+
20+
WORKDIR /fedml
21+
22+
23+
24+
# Master node image: logs the device in as an inference master, then hands
# over to ./runner.sh.
FROM ${BASE_IMAGE} AS fedml-inference-master
25+
# Global ARGs are invisible after FROM until re-declared; without these two
# lines ${VERSION} and ${FEDML_PIP_HOME} expand to empty strings in this stage.
ARG VERSION
26+
WORKDIR /fedml
27+
ARG FEDML_PIP_HOME
28+
COPY --from=fedml-inference-env-base /fedml/fedml-pip /fedml/fedml-pip
29+
# The env-base stage copies the contents of ./python into fedml-pip, so the
# package sits at fedml-pip/fedml; there is no intermediate python/ directory.
COPY --from=fedml-inference-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}
30+
31+
# Runtime defaults; every value is meant to be overridden when the container starts.
ENV ACCOUNT_ID=0 FEDML_VERSION=${VERSION} SERVER_DEVICE_ID=0 SERVER_OS_NAME=linux INFER_HOST="127.0.0.1" \
32+
FEDML_REDIS_ADDR="127.0.0.1" FEDML_REDIS_PORT=6379 FEDML_REDIS_PASSWORD="fedml_default"
33+
34+
# Shell form is required: the variables are expanded by the container's shell.
# NOTE(review): ./runner.sh is not added in this stage, so it must already
# exist in /fedml of BASE_IMAGE - confirm.
CMD fedml model device login ${ACCOUNT_ID} -v ${FEDML_VERSION} -p -m \
35+
-ih ${INFER_HOST} -id ${SERVER_DEVICE_ID} -os ${SERVER_OS_NAME} \
36+
-ra ${FEDML_REDIS_ADDR} -rp ${FEDML_REDIS_PORT} -rpw ${FEDML_REDIS_PASSWORD};./runner.sh
37+
38+
39+
# Slave node image: logs the device in as an inference client, then hands
# over to ./runner.sh.
FROM ${BASE_IMAGE} AS fedml-inference-slave
40+
# Re-declare the global ARGs; they are not in scope after FROM otherwise and
# would expand to empty strings below.
ARG VERSION
41+
WORKDIR /fedml
42+
ARG FEDML_PIP_HOME
43+
COPY --from=fedml-inference-env-base /fedml/fedml-pip /fedml/fedml-pip
44+
# The env-base stage copies the contents of ./python into fedml-pip, so the
# package sits at fedml-pip/fedml; there is no intermediate python/ directory.
COPY --from=fedml-inference-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}
45+
46+
# Runtime defaults, overridable when the container starts.
ENV ACCOUNT_ID=0 FEDML_VERSION=${VERSION} CLIENT_DEVICE_ID=0 CLIENT_OS_NAME=linux INFER_HOST="127.0.0.1"
47+
48+
# Shell form is required: the variables are expanded by the container's shell.
CMD fedml model device login ${ACCOUNT_ID} -v ${FEDML_VERSION} -p \
49+
-id ${CLIENT_DEVICE_ID} -os ${CLIENT_OS_NAME} -ih ${INFER_HOST}; ./runner.sh
50+
51+
52+
# Ingress image: starts the model inference entry script, then hands over to
# ./runner.sh.
FROM ${BASE_IMAGE} AS fedml-inference-ingress
53+
# Re-declare the global ARG; it is not in scope after FROM otherwise.
ARG FEDML_PIP_HOME
54+
WORKDIR /fedml
55+
56+
COPY --from=fedml-inference-env-base /fedml/fedml-pip /fedml/fedml-pip
57+
# The env-base stage copies the contents of ./python into fedml-pip, so the
# package sits at fedml-pip/fedml; there is no intermediate python/ directory.
COPY --from=fedml-inference-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}
58+
# ARG values do not survive into the running container, but the shell-form CMD
# below reads ${FEDML_PIP_HOME} at runtime, so persist it as an ENV variable.
ENV FEDML_PIP_HOME=${FEDML_PIP_HOME}
59+
ENV FEDML_REDIS_ADDR="local" FEDML_REDIS_PORT=6379 FEDML_REDIS_PASSWORD="fedml_default" \
60+
FEDML_END_POINT_ID=0 FEDML_MODEL_ID=0 \
61+
FEDML_MODEL_NAME="model" FEDML_MODEL_VERSION="v1" \
62+
FEDML_INFER_URL="infer" FEDML_CONFIG_VERSION="release" \
63+
FEDML_INFER_PORT=5001
64+
65+
# -ra carries the Redis address and -rp the Redis port, matching the flag usage
# of the master stage's login command (the two were swapped here).
# NOTE(review): confirm against the argparse definitions of the entry script.
CMD python3 ${FEDML_PIP_HOME}/cli/model_deployment/device_model_inference_entry.py \
66+
-ra ${FEDML_REDIS_ADDR} -rp ${FEDML_REDIS_PORT} -rpw ${FEDML_REDIS_PASSWORD} \
67+
-ep ${FEDML_END_POINT_ID} -mi ${FEDML_MODEL_ID} \
68+
-mn ${FEDML_MODEL_NAME} -mv ${FEDML_MODEL_VERSION} \
69+
-iu ${FEDML_INFER_URL} -cv ${FEDML_CONFIG_VERSION} \
70+
-ip ${FEDML_INFER_PORT};./runner.sh
71+
72+
73+
# NOTE(review): FROM only expands ARGs declared before the first FROM of the
# file. This declaration sits inside the preceding stage, so
# INF_BACKEND_BASE_IMAGE must also be declared in the global ARG preamble for
# the FROM below to resolve.
ARG INF_BACKEND_BASE_IMAGE=nvcr.io/nvidia/tritonserver:22.01-py3
74+
# Inference backend image: Triton server polling a model repository directory.
FROM ${INF_BACKEND_BASE_IMAGE} AS fedml-inference-backend
75+
76+
# COPY instead of ADD: these are plain local files, no archive extraction needed.
COPY ./devops/scripts/runner.sh ./fedml/runner.sh
77+
78+
COPY ./devops/scripts/requirements.txt ./fedml/requirements.txt
79+
80+
RUN chmod a+x ./fedml/runner.sh
81+
82+
# Poll interval (passed to --repository-poll-secs) and location of the model
# repository that tritonserver serves from.
ENV FEDML_MODEL_SERVING_REPO_SCAN_INTERVAL=3 \
83+
FEDML_MODEL_SERVING_REPO_PATH=/home/fedml/fedml-client/fedml/models_serving
84+
85+
# Create the repository directory, then exec so tritonserver replaces the
# wrapping shell and receives the container's stop signals directly.
CMD mkdir -p ${FEDML_MODEL_SERVING_REPO_PATH};exec tritonserver --model-control-mode=poll \
86+
--strict-model-config=false \
87+
--backend-config=onnxruntime,default-max-batch-size=1 \
88+
--repository-poll-secs=${FEDML_MODEL_SERVING_REPO_SCAN_INTERVAL} \
89+
--model-repository=${FEDML_MODEL_SERVING_REPO_PATH}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
ARG VERSION=dev
2+
ARG IS_BUILDING_GPU_IMAGE=0
3+
#ARG BASE_IMAGE=continuumio/miniconda3:4.7.12
4+
ARG BASE_IMAGE=fedml/fedml:latest-torch1.13.1-cuda11.6-cudnn8-devel
5+
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml
6+
# Base tooling image: AWS CLI, docker and kubectl binaries, OS packages, the
# conda environment and the Python requirements.
FROM ${BASE_IMAGE} AS fedml-image-base
7+
# A global ARG is not visible after FROM until it is re-declared; without this
# line setup-conda-env.sh below is called with an empty argument.
ARG IS_BUILDING_GPU_IMAGE
8+
# COPY instead of ADD: plain local files and directories, no archive extraction.
COPY ./devops/scripts/aws ./fedml/aws
9+
COPY ./devops/scripts/setup-aws-cli.sh ./fedml/setup-aws-cli.sh
10+
COPY ./devops/scripts/set-aws-credentials.sh ./fedml/set-aws-credentials.sh
11+
COPY ./devops/scripts/docker /usr/bin/
12+
COPY ./devops/scripts/kubectl /usr/bin/
13+
14+
COPY ./devops/scripts/requirements.txt ./fedml/requirements.txt
15+
COPY ./devops/scripts/setup-conda-env.sh ./fedml/setup-conda-env.sh
16+
17+
RUN chmod a+x /usr/bin/docker
18+
RUN chmod a+x /usr/bin/kubectl
19+
RUN chmod a+x ./fedml/setup-aws-cli.sh
20+
RUN chmod a+x ./fedml/set-aws-credentials.sh
21+
RUN ./fedml/setup-aws-cli.sh
22+
23+
24+
#RUN apt-get -y install gettext-base
25+
#RUN apt-get -y install unar wget
26+
#RUN apt-get -y install libquadmath0
27+
28+
# update and install share one layer so a cached, stale package index is never
# reused; sudo is dropped because the surrounding RUN steps already write to
# /usr/bin without it; the apt lists are removed in the same layer.
RUN apt-get update \
29+
&& DEBIAN_FRONTEND=noninteractive apt-get install -qq gettext-base libquadmath0 unar wget < /dev/null > /dev/null \
30+
&& rm -rf /var/lib/apt/lists/*
31+
32+
RUN chmod a+x ./fedml/setup-conda-env.sh
33+
# NOTE(review): the script now receives the ARG value (default 0) instead of
# an empty argument - confirm it treats 0 as "CPU image".
RUN bash ./fedml/setup-conda-env.sh ${IS_BUILDING_GPU_IMAGE}
34+
35+
#RUN pip install --upgrade pip
36+
#RUN pip3 uninstall fedml
37+
38+
RUN pip3 install --no-cache-dir --upgrade fedml
39+
RUN pip3 install --no-cache-dir -r ./fedml/requirements.txt
40+
RUN pip3 install --no-cache-dir --upgrade gevent
41+
42+
43+
# Shared build stage: installs the Python requirements and the local ./python
# tree so the runtime stages can copy the result with COPY --from.
FROM ${BASE_IMAGE} AS fedml-training-env-base
44+
# A global ARG is not visible after FROM until it is re-declared in the stage;
# without this line ${FEDML_PIP_HOME} below expands to an empty string.
ARG FEDML_PIP_HOME
45+
# COPY instead of ADD: these are plain local files, no archive extraction needed.
COPY ./devops/scripts/runner.sh ./fedml/runner.sh
46+
47+
COPY ./devops/scripts/requirements.txt ./fedml/requirements.txt
48+
49+
RUN chmod a+x ./fedml/runner.sh
50+
RUN echo "Updating..."
51+
52+
RUN pip3 install --no-cache-dir -r ./fedml/requirements.txt
53+
54+
# The contents of ./python land directly in fedml/fedml-pip, so the package
# directory ends up at fedml/fedml-pip/fedml.
COPY ./python ./fedml/fedml-pip
55+
COPY ./python/fedml ${FEDML_PIP_HOME}
56+
# NOTE(review): relative paths resolve against the base image's WORKDIR; the
# later stages read /fedml/fedml-pip, which assumes that WORKDIR is "/" - confirm.
WORKDIR ./fedml/fedml-pip
57+
RUN pip3 install --no-cache-dir -e ./
58+
#RUN pip3 install -e '.[tensorflow]'
59+
#RUN pip3 install -e '.[jax]'
60+
#RUN pip3 install -e '.[mxnet]'
61+
62+
WORKDIR /fedml
63+
64+
65+
# Cloud server image: logs in with the cloud_server role, then hands over to
# ./runner.sh.
FROM ${BASE_IMAGE} AS fedml-image-base-with-version
66+
# Re-declare the global ARGs; they are not in scope after FROM otherwise and
# would expand to empty strings below.
ARG VERSION
67+
RUN pip3 install --no-cache-dir MNN==1.1.6
68+
ARG FEDML_PIP_HOME
69+
WORKDIR /fedml
70+
71+
COPY --from=fedml-training-env-base /fedml/fedml-pip /fedml/fedml-pip
72+
COPY --from=fedml-training-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}
73+
74+
# Runtime defaults; the package and runner values are placeholders that are
# expected to be overridden when the container starts.
ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_DEVICE_ID=0 \
75+
FEDML_PACKAGE_NAME=package FEDML_PACKAGE_URL=s3_url \
76+
FEDML_RUNNER_CMD=3dsad
77+
78+
# Shell form is required: the variables are expanded by the container's shell.
CMD fedml login ${ACCOUNT_ID} -v ${FEDML_VERSION} -s -r cloud_server -rc ${FEDML_RUNNER_CMD} -id ${SERVER_DEVICE_ID}; ./runner.sh
79+
80+
81+
# Client agent image: logs the device in as a client, then hands over to
# ./runner.sh.
FROM ${BASE_IMAGE} AS fedml-client-agent
82+
# Re-declare the global ARGs; they are not in scope after FROM otherwise and
# would expand to empty strings below.
ARG VERSION
83+
WORKDIR /fedml
84+
ARG FEDML_PIP_HOME
85+
COPY --from=fedml-training-env-base /fedml/fedml-pip /fedml/fedml-pip
86+
COPY --from=fedml-training-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}
87+
88+
# Runtime defaults, overridable when the container starts.
ENV ACCOUNT_ID=0 FEDML_VERSION=${VERSION} CLIENT_DEVICE_ID=0 CLIENT_OS_NAME=linux
89+
90+
# Shell form is required: the variables are expanded by the container's shell.
CMD fedml login ${ACCOUNT_ID} -v ${FEDML_VERSION} -c -id ${CLIENT_DEVICE_ID} -os ${CLIENT_OS_NAME}; ./runner.sh
91+
92+
93+
# Server agent image: configures AWS credentials, logs in with the cloud_agent
# role, then hands over to ./runner.sh.
FROM ${BASE_IMAGE} AS fedml-server-agent
94+
# Re-declare the global ARGs; they are not in scope after FROM otherwise and
# would expand to empty strings below.
ARG VERSION
95+
WORKDIR /fedml
96+
ARG FEDML_PIP_HOME
97+
COPY --from=fedml-training-env-base /fedml/fedml-pip /fedml/fedml-pip
98+
# The package is produced by the training env-base stage at fedml-pip/fedml;
# the fedml-image-base stage never creates /fedml/fedml-pip at all.
COPY --from=fedml-training-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}
99+
100+
# Runtime defaults; the AWS values are placeholders to be supplied when the
# container starts, never baked into the image.
ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_AGENT_ID=0 \
101+
AWS_IAM_ACCESS_ID=0 \
102+
AWS_IAM_ACCESS_KEY=0 \
103+
AWS_REGION=0
104+
105+
# NOTE(review): neither set-aws-credentials.sh nor runner.sh is added in this
# stage, so both must already exist in /fedml of BASE_IMAGE - confirm.
CMD ./set-aws-credentials.sh ${AWS_IAM_ACCESS_ID} ${AWS_IAM_ACCESS_KEY} ${AWS_REGION};fedml login ${ACCOUNT_ID} -v ${FEDML_VERSION} -s -r cloud_agent -id ${SERVER_AGENT_ID};./runner.sh
106+
107+
108+
# Edge server image: logs the device in as a server, then hands over to
# ./runner.sh.
FROM ${BASE_IMAGE} AS fedml-edge-server
109+
# Re-declare the global ARGs; they are not in scope after FROM otherwise and
# would expand to empty strings below.
ARG VERSION
110+
WORKDIR /fedml
111+
ARG FEDML_PIP_HOME
112+
COPY --from=fedml-training-env-base /fedml/fedml-pip /fedml/fedml-pip
113+
# The package is produced by the training env-base stage at fedml-pip/fedml;
# the fedml-image-base stage never creates /fedml/fedml-pip at all.
COPY --from=fedml-training-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}
114+
115+
# Runtime defaults, overridable when the container starts.
ENV ACCOUNT_ID=0 FEDML_VERSION=${VERSION} SERVER_DEVICE_ID=0 SERVER_OS_NAME=linux
116+
117+
# Shell form is required: the variables are expanded by the container's shell.
CMD fedml login ${ACCOUNT_ID} -v ${FEDML_VERSION} -s -id ${SERVER_DEVICE_ID} -os ${SERVER_OS_NAME};./runner.sh

devops/k8s/README_MODEL_SERVING.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ Moreover, on GCP k8s cluster, you should set up your GPU nodes based on the foll
101101
After you have installed FedML model serving packages, you may run the helm upgrade commands to modify parameters.
102102

103103
e.g.
104-
```helm upgrade --set "autoscaling.enabled=true" --set replicaCount=$InstanceNumber fedml-model-premise-master fedml-model-premise-master-0.7.397.tgz -n $YourNameSpace```
104+
```helm upgrade --set "autoscaling.enabled=true" --set replicaCount=$InstanceNumber fedml-model-premise-master fedml-model-premise-master-latest.tgz -n $YourNameSpace```
105105

106106
### 6). Config your CNAME record in your DNS provider (Godaddy, wordpress, AWS Route 53...)
107107
#### (a). Find the Kubernetes nginx ingress named 'fedml-model-inference-gateway' in your Kubernetes cluster.
@@ -150,7 +150,7 @@ Pull remote model(ModelOps) to local model repository:
150150
1. Q: Does it support automatic scaling?
151151
A: Yes. Use the `helm upgrade` command. For example, you can upgrade with the following command:
152152

153-
```helm upgrade --set "autoscaling.enabled=true" --set replicaCount=$InstanceNumber fedml-model-premise-master fedml-model-premise-master-0.7.397.tgz -n $YourNameSpace```
153+
```helm upgrade --set "autoscaling.enabled=true" --set replicaCount=$InstanceNumber fedml-model-premise-master fedml-model-premise-master-latest.tgz -n $YourNameSpace```
154154

155155

156156
2. Q: Does the inference endpoint support a private IP? \
-6 Bytes
Binary file not shown.

devops/k8s/fedml-model-premise-master/Chart.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 0.7.700
18+
version: 0.8.2
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to
0 Bytes
Binary file not shown.

devops/k8s/fedml-model-premise-slave/Chart.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 0.7.700
18+
version: 0.8.2
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to

devops/k8s/fedml-model-premise-slave/templates/deployment.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ spec:
7777
resources:
7878
limits:
7979
memory: 8192Mi
80+
requests:
81+
memory: 4096Mi
8082
volumeMounts:
8183
- name: home-dir
8284
mountPath: {{ .Values.volume.clientHomeDirMountPath }}

0 commit comments

Comments
 (0)