update code
This commit is contained in:
49
Dockerfile
49
Dockerfile
@@ -1,49 +0,0 @@
|
|||||||
|
|
||||||
FROM harbor.4pd.io/inf/base-python3.8-ubuntu:1.1.0
|
|
||||||
MAINTAINER shiguangchuan@4paradigm.com
|
|
||||||
|
|
||||||
WORKDIR /workspace
|
|
||||||
|
|
||||||
COPY ssh-keygen /bin
|
|
||||||
|
|
||||||
RUN wget -q ftp://ftp.4pd.io/pub/pico/temp/pynini-2.1.6-cp38-cp38-manylinux_2_31_x86_64.whl && pip install pynini-2.1.6-cp38-cp38-manylinux_2_31_x86_64.whl && rm -f pynini-2.1.6-c p38-cp38-manylinux_2_31_x86_64.whl
|
|
||||||
|
|
||||||
ADD ./requirements.txt /workspace
|
|
||||||
RUN pip install -r ./requirements.txt -i https://nexus.4pd.io/repository/pypi-all/simple --trusted-host nexus.4pd.io --extra-index-url https://mirrors.aliyun.com/pypi/simple/ \
|
|
||||||
&& pip cache purge \
|
|
||||||
&& ssh-keygen -f /workspace/ssh-key-ecdsa -t ecdsa -b 521 -q -N ""
|
|
||||||
|
|
||||||
ADD . /workspace
|
|
||||||
|
|
||||||
EXPOSE 80
|
|
||||||
|
|
||||||
CMD ["python3", "run_callback.py"]
|
|
||||||
|
|
||||||
|
|
||||||
###########################
|
|
||||||
## Dockerfile(更新后)
|
|
||||||
#FROM harbor.4pd.io/lab-platform/inf/python:3.9
|
|
||||||
|
|
||||||
#WORKDIR /app
|
|
||||||
|
|
||||||
## 安装依赖
|
|
||||||
##RUN pip install torch librosa flask
|
|
||||||
|
|
||||||
##RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
|
|
||||||
## pip cache purge && \
|
|
||||||
## pip --default-timeout=1000 install torch librosa flask
|
|
||||||
|
|
||||||
## 删除原来的 COPY pytorch_model.bin /app/
|
|
||||||
|
|
||||||
#COPY inference.py /app/
|
|
||||||
# 只需要复制启动脚本
|
|
||||||
|
|
||||||
#EXPOSE 80
|
|
||||||
|
|
||||||
#CMD ["python", "inference.py"]
|
|
||||||
####################
|
|
||||||
|
|
||||||
|
|
||||||
##############################更新0731#################################
|
|
||||||
|
|
||||||
|
|
||||||
BIN
helm-chart/.DS_Store
vendored
BIN
helm-chart/.DS_Store
vendored
Binary file not shown.
@@ -1,77 +0,0 @@
|
|||||||
## judgeflow chart 的要求
|
|
||||||
|
|
||||||
### values.yaml 文件必须包含如下字段,并且模板中必须引用 values.yaml 中的如下字段
|
|
||||||
|
|
||||||
```
|
|
||||||
podLabels
|
|
||||||
env
|
|
||||||
volumeMounts
|
|
||||||
volumes
|
|
||||||
affinity
|
|
||||||
```
|
|
||||||
|
|
||||||
### values.yaml 文件必须在 volumeMounts 中声明如下卷
|
|
||||||
|
|
||||||
```
|
|
||||||
workspace
|
|
||||||
submit
|
|
||||||
datafile
|
|
||||||
```
|
|
||||||
|
|
||||||
## 被测服务(sut) chart 的要求
|
|
||||||
|
|
||||||
### values.yaml 文件必须包含如下字段,并且资源模板中必须引用 values.yaml 中的如下字段
|
|
||||||
|
|
||||||
```
|
|
||||||
podLabels
|
|
||||||
affinity
|
|
||||||
```
|
|
||||||
|
|
||||||
针对 podLabels 字段,values.yaml 中配置格式如下:
|
|
||||||
|
|
||||||
```
|
|
||||||
podLabels: {}
|
|
||||||
```
|
|
||||||
|
|
||||||
下面给出示例
|
|
||||||
|
|
||||||
podLabels
|
|
||||||
|
|
||||||
values.yaml
|
|
||||||
|
|
||||||
templates/deployment.yaml
|
|
||||||
|
|
||||||
```
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
{{- with .Values.podLabels }}
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
```
|
|
||||||
|
|
||||||
affinity
|
|
||||||
|
|
||||||
values.yaml
|
|
||||||
|
|
||||||
```
|
|
||||||
affinity: {}
|
|
||||||
```
|
|
||||||
|
|
||||||
templates/deployment.yaml
|
|
||||||
|
|
||||||
```
|
|
||||||
spec:
|
|
||||||
template:
|
|
||||||
spec:
|
|
||||||
{{- with .Values.affinity }}
|
|
||||||
affinity:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
```
|
|
||||||
|
|
||||||
### 如果需要在 sut 中使用共享存储,则 sut chart 的 values.yaml 也必须包含如下字段,且模板中必须引用 values.yaml 中的如下字段
|
|
||||||
|
|
||||||
```
|
|
||||||
volumeMounts
|
|
||||||
volumes
|
|
||||||
```
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
# Patterns to ignore when building packages.
|
|
||||||
# This supports shell glob matching, relative path matching, and
|
|
||||||
# negation (prefixed with !). Only one pattern per line.
|
|
||||||
.DS_Store
|
|
||||||
# Common VCS dirs
|
|
||||||
.git/
|
|
||||||
.gitignore
|
|
||||||
.bzr/
|
|
||||||
.bzrignore
|
|
||||||
.hg/
|
|
||||||
.hgignore
|
|
||||||
.svn/
|
|
||||||
# Common backup files
|
|
||||||
*.swp
|
|
||||||
*.bak
|
|
||||||
*.tmp
|
|
||||||
*.orig
|
|
||||||
*~
|
|
||||||
# Various IDEs
|
|
||||||
.project
|
|
||||||
.idea/
|
|
||||||
*.tmproj
|
|
||||||
.vscode/
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
apiVersion: v2
|
|
||||||
name: ${chartName}
|
|
||||||
description: Leaderboard judgeflow helm chart for demo
|
|
||||||
|
|
||||||
# A chart can be either an 'application' or a 'library' chart.
|
|
||||||
#
|
|
||||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
|
||||||
# to be deployed.
|
|
||||||
#
|
|
||||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
|
||||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
|
||||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
|
||||||
type: application
|
|
||||||
|
|
||||||
# This is the chart version. This version number should be incremented each time you make changes
|
|
||||||
# to the chart and its templates, including the app version.
|
|
||||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
|
||||||
version: ${version}
|
|
||||||
|
|
||||||
# This is the version number of the application being deployed. This version number should be
|
|
||||||
# incremented each time you make changes to the application. Versions are not expected to
|
|
||||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
|
||||||
# It is recommended to use it with quotes.
|
|
||||||
appVersion: "${appVersion}"
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
{{/*
|
|
||||||
Expand the name of the chart.
|
|
||||||
*/}}
|
|
||||||
{{- define "judgeflow.name" -}}
|
|
||||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Create a default fully qualified app name.
|
|
||||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
|
||||||
If release name contains chart name it will be used as a full name.
|
|
||||||
*/}}
|
|
||||||
{{- define "judgeflow.fullname" -}}
|
|
||||||
{{- if .Values.fullnameOverride }}
|
|
||||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- else }}
|
|
||||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
|
||||||
{{- if contains $name .Release.Name }}
|
|
||||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- else }}
|
|
||||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Create chart name and version as used by the chart label.
|
|
||||||
*/}}
|
|
||||||
{{- define "judgeflow.chart" -}}
|
|
||||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Common labels
|
|
||||||
*/}}
|
|
||||||
{{- define "judgeflow.labels" -}}
|
|
||||||
helm.sh/chart: {{ include "judgeflow.chart" . }}
|
|
||||||
{{ include "judgeflow.selectorLabels" . }}
|
|
||||||
{{- if .Chart.AppVersion }}
|
|
||||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
|
||||||
{{- end }}
|
|
||||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Selector labels
|
|
||||||
*/}}
|
|
||||||
{{- define "judgeflow.selectorLabels" -}}
|
|
||||||
app.kubernetes.io/name: {{ include "judgeflow.name" . }}
|
|
||||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Create the name of the service account to use
|
|
||||||
*/}}
|
|
||||||
{{- define "judgeflow.serviceAccountName" -}}
|
|
||||||
{{- if .Values.serviceAccount.create }}
|
|
||||||
{{- default (include "judgeflow.fullname" .) .Values.serviceAccount.name }}
|
|
||||||
{{- else }}
|
|
||||||
{{- default "default" .Values.serviceAccount.name }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
{{- if .Values.autoscaling.enabled }}
|
|
||||||
apiVersion: autoscaling/v2
|
|
||||||
kind: HorizontalPodAutoscaler
|
|
||||||
metadata:
|
|
||||||
name: {{ include "judgeflow.fullname" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "judgeflow.labels" . | nindent 4 }}
|
|
||||||
spec:
|
|
||||||
scaleTargetRef:
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
name: {{ include "judgeflow.fullname" . }}
|
|
||||||
minReplicas: {{ .Values.autoscaling.minReplicas }}
|
|
||||||
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
|
|
||||||
metrics:
|
|
||||||
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
|
|
||||||
- type: Resource
|
|
||||||
resource:
|
|
||||||
name: cpu
|
|
||||||
target:
|
|
||||||
type: Utilization
|
|
||||||
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
|
||||||
- type: Resource
|
|
||||||
resource:
|
|
||||||
name: memory
|
|
||||||
target:
|
|
||||||
type: Utilization
|
|
||||||
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
{{- if .Values.ingress.enabled -}}
|
|
||||||
{{- $fullName := include "judgeflow.fullname" . -}}
|
|
||||||
{{- $svcPort := .Values.service.port -}}
|
|
||||||
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
|
||||||
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
|
|
||||||
{{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
|
||||||
apiVersion: networking.k8s.io/v1
|
|
||||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
|
||||||
apiVersion: networking.k8s.io/v1beta1
|
|
||||||
{{- else -}}
|
|
||||||
apiVersion: extensions/v1beta1
|
|
||||||
{{- end }}
|
|
||||||
kind: Ingress
|
|
||||||
metadata:
|
|
||||||
name: {{ $fullName }}
|
|
||||||
labels:
|
|
||||||
{{- include "judgeflow.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.ingress.annotations }}
|
|
||||||
annotations:
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
{{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
|
|
||||||
ingressClassName: {{ .Values.ingress.className }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.ingress.tls }}
|
|
||||||
tls:
|
|
||||||
{{- range .Values.ingress.tls }}
|
|
||||||
- hosts:
|
|
||||||
{{- range .hosts }}
|
|
||||||
- {{ . | quote }}
|
|
||||||
{{- end }}
|
|
||||||
secretName: {{ .secretName }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
rules:
|
|
||||||
{{- range .Values.ingress.hosts }}
|
|
||||||
- host: {{ .host | quote }}
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
{{- range .paths }}
|
|
||||||
- path: {{ .path }}
|
|
||||||
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
|
|
||||||
pathType: {{ .pathType }}
|
|
||||||
{{- end }}
|
|
||||||
backend:
|
|
||||||
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
|
|
||||||
service:
|
|
||||||
name: {{ $fullName }}
|
|
||||||
port:
|
|
||||||
number: {{ $svcPort }}
|
|
||||||
{{- else }}
|
|
||||||
serviceName: {{ $fullName }}
|
|
||||||
servicePort: {{ $svcPort }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
apiVersion: batch/v1
|
|
||||||
kind: Job
|
|
||||||
metadata:
|
|
||||||
name: {{ include "judgeflow.fullname" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "judgeflow.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.podLabels }}
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
{{- include "judgeflow.labels" . | nindent 8 }}
|
|
||||||
{{- with .Values.podLabels }}
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
{{- with .Values.priorityclassname }}
|
|
||||||
priorityClassName: "{{ . }}"
|
|
||||||
{{- end }}
|
|
||||||
containers:
|
|
||||||
- name: {{ .Chart.Name }}
|
|
||||||
securityContext:
|
|
||||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
|
||||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
|
||||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
|
||||||
{{- with .Values.env }}
|
|
||||||
env:
|
|
||||||
{{- toYaml . | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if and (hasKey .Values "service") (hasKey .Values.service "ports") }}
|
|
||||||
ports:
|
|
||||||
{{- range .Values.service.ports }}
|
|
||||||
- name: {{ .name }}
|
|
||||||
containerPort: {{ .port }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if hasKey .Values "command" }}
|
|
||||||
command: {{ .Values.command }}
|
|
||||||
{{- end }}
|
|
||||||
volumeMounts:
|
|
||||||
{{- toYaml .Values.volumeMounts | nindent 12 }}
|
|
||||||
resources:
|
|
||||||
{{- toYaml .Values.resources | nindent 12 }}
|
|
||||||
restartPolicy: Never
|
|
||||||
{{- with .Values.volumes }}
|
|
||||||
volumes:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.affinity }}
|
|
||||||
affinity:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.nodeSelector }}
|
|
||||||
nodeSelector:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.tolerations }}
|
|
||||||
tolerations:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
backoffLimit: 0
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
{{- if .Values.priorityclassname }}
|
|
||||||
apiVersion: scheduling.k8s.io/v1
|
|
||||||
kind: PriorityClass
|
|
||||||
metadata:
|
|
||||||
name: "{{ .Values.priorityclassname }}"
|
|
||||||
value: {{ .Values.priorityclassvalue }}
|
|
||||||
globalDefault: false
|
|
||||||
preemptionPolicy: "Never"
|
|
||||||
description: "This is a priority class."
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
{{- if and (hasKey .Values "service") (hasKey .Values.service "type") }}
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: {{ include "judgeflow.fullname" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "judgeflow.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.podLabels }}
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
type: {{ .Values.service.type }}
|
|
||||||
ports:
|
|
||||||
{{- range .Values.service.ports }}
|
|
||||||
- port: {{ .port }}
|
|
||||||
targetPort: {{ .port }}
|
|
||||||
protocol: TCP
|
|
||||||
name: {{ .name }}
|
|
||||||
{{- end }}
|
|
||||||
selector:
|
|
||||||
{{- include "judgeflow.selectorLabels" . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
{{- if .Values.serviceAccount.create -}}
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: {{ include "judgeflow.serviceAccountName" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "judgeflow.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.serviceAccount.annotations }}
|
|
||||||
annotations:
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Pod
|
|
||||||
metadata:
|
|
||||||
name: {{ include "judgeflow.fullname" . }}-test-connection
|
|
||||||
labels:
|
|
||||||
{{- include "judgeflow.labels" . | nindent 4 }}
|
|
||||||
annotations:
|
|
||||||
"helm.sh/hook": test
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: wget
|
|
||||||
image: busybox
|
|
||||||
command: ['wget']
|
|
||||||
args: ['{{ include "judgeflow.fullname" . }}:{{ .Values.service.port }}']
|
|
||||||
restartPolicy: Never
|
|
||||||
@@ -1,124 +0,0 @@
|
|||||||
# Default values for job_demo.
|
|
||||||
# This is a YAML-formatted file.
|
|
||||||
# Declare variables to be passed into your templates.
|
|
||||||
|
|
||||||
replicaCount: 1
|
|
||||||
|
|
||||||
image:
|
|
||||||
repository: "${imageRepo}"
|
|
||||||
pullPolicy: IfNotPresent
|
|
||||||
# Overrides the image tag whose default is the chart appVersion.
|
|
||||||
tag: "${imageTag}"
|
|
||||||
|
|
||||||
imagePullSecrets: []
|
|
||||||
nameOverride: ""
|
|
||||||
fullnameOverride: ""
|
|
||||||
|
|
||||||
serviceAccount:
|
|
||||||
# Specifies whether a service account should be created
|
|
||||||
create: true
|
|
||||||
# Annotations to add to the service account
|
|
||||||
annotations: {}
|
|
||||||
# The name of the service account to use.
|
|
||||||
# If not set and create is true, a name is generated using the fullname template
|
|
||||||
name: ""
|
|
||||||
|
|
||||||
podAnnotations: {}
|
|
||||||
|
|
||||||
podLabels:
|
|
||||||
contest.4pd.io/leaderboard-resource-type: judge_flow
|
|
||||||
contest.4pd.io/leaderboard-job-id: "0"
|
|
||||||
contest.4pd.io/leaderboard-submit-id: "0"
|
|
||||||
|
|
||||||
podSecurityContext: {}
|
|
||||||
# fsGroup: 2000
|
|
||||||
|
|
||||||
securityContext: {}
|
|
||||||
# capabilities:
|
|
||||||
# drop:
|
|
||||||
# - ALL
|
|
||||||
# readOnlyRootFilesystem: true
|
|
||||||
# runAsNonRoot: true
|
|
||||||
# runAsUser: 1000
|
|
||||||
|
|
||||||
service:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
port: 80
|
|
||||||
|
|
||||||
ingress:
|
|
||||||
enabled: false
|
|
||||||
className: ""
|
|
||||||
annotations: {}
|
|
||||||
# kubernetes.io/ingress.class: nginx
|
|
||||||
# kubernetes.io/tls-acme: "true"
|
|
||||||
hosts:
|
|
||||||
- host: chart-example.local
|
|
||||||
paths:
|
|
||||||
- path: /
|
|
||||||
pathType: ImplementationSpecific
|
|
||||||
tls: []
|
|
||||||
# - secretName: chart-example-tls
|
|
||||||
# hosts:
|
|
||||||
# - chart-example.local
|
|
||||||
|
|
||||||
resources:
|
|
||||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
|
||||||
# choice for the user. This also increases chances charts run on environments with little
|
|
||||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
|
||||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
|
||||||
limits:
|
|
||||||
cpu: 3000m
|
|
||||||
memory: 16Gi
|
|
||||||
requests:
|
|
||||||
cpu: 3000m
|
|
||||||
memory: 16Gi
|
|
||||||
|
|
||||||
autoscaling:
|
|
||||||
enabled: false
|
|
||||||
minReplicas: 1
|
|
||||||
maxReplicas: 100
|
|
||||||
targetCPUUtilizationPercentage: 80
|
|
||||||
# targetMemoryUtilizationPercentage: 80
|
|
||||||
|
|
||||||
nodeSelector:
|
|
||||||
juicefs: "on"
|
|
||||||
contest.4pd.io/cpu: INTEL-8358
|
|
||||||
|
|
||||||
tolerations: []
|
|
||||||
|
|
||||||
affinity: {}
|
|
||||||
|
|
||||||
env:
|
|
||||||
- name: TZ
|
|
||||||
value: Asia/Shanghai
|
|
||||||
- name: MY_POD_IP
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: status.podIP
|
|
||||||
|
|
||||||
#command: '["python","run.py"]'
|
|
||||||
|
|
||||||
volumeMounts:
|
|
||||||
- name: workspace
|
|
||||||
mountPath: /tmp/workspace
|
|
||||||
- name: datafile
|
|
||||||
mountPath: /tmp/datafile
|
|
||||||
- name: submit
|
|
||||||
mountPath: /tmp/submit_config
|
|
||||||
- name: juicefs-pv
|
|
||||||
mountPath: /tmp/juicefs
|
|
||||||
- name: customer
|
|
||||||
mountPath: /tmp/customer
|
|
||||||
- name: submit-private
|
|
||||||
mountPath: /tmp/submit_private
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
- name: juicefs-pv
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: juicefs-pvc
|
|
||||||
|
|
||||||
|
|
||||||
priorityclassname: ''
|
|
||||||
priorityclassvalue: '0'
|
|
||||||
BIN
helm-chart/sut/.DS_Store
vendored
BIN
helm-chart/sut/.DS_Store
vendored
Binary file not shown.
@@ -1,23 +0,0 @@
|
|||||||
# Patterns to ignore when building packages.
|
|
||||||
# This supports shell glob matching, relative path matching, and
|
|
||||||
# negation (prefixed with !). Only one pattern per line.
|
|
||||||
.DS_Store
|
|
||||||
# Common VCS dirs
|
|
||||||
.git/
|
|
||||||
.gitignore
|
|
||||||
.bzr/
|
|
||||||
.bzrignore
|
|
||||||
.hg/
|
|
||||||
.hgignore
|
|
||||||
.svn/
|
|
||||||
# Common backup files
|
|
||||||
*.swp
|
|
||||||
*.bak
|
|
||||||
*.tmp
|
|
||||||
*.orig
|
|
||||||
*~
|
|
||||||
# Various IDEs
|
|
||||||
.project
|
|
||||||
.idea/
|
|
||||||
*.tmproj
|
|
||||||
.vscode/
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
apiVersion: v2
|
|
||||||
name: sut
|
|
||||||
description: A Helm chart for Kubernetes
|
|
||||||
|
|
||||||
# A chart can be either an 'application' or a 'library' chart.
|
|
||||||
#
|
|
||||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
|
||||||
# to be deployed.
|
|
||||||
#
|
|
||||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
|
||||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
|
||||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
|
||||||
type: application
|
|
||||||
|
|
||||||
# This is the chart version. This version number should be incremented each time you make changes
|
|
||||||
# to the chart and its templates, including the app version.
|
|
||||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
|
||||||
version: 0.1.0
|
|
||||||
|
|
||||||
# This is the version number of the application being deployed. This version number should be
|
|
||||||
# incremented each time you make changes to the application. Versions are not expected to
|
|
||||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
|
||||||
# It is recommended to use it with quotes.
|
|
||||||
appVersion: "0.1.0"
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
{{/*
|
|
||||||
Expand the name of the chart.
|
|
||||||
*/}}
|
|
||||||
{{- define "sut.name" -}}
|
|
||||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Create a default fully qualified app name.
|
|
||||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
|
||||||
If release name contains chart name it will be used as a full name.
|
|
||||||
*/}}
|
|
||||||
{{- define "sut.fullname" -}}
|
|
||||||
{{- if .Values.fullnameOverride }}
|
|
||||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- else }}
|
|
||||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
|
||||||
{{- if contains $name .Release.Name }}
|
|
||||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- else }}
|
|
||||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Create chart name and version as used by the chart label.
|
|
||||||
*/}}
|
|
||||||
{{- define "sut.chart" -}}
|
|
||||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Common labels
|
|
||||||
*/}}
|
|
||||||
{{- define "sut.labels" -}}
|
|
||||||
helm.sh/chart: {{ include "sut.chart" . }}
|
|
||||||
{{ include "sut.selectorLabels" . }}
|
|
||||||
{{- if .Chart.AppVersion }}
|
|
||||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
|
||||||
{{- end }}
|
|
||||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Selector labels
|
|
||||||
*/}}
|
|
||||||
{{- define "sut.selectorLabels" -}}
|
|
||||||
app.kubernetes.io/name: {{ include "sut.name" . }}
|
|
||||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Create the name of the service account to use
|
|
||||||
*/}}
|
|
||||||
{{- define "sut.serviceAccountName" -}}
|
|
||||||
{{- if .Values.serviceAccount.create }}
|
|
||||||
{{- default (include "sut.fullname" .) .Values.serviceAccount.name }}
|
|
||||||
{{- else }}
|
|
||||||
{{- default "default" .Values.serviceAccount.name }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,94 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: {{ include "sut.fullname" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "sut.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.podLabels }}
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
{{- if not .Values.autoscaling.enabled }}
|
|
||||||
replicas: {{ .Values.replicaCount }}
|
|
||||||
{{- end }}
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
{{- include "sut.selectorLabels" . | nindent 6 }}
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
{{- with .Values.podAnnotations }}
|
|
||||||
annotations:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
labels:
|
|
||||||
{{- include "sut.labels" . | nindent 8 }}
|
|
||||||
{{- with .Values.podLabels }}
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
{{- with .Values.imagePullSecrets }}
|
|
||||||
imagePullSecrets:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
serviceAccountName: {{ include "sut.serviceAccountName" . }}
|
|
||||||
securityContext:
|
|
||||||
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
|
||||||
{{- with .Values.priorityclassname }}
|
|
||||||
priorityClassName: "{{ . }}"
|
|
||||||
{{- end }}
|
|
||||||
containers:
|
|
||||||
- name: {{ .Chart.Name }}
|
|
||||||
securityContext:
|
|
||||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
|
||||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
|
||||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
|
||||||
{{- with .Values.env }}
|
|
||||||
env:
|
|
||||||
{{- toYaml . | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
containerPort: {{ .Values.service.port }}
|
|
||||||
protocol: TCP
|
|
||||||
{{- with .Values.command }}
|
|
||||||
command:
|
|
||||||
{{- toYaml . | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
resources:
|
|
||||||
{{- toYaml .Values.resources | nindent 12 }}
|
|
||||||
{{- with .Values.volumeMounts }}
|
|
||||||
volumeMounts:
|
|
||||||
{{- toYaml . | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{- with .Values.livenessProbe }}
|
|
||||||
livenessProbe:
|
|
||||||
{{- toYaml . | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.readinessProbe }}
|
|
||||||
readinessProbe:
|
|
||||||
{{- toYaml . | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.startupProbe }}
|
|
||||||
startupProbe:
|
|
||||||
{{- toYaml . | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
{{- with .Values.volumes }}
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{- with .Values.nodeSelector }}
|
|
||||||
nodeSelector:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.affinity }}
|
|
||||||
affinity:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
tolerations:
|
|
||||||
- key: "hosttype"
|
|
||||||
operator: "Equal"
|
|
||||||
value: "iluvatar"
|
|
||||||
effect: "NoSchedule"
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
{{- if .Values.autoscaling.enabled }}
|
|
||||||
apiVersion: autoscaling/v2
|
|
||||||
kind: HorizontalPodAutoscaler
|
|
||||||
metadata:
|
|
||||||
name: {{ include "sut.fullname" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "sut.labels" . | nindent 4 }}
|
|
||||||
spec:
|
|
||||||
scaleTargetRef:
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
name: {{ include "sut.fullname" . }}
|
|
||||||
minReplicas: {{ .Values.autoscaling.minReplicas }}
|
|
||||||
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
|
|
||||||
metrics:
|
|
||||||
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
|
|
||||||
- type: Resource
|
|
||||||
resource:
|
|
||||||
name: cpu
|
|
||||||
target:
|
|
||||||
type: Utilization
|
|
||||||
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
|
||||||
- type: Resource
|
|
||||||
resource:
|
|
||||||
name: memory
|
|
||||||
target:
|
|
||||||
type: Utilization
|
|
||||||
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
{{- if .Values.ingress.enabled -}}
|
|
||||||
{{- $fullName := include "sut.fullname" . -}}
|
|
||||||
{{- $svcPort := .Values.service.port -}}
|
|
||||||
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
|
||||||
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
|
|
||||||
{{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
|
||||||
apiVersion: networking.k8s.io/v1
|
|
||||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
|
||||||
apiVersion: networking.k8s.io/v1beta1
|
|
||||||
{{- else -}}
|
|
||||||
apiVersion: extensions/v1beta1
|
|
||||||
{{- end }}
|
|
||||||
kind: Ingress
|
|
||||||
metadata:
|
|
||||||
name: {{ $fullName }}
|
|
||||||
labels:
|
|
||||||
{{- include "sut.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.ingress.annotations }}
|
|
||||||
annotations:
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
{{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
|
|
||||||
ingressClassName: {{ .Values.ingress.className }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.ingress.tls }}
|
|
||||||
tls:
|
|
||||||
{{- range .Values.ingress.tls }}
|
|
||||||
- hosts:
|
|
||||||
{{- range .hosts }}
|
|
||||||
- {{ . | quote }}
|
|
||||||
{{- end }}
|
|
||||||
secretName: {{ .secretName }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
rules:
|
|
||||||
{{- range .Values.ingress.hosts }}
|
|
||||||
- host: {{ .host | quote }}
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
{{- range .paths }}
|
|
||||||
- path: {{ .path }}
|
|
||||||
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
|
|
||||||
pathType: {{ .pathType }}
|
|
||||||
{{- end }}
|
|
||||||
backend:
|
|
||||||
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
|
|
||||||
service:
|
|
||||||
name: {{ $fullName }}
|
|
||||||
port:
|
|
||||||
number: {{ $svcPort }}
|
|
||||||
{{- else }}
|
|
||||||
serviceName: {{ $fullName }}
|
|
||||||
servicePort: {{ $svcPort }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: {{ include "sut.fullname" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "sut.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.podLabels }}
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
type: {{ .Values.service.type }}
|
|
||||||
ports:
|
|
||||||
- port: {{ .Values.service.port }}
|
|
||||||
targetPort: http
|
|
||||||
protocol: TCP
|
|
||||||
name: socket
|
|
||||||
selector:
|
|
||||||
{{- include "sut.selectorLabels" . | nindent 4 }}
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
{{- if .Values.serviceAccount.create -}}
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: {{ include "sut.serviceAccountName" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "sut.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.serviceAccount.annotations }}
|
|
||||||
annotations:
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
|
|
||||||
{{- end }}
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Pod
|
|
||||||
metadata:
|
|
||||||
name: "{{ include "sut.fullname" . }}-test-connection"
|
|
||||||
labels:
|
|
||||||
{{- include "sut.labels" . | nindent 4 }}
|
|
||||||
annotations:
|
|
||||||
"helm.sh/hook": test
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: wget
|
|
||||||
image: busybox
|
|
||||||
command: ['wget']
|
|
||||||
args: ['{{ include "sut.fullname" . }}:{{ .Values.service.port }}']
|
|
||||||
restartPolicy: Never
|
|
||||||
@@ -1,144 +0,0 @@
|
|||||||
# Default values for sut.
|
|
||||||
# This is a YAML-formatted file.
|
|
||||||
# Declare variables to be passed into your templates.
|
|
||||||
|
|
||||||
replicaCount: 1
|
|
||||||
|
|
||||||
image:
|
|
||||||
repository: harbor.4pd.io/lab-platform/inf/python
|
|
||||||
pullPolicy: IfNotPresent
|
|
||||||
# Overrides the image tag whose default is the chart appVersion.
|
|
||||||
tag: 3.9
|
|
||||||
|
|
||||||
imagePullSecrets: []
|
|
||||||
nameOverride: ""
|
|
||||||
fullnameOverride: ""
|
|
||||||
|
|
||||||
serviceAccount:
|
|
||||||
# Specifies whether a service account should be created
|
|
||||||
create: true
|
|
||||||
# Automatically mount a ServiceAccount's API credentials?
|
|
||||||
automount: true
|
|
||||||
# Annotations to add to the service account
|
|
||||||
annotations: {}
|
|
||||||
# The name of the service account to use.
|
|
||||||
# If not set and create is true, a name is generated using the fullname template
|
|
||||||
name: ""
|
|
||||||
|
|
||||||
podAnnotations: {}
|
|
||||||
podLabels: {}
|
|
||||||
podSecurityContext: {}
|
|
||||||
# fsGroup: 2000
|
|
||||||
|
|
||||||
securityContext: {}
|
|
||||||
# capabilities:
|
|
||||||
# drop:
|
|
||||||
# - ALL
|
|
||||||
# readOnlyRootFilesystem: true
|
|
||||||
# runAsNonRoot: true
|
|
||||||
# runAsUser: 1000
|
|
||||||
|
|
||||||
service:
|
|
||||||
type: ClusterIP
|
|
||||||
port: 80
|
|
||||||
|
|
||||||
ingress:
|
|
||||||
enabled: false
|
|
||||||
className: ""
|
|
||||||
annotations: {}
|
|
||||||
# kubernetes.io/ingress.class: nginx
|
|
||||||
# kubernetes.io/tls-acme: "true"
|
|
||||||
hosts:
|
|
||||||
- host: chart-example.local
|
|
||||||
paths:
|
|
||||||
- path: /
|
|
||||||
pathType: ImplementationSpecific
|
|
||||||
tls: []
|
|
||||||
# - secretName: chart-example-tls
|
|
||||||
# hosts:
|
|
||||||
# - chart-example.local
|
|
||||||
|
|
||||||
resources:
|
|
||||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
|
||||||
# choice for the user. This also increases chances charts run on environments with little
|
|
||||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
|
||||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
|
||||||
limits:
|
|
||||||
cpu: 1000m
|
|
||||||
memory: 4096Mi
|
|
||||||
requests:
|
|
||||||
cpu: 1000m
|
|
||||||
memory: 4096Mi
|
|
||||||
|
|
||||||
autoscaling:
|
|
||||||
enabled: false
|
|
||||||
minReplicas: 1
|
|
||||||
maxReplicas: 100
|
|
||||||
targetCPUUtilizationPercentage: 80
|
|
||||||
# targetMemoryUtilizationPercentage: 80
|
|
||||||
|
|
||||||
# Additional volumes on the output Deployment definition.
|
|
||||||
volumes: []
|
|
||||||
# - name: foo
|
|
||||||
# secret:
|
|
||||||
# secretName: mysecret
|
|
||||||
# optional: false
|
|
||||||
|
|
||||||
# Additional volumeMounts on the output Deployment definition.
|
|
||||||
volumeMounts: []
|
|
||||||
# - name: foo
|
|
||||||
# mountPath: "/etc/foo"
|
|
||||||
# readOnly: true
|
|
||||||
|
|
||||||
nodeSelector:
|
|
||||||
contest.4pd.io/accelerator: iluvatar-BI-V100
|
|
||||||
|
|
||||||
tolerations:
|
|
||||||
- key: hosttype
|
|
||||||
operator: Equal
|
|
||||||
value: iluvatar
|
|
||||||
effect: NoSchedule
|
|
||||||
|
|
||||||
|
|
||||||
affinity: {}
|
|
||||||
|
|
||||||
readinessProbe:
|
|
||||||
failureThreshold: 1000
|
|
||||||
httpGet:
|
|
||||||
path: /health
|
|
||||||
port: 80
|
|
||||||
scheme: HTTP
|
|
||||||
|
|
||||||
#readinessProbe:
|
|
||||||
# httpGet:
|
|
||||||
# path: /health
|
|
||||||
# port: 80
|
|
||||||
# scheme: HTTP
|
|
||||||
# initialDelaySeconds: 5 # 应用启动后等待 5 秒再开始探测
|
|
||||||
# failureThreshold: 5 # 连续失败 3 次后标记为未就绪
|
|
||||||
# successThreshold: 1 # 连续成功 1 次后标记为就绪
|
|
||||||
|
|
||||||
env:
|
|
||||||
- name: TZ
|
|
||||||
value: Asia/Shanghai
|
|
||||||
- name: MY_POD_NAME
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: metadata.name
|
|
||||||
- name: MY_POD_NAMESPACE
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: metadata.namespace
|
|
||||||
- name: MY_POD_IP
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: status.podIP
|
|
||||||
- name: MY_NODE_IP
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: status.hostIP
|
|
||||||
|
|
||||||
#command: ''
|
|
||||||
|
|
||||||
|
|
||||||
priorityclassname: ''
|
|
||||||
@@ -1,64 +0,0 @@
|
|||||||
import os
|
|
||||||
import tempfile
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
if os.path.exists("/tmp/submit_private"):
|
|
||||||
shutil.rmtree("/tmp/submit_private")
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tempdir:
|
|
||||||
config_path = os.path.join(tempdir, "config.json")
|
|
||||||
|
|
||||||
assert not os.system(f"ssh-keygen -f {tempdir}/ssh-key-ecdsa -t ecdsa -b 521 -q -N \"\"")
|
|
||||||
|
|
||||||
config = """
|
|
||||||
model: whisper
|
|
||||||
model_key: whisper
|
|
||||||
config.json:
|
|
||||||
name: 'faster-whisper-server:latest'
|
|
||||||
support_devices:
|
|
||||||
- cpu
|
|
||||||
model_path: ''
|
|
||||||
port: 8080
|
|
||||||
other_ports: []
|
|
||||||
other_ports_count: 1
|
|
||||||
entrypoint: start.bat
|
|
||||||
MIN_CHUNK: 2.5
|
|
||||||
MIN_ADD_CHUNK: 2.5
|
|
||||||
COMPUTE_TYPE: int8
|
|
||||||
NUM_WORKERS: 1
|
|
||||||
CPU_THREADS: 2
|
|
||||||
BEAM_SIZE: 5
|
|
||||||
BATCH: 1
|
|
||||||
LANG: auto
|
|
||||||
DEVICE: cpu
|
|
||||||
CHUNK_LENGTH: 5
|
|
||||||
CLASS_MODEL: ./models/faster-whisper-base
|
|
||||||
EN_MODEL: ./models/faster-whisper-base
|
|
||||||
ZH_MODEL: ./models/faster-whisper-base
|
|
||||||
RU_MODEL: ./models/faster-whisper-base
|
|
||||||
PT_MODEL: ./models/faster-whisper-base
|
|
||||||
AR_MODEL: ./models/faster-whisper-base
|
|
||||||
NEW_VERSION: 1
|
|
||||||
NEED_RESET: 0
|
|
||||||
leaderboard_options:
|
|
||||||
nfs:
|
|
||||||
- name: whisper
|
|
||||||
srcRelativePath: leaderboard/pc_asr/en.tar.gz
|
|
||||||
mountPoint: /tmp
|
|
||||||
source: ceph_customer
|
|
||||||
"""
|
|
||||||
|
|
||||||
with open(config_path, "w") as f:
|
|
||||||
f.write(config)
|
|
||||||
|
|
||||||
os.environ["SSH_KEY_DIR"] = tempdir
|
|
||||||
os.environ["SUBMIT_CONFIG_FILEPATH"] = config_path
|
|
||||||
os.environ["MODEL_MAPPING"] = '{"whisper": "edge-ml.tar.gz"}'
|
|
||||||
|
|
||||||
from run_async_a10 import get_sut_url_windows
|
|
||||||
|
|
||||||
|
|
||||||
print(get_sut_url_windows())
|
|
||||||
|
|
||||||
import time
|
|
||||||
time.sleep(3600)
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
export DATASET_FILEPATH=dataset/formatted1/de.zip
|
|
||||||
export RESULT_FILEPATH=out/result.json
|
|
||||||
export DETAILED_CASES_FILEPATH=out/detail_cases.json
|
|
||||||
export SUBMIT_CONFIG_FILEPATH=
|
|
||||||
export BENCHMARK_NAME=
|
|
||||||
export MY_POD_IP=127.0.0.1
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
[tool.black]
|
|
||||||
line-length = 80
|
|
||||||
target-version = ['py39']
|
|
||||||
|
|
||||||
[tool.flake8]
|
|
||||||
max-line-length = 88
|
|
||||||
count=true
|
|
||||||
per-file-ignores="./annotation/manager.py:F401"
|
|
||||||
exclude=["./label", "__pycache__", "./migrations", "./logs", "./pids", "./resources"]
|
|
||||||
ignore=["W503", "E203"]
|
|
||||||
enable-extensions="G"
|
|
||||||
application-import-names=["flake8-isort", "flake8-logging-format", "flake8-builtins"]
|
|
||||||
import-order-style="edited"
|
|
||||||
extend-ignore = ["E203", "E701"]
|
|
||||||
|
|
||||||
[tool.isort]
|
|
||||||
py_version=39
|
|
||||||
profile="black"
|
|
||||||
multi_line_output=9
|
|
||||||
line_length=80
|
|
||||||
group_by_package=true
|
|
||||||
case_sensitive=true
|
|
||||||
skip_gitignore=true
|
|
||||||
|
|
||||||
114
run.py
114
run.py
@@ -1,114 +0,0 @@
|
|||||||
import gc
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import zipfile
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
from schemas.context import ASRContext
|
|
||||||
from utils.client import Client
|
|
||||||
from utils.evaluator import BaseEvaluator
|
|
||||||
from utils.logger import logger
|
|
||||||
from utils.service import register_sut
|
|
||||||
|
|
||||||
IN_TEST = os.getenv("SUBMIT_CONFIG_FILEPATH", None) is None
|
|
||||||
UNIT_TEST = os.getenv("UNIT_TEST", 0)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
logger.info("执行……")
|
|
||||||
|
|
||||||
dataset_filepath = os.getenv(
|
|
||||||
"DATASET_FILEPATH",
|
|
||||||
"./tests/resources/en.zip",
|
|
||||||
)
|
|
||||||
submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "./tests/resources/submit_config")
|
|
||||||
result_filepath = os.getenv("RESULT_FILEPATH", "./out/result")
|
|
||||||
bad_cases_filepath = os.getenv("BAD_CASES_FILEPATH", "./out/badcase")
|
|
||||||
detail_cases_filepath = os.getenv("DETAILED_CASES_FILEPATH", "./out/detailcase.jsonl")
|
|
||||||
|
|
||||||
resource_name = os.getenv("BENCHMARK_NAME")
|
|
||||||
|
|
||||||
# 提交配置 & 启动被测服务
|
|
||||||
if os.getenv("DATASET_FILEPATH", ""):
|
|
||||||
from utils.helm import resource_check
|
|
||||||
|
|
||||||
with open(submit_config_filepath, "r") as fp:
|
|
||||||
st_config = yaml.safe_load(fp)
|
|
||||||
st_config["values"] = resource_check(st_config.get("values", {}))
|
|
||||||
if 'docker_images' in st_config:
|
|
||||||
sut_url = "ws://172.26.1.75:9827"
|
|
||||||
os.environ['test'] = '1'
|
|
||||||
elif 'docker_image' in st_config:
|
|
||||||
sut_url = register_sut(st_config, resource_name)
|
|
||||||
elif UNIT_TEST:
|
|
||||||
sut_url = "ws://172.27.231.36:80"
|
|
||||||
else:
|
|
||||||
logger.error("config 配置错误,没有 docker_image")
|
|
||||||
os._exit(1)
|
|
||||||
else:
|
|
||||||
os.environ['test'] = '1'
|
|
||||||
sut_url = "ws://172.27.231.36:80"
|
|
||||||
if UNIT_TEST:
|
|
||||||
exit(0)
|
|
||||||
|
|
||||||
"""
|
|
||||||
# 数据集处理
|
|
||||||
local_dataset_path = "./dataset"
|
|
||||||
os.makedirs(local_dataset_path, exist_ok=True)
|
|
||||||
with zipfile.ZipFile(dataset_filepath) as zf:
|
|
||||||
zf.extractall(local_dataset_path)
|
|
||||||
config_path = os.path.join(local_dataset_path, "data.yaml")
|
|
||||||
with open(config_path, "r") as fp:
|
|
||||||
dataset_config = yaml.safe_load(fp)
|
|
||||||
|
|
||||||
# 数据集信息
|
|
||||||
dataset_global_config = dataset_config.get("global", {})
|
|
||||||
dataset_query = dataset_config.get("query_data", {})
|
|
||||||
|
|
||||||
evaluator = BaseEvaluator()
|
|
||||||
|
|
||||||
# 开始预测
|
|
||||||
for idx, query_item in enumerate(dataset_query):
|
|
||||||
gc.collect()
|
|
||||||
logger.info(f"开始执行 {idx} 条数据")
|
|
||||||
|
|
||||||
context = ASRContext(**dataset_global_config)
|
|
||||||
context.lang = query_item.get("lang", context.lang)
|
|
||||||
context.file_path = os.path.join(local_dataset_path, query_item["file"])
|
|
||||||
# context.audio_length = query_item["audio_length"]
|
|
||||||
|
|
||||||
interactions = Client(sut_url, context).action()
|
|
||||||
context.append_labels(query_item["voice"])
|
|
||||||
context.append_preds(
|
|
||||||
interactions["predict_data"],
|
|
||||||
interactions["send_time"],
|
|
||||||
interactions["recv_time"],
|
|
||||||
)
|
|
||||||
context.fail = interactions["fail"]
|
|
||||||
if IN_TEST:
|
|
||||||
with open('output.txt', 'w') as fp:
|
|
||||||
original_stdout = sys.stdout
|
|
||||||
sys.stdout = fp
|
|
||||||
print(context)
|
|
||||||
sys.stdout = original_stdout
|
|
||||||
evaluator.evaluate(context)
|
|
||||||
detail_case = evaluator.gen_detail_case()
|
|
||||||
with open(detail_cases_filepath, "a") as fp:
|
|
||||||
fp.write(json.dumps(detail_case.to_dict(), ensure_ascii=False) + "\n")
|
|
||||||
time.sleep(4)
|
|
||||||
|
|
||||||
evaluator.post_evaluate()
|
|
||||||
output_result = evaluator.gen_result()
|
|
||||||
# print(evaluator.__dict__)
|
|
||||||
logger.info("执行完成. Result = {output_result}")
|
|
||||||
|
|
||||||
with open(result_filepath, "w") as fp:
|
|
||||||
json.dump(output_result, fp, indent=2, ensure_ascii=False)
|
|
||||||
with open(bad_cases_filepath, "w") as fp:
|
|
||||||
fp.write("当前榜单不存在 Bad Case\n")
|
|
||||||
"""
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
757
run_async_a10.py
757
run_async_a10.py
@@ -1,757 +0,0 @@
|
|||||||
import atexit
|
|
||||||
import concurrent.futures
|
|
||||||
import fcntl
|
|
||||||
import gc
|
|
||||||
import glob
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import random
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import tempfile
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
import zipfile
|
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
from fabric import Connection
|
|
||||||
from vmplatform import VMOS, Client, VMDataDisk
|
|
||||||
|
|
||||||
from schemas.context import ASRContext
|
|
||||||
from utils.client_async import ClientAsync
|
|
||||||
from utils.evaluator import BaseEvaluator
|
|
||||||
from utils.logger import logger
|
|
||||||
from utils.service import register_sut
|
|
||||||
|
|
||||||
IN_TEST = os.getenv("SUBMIT_CONFIG_FILEPATH", None) is None
|
|
||||||
UNIT_TEST = os.getenv("UNIT_TEST", 0)
|
|
||||||
|
|
||||||
DATASET_NUM = os.getenv("DATASET_NUM")
|
|
||||||
|
|
||||||
# vm榜单参数
|
|
||||||
SUT_TYPE = os.getenv("SUT_TYPE", "kubernetes")
|
|
||||||
SHARE_SUT = os.getenv("SHARE_SUT", "true") == "true"
|
|
||||||
VM_ID = 0
|
|
||||||
VM_IP = ""
|
|
||||||
do_deploy_chart = True
|
|
||||||
VM_CPU = int(os.getenv("VM_CPU", "2"))
|
|
||||||
VM_MEM = int(os.getenv("VM_MEM", "4096"))
|
|
||||||
MODEL_BASEPATH = os.getenv("MODEL_BASEPATH", "/tmp/customer/leaderboard/pc_asr")
|
|
||||||
MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
|
|
||||||
SSH_KEY_DIR = os.getenv("SSH_KEY_DIR", "/workspace")
|
|
||||||
SSH_PUBLIC_KEY_FILE = os.path.join(SSH_KEY_DIR, "ssh-key-ecdsa.pub")
|
|
||||||
SSH_KEY_FILE = os.path.join(SSH_KEY_DIR, "ssh-key-ecdsa")
|
|
||||||
|
|
||||||
CONNECT_KWARGS = {"key_filename": SSH_KEY_FILE}
|
|
||||||
|
|
||||||
# 共享sut参数
|
|
||||||
JOB_ID = os.getenv("JOB_ID")
|
|
||||||
dirname = "/tmp/submit_private/sut_share"
|
|
||||||
os.makedirs(dirname, exist_ok=True)
|
|
||||||
SUT_SHARE_LOCK = os.path.join(dirname, "lock.lock")
|
|
||||||
SUT_SHARE_USE_LOCK = os.path.join(dirname, "use.lock")
|
|
||||||
SUT_SHARE_STATUS = os.path.join(dirname, "status.json")
|
|
||||||
SUT_SHARE_JOB_STATUS = os.path.join(dirname, f"job_status.{JOB_ID}")
|
|
||||||
SUT_SHARE_PUBLIC_FAIL = os.path.join(dirname, "one_job_failed")
|
|
||||||
fd_lock = open(SUT_SHARE_USE_LOCK, "a")
|
|
||||||
|
|
||||||
|
|
||||||
def clean_vm_atexit():
|
|
||||||
global VM_ID, do_deploy_chart
|
|
||||||
if not VM_ID:
|
|
||||||
return
|
|
||||||
if not do_deploy_chart:
|
|
||||||
return
|
|
||||||
logger.info("删除vm")
|
|
||||||
vmclient = Client()
|
|
||||||
err_msg = vmclient.delete_vm(VM_ID)
|
|
||||||
if err_msg:
|
|
||||||
logger.warning(f"删除vm失败: {err_msg}")
|
|
||||||
|
|
||||||
|
|
||||||
def put_file_to_vm(c: Connection, local_path: str, remote_path: str):
|
|
||||||
logger.info(f"uploading file {local_path} to {remote_path}")
|
|
||||||
result = c.put(local_path, remote_path)
|
|
||||||
logger.info("uploaded {0.local} to {0.remote}".format(result))
|
|
||||||
|
|
||||||
|
|
||||||
def deploy_windows_sut():
|
|
||||||
global VM_ID
|
|
||||||
global VM_IP
|
|
||||||
|
|
||||||
submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "")
|
|
||||||
with open(submit_config_filepath, "r") as fp:
|
|
||||||
st_config = yaml.safe_load(fp)
|
|
||||||
assert "model" in st_config, "未配置model"
|
|
||||||
assert "model_key" in st_config, "未配置model_key"
|
|
||||||
assert "config.json" in st_config, "未配置config.json"
|
|
||||||
nfs = st_config.get("leaderboard_options", {}).get("nfs", [])
|
|
||||||
assert len(nfs) > 0, "未配置nfs"
|
|
||||||
assert st_config["model"] in MODEL_MAPPING, "提交模型不在可用模型范围内"
|
|
||||||
|
|
||||||
model = st_config["model"]
|
|
||||||
model_key = st_config["model_key"]
|
|
||||||
model_path = ""
|
|
||||||
config = st_config["config.json"]
|
|
||||||
exist = False
|
|
||||||
for nfs_item in nfs:
|
|
||||||
if nfs_item["name"] == model_key:
|
|
||||||
exist = True
|
|
||||||
if nfs_item["source"] == "ceph_customer":
|
|
||||||
model_path = os.path.join(
|
|
||||||
"/tmp/customer",
|
|
||||||
nfs_item["srcRelativePath"],
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
model_path = os.path.join(
|
|
||||||
"/tmp/juicefs",
|
|
||||||
nfs_item["srcRelativePath"],
|
|
||||||
)
|
|
||||||
break
|
|
||||||
if not exist:
|
|
||||||
raise RuntimeError(f"未找到nfs配置项 name={model_key}")
|
|
||||||
config_path = os.path.join(tempfile.mkdtemp(), "config.json")
|
|
||||||
model_dir = os.path.basename(model_path).split(".")[0]
|
|
||||||
config["model_path"] = f"E:\\model\\{model_dir}"
|
|
||||||
with open(config_path, "w") as fp:
|
|
||||||
json.dump(config, fp, ensure_ascii=False, indent=4)
|
|
||||||
|
|
||||||
vmclient = Client()
|
|
||||||
with open(SSH_PUBLIC_KEY_FILE, "r") as fp:
|
|
||||||
sshpublickey = fp.read().rstrip()
|
|
||||||
VM_ID = vmclient.create_vm(
|
|
||||||
"amd64",
|
|
||||||
VMOS.windows10,
|
|
||||||
VM_CPU,
|
|
||||||
VM_MEM,
|
|
||||||
"leaderboard-%s-submit-%s-job-%s"
|
|
||||||
% (
|
|
||||||
os.getenv("BENCHMARK_NAME"),
|
|
||||||
os.getenv("SUBMIT_ID"),
|
|
||||||
os.getenv("JOB_ID"),
|
|
||||||
),
|
|
||||||
sshpublickey,
|
|
||||||
datadisks=[
|
|
||||||
VMDataDisk(
|
|
||||||
size=50,
|
|
||||||
disk_type="ssd",
|
|
||||||
mount_path="/",
|
|
||||||
filesystem="NTFS",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
)
|
|
||||||
atexit.register(clean_vm_atexit)
|
|
||||||
signal.signal(signal.SIGTERM, lambda signum, _: sys.exit(signum))
|
|
||||||
VM_IP = vmclient.wait_until_vm_running(VM_ID)
|
|
||||||
logger.info("vm created successfully, vm_ip: %s", VM_IP)
|
|
||||||
|
|
||||||
def sut_startup():
|
|
||||||
with Connection(
|
|
||||||
VM_IP,
|
|
||||||
"administrator",
|
|
||||||
connect_kwargs=CONNECT_KWARGS,
|
|
||||||
) as c:
|
|
||||||
script_path = "E:\\base\\asr\\faster-whisper\\server"
|
|
||||||
script_path = "E:\\install\\asr\\sensevoice\\server"
|
|
||||||
bat_filepath = f"{script_path}\\start.bat"
|
|
||||||
config_filepath = "E:\\submit\\config.json"
|
|
||||||
result = c.run("")
|
|
||||||
assert result.ok
|
|
||||||
c.run(
|
|
||||||
f'cd /d {script_path} & set "EDGE_ML_ENV_HOME=E:\\install" & {bat_filepath} {config_filepath}',
|
|
||||||
warn=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
with Connection(
|
|
||||||
VM_IP,
|
|
||||||
"administrator",
|
|
||||||
connect_kwargs=CONNECT_KWARGS,
|
|
||||||
) as c:
|
|
||||||
model_filepath = os.path.join(MODEL_BASEPATH, MODEL_MAPPING[model])
|
|
||||||
filename = os.path.basename(model_filepath)
|
|
||||||
put_file_to_vm(c, model_filepath, "/E:/")
|
|
||||||
|
|
||||||
result = c.run("mkdir E:\\base")
|
|
||||||
assert result.ok
|
|
||||||
result = c.run("mkdir E:\\model")
|
|
||||||
assert result.ok
|
|
||||||
result = c.run("mkdir E:\\submit")
|
|
||||||
assert result.ok
|
|
||||||
|
|
||||||
result = c.run(
|
|
||||||
f"tar zxvf E:\\{filename} -C E:\\base --strip-components 1"
|
|
||||||
)
|
|
||||||
assert result.ok
|
|
||||||
|
|
||||||
result = c.run("E:\\base\\setup-win.bat E:\\install")
|
|
||||||
assert result.ok
|
|
||||||
|
|
||||||
put_file_to_vm(c, config_path, "/E:/submit")
|
|
||||||
put_file_to_vm(c, model_path, "/E:/model")
|
|
||||||
result = c.run(
|
|
||||||
f"tar zxvf E:\\model\\{os.path.basename(model_path)} -C E:\\model"
|
|
||||||
)
|
|
||||||
assert result.ok
|
|
||||||
threading.Thread(target=sut_startup, daemon=True).start()
|
|
||||||
time.sleep(60)
|
|
||||||
|
|
||||||
return f"ws://{VM_IP}:{config['port']}"
|
|
||||||
|
|
||||||
|
|
||||||
def deploy_macos_sut():
|
|
||||||
global VM_ID
|
|
||||||
global VM_IP
|
|
||||||
|
|
||||||
submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "")
|
|
||||||
with open(submit_config_filepath, "r") as fp:
|
|
||||||
st_config = yaml.safe_load(fp)
|
|
||||||
assert "model" in st_config, "未配置model"
|
|
||||||
assert "model_key" in st_config, "未配置model_key"
|
|
||||||
assert "config.json" in st_config, "未配置config.json"
|
|
||||||
nfs = st_config.get("leaderboard_options", {}).get("nfs", [])
|
|
||||||
assert len(nfs) > 0, "未配置nfs"
|
|
||||||
assert st_config["model"] in MODEL_MAPPING, "提交模型不在可用模型范围内"
|
|
||||||
|
|
||||||
model = st_config["model"]
|
|
||||||
model_key = st_config["model_key"]
|
|
||||||
model_path = ""
|
|
||||||
config = st_config["config.json"]
|
|
||||||
exist = False
|
|
||||||
for nfs_item in nfs:
|
|
||||||
if nfs_item["name"] == model_key:
|
|
||||||
exist = True
|
|
||||||
if nfs_item["source"] == "ceph_customer":
|
|
||||||
model_path = os.path.join(
|
|
||||||
"/tmp/customer",
|
|
||||||
nfs_item["srcRelativePath"],
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
model_path = os.path.join(
|
|
||||||
"/tmp/juicefs",
|
|
||||||
nfs_item["srcRelativePath"],
|
|
||||||
)
|
|
||||||
break
|
|
||||||
if not exist:
|
|
||||||
raise RuntimeError(f"未找到nfs配置项 name={model_key}")
|
|
||||||
config_path = os.path.join(tempfile.mkdtemp(), "config.json")
|
|
||||||
model_dir = os.path.basename(model_path).split(".")[0]
|
|
||||||
|
|
||||||
vmclient = Client()
|
|
||||||
with open(SSH_PUBLIC_KEY_FILE, "r") as fp:
|
|
||||||
sshpublickey = fp.read().rstrip()
|
|
||||||
VM_ID = vmclient.create_vm(
|
|
||||||
"amd64",
|
|
||||||
VMOS.macos12,
|
|
||||||
VM_CPU,
|
|
||||||
VM_MEM,
|
|
||||||
"leaderboard-%s-submit-%s-job-%s"
|
|
||||||
% (
|
|
||||||
os.getenv("BENCHMARK_NAME"),
|
|
||||||
os.getenv("SUBMIT_ID"),
|
|
||||||
os.getenv("JOB_ID"),
|
|
||||||
),
|
|
||||||
sshpublickey,
|
|
||||||
datadisks=[
|
|
||||||
VMDataDisk(
|
|
||||||
size=50,
|
|
||||||
disk_type="ssd",
|
|
||||||
mount_path="/",
|
|
||||||
filesystem="apfs",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
)
|
|
||||||
atexit.register(clean_vm_atexit)
|
|
||||||
signal.signal(signal.SIGTERM, lambda signum, _: sys.exit(signum))
|
|
||||||
VM_IP = vmclient.wait_until_vm_running(VM_ID)
|
|
||||||
logger.info("vm created successfully, vm_ip: %s", VM_IP)
|
|
||||||
|
|
||||||
with Connection(
|
|
||||||
VM_IP,
|
|
||||||
"admin",
|
|
||||||
connect_kwargs=CONNECT_KWARGS,
|
|
||||||
) as c:
|
|
||||||
result = c.run("ls -d /Volumes/data*")
|
|
||||||
assert result.ok
|
|
||||||
volume_path = result.stdout.strip()
|
|
||||||
|
|
||||||
config["model_path"] = f"{volume_path}/model/{model_dir}"
|
|
||||||
with open(config_path, "w") as fp:
|
|
||||||
json.dump(config, fp, ensure_ascii=False, indent=4)
|
|
||||||
|
|
||||||
def sut_startup():
|
|
||||||
with Connection(
|
|
||||||
VM_IP,
|
|
||||||
"admin",
|
|
||||||
connect_kwargs=CONNECT_KWARGS,
|
|
||||||
) as c:
|
|
||||||
script_path = f"{volume_path}/install/asr/sensevoice/server"
|
|
||||||
startsh = f"{script_path}/start.sh"
|
|
||||||
config_filepath = f"{volume_path}/submit/config.json"
|
|
||||||
c.run(
|
|
||||||
f"cd {script_path} && sh {startsh} {config_filepath}",
|
|
||||||
warn=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
with Connection(
|
|
||||||
VM_IP,
|
|
||||||
"admin",
|
|
||||||
connect_kwargs=CONNECT_KWARGS,
|
|
||||||
) as c:
|
|
||||||
model_filepath = os.path.join(MODEL_BASEPATH, MODEL_MAPPING[model])
|
|
||||||
filename = os.path.basename(model_filepath)
|
|
||||||
put_file_to_vm(c, model_filepath, f"{volume_path}")
|
|
||||||
|
|
||||||
result = c.run(f"mkdir {volume_path}/base")
|
|
||||||
assert result.ok
|
|
||||||
result = c.run(f"mkdir {volume_path}/model")
|
|
||||||
assert result.ok
|
|
||||||
result = c.run(f"mkdir {volume_path}/submit")
|
|
||||||
assert result.ok
|
|
||||||
|
|
||||||
result = c.run(
|
|
||||||
f"tar zxvf {volume_path}/{filename} -C {volume_path}/base --strip-components 1" # noqa: E501
|
|
||||||
)
|
|
||||||
assert result.ok
|
|
||||||
|
|
||||||
result = c.run(
|
|
||||||
f"sh {volume_path}/base/setup-mac.sh {volume_path}/install x64"
|
|
||||||
)
|
|
||||||
assert result.ok
|
|
||||||
|
|
||||||
put_file_to_vm(c, config_path, f"{volume_path}/submit")
|
|
||||||
put_file_to_vm(c, model_path, f"{volume_path}/model")
|
|
||||||
result = c.run(
|
|
||||||
f"tar zxvf {volume_path}/model/{os.path.basename(model_path)} -C {volume_path}/model" # noqa: E501
|
|
||||||
)
|
|
||||||
assert result.ok
|
|
||||||
threading.Thread(target=sut_startup, daemon=True).start()
|
|
||||||
time.sleep(60)
|
|
||||||
|
|
||||||
return f"ws://{VM_IP}:{config['port']}"
|
|
||||||
|
|
||||||
|
|
||||||
def get_sut_url_vm(vm_type: str):
|
|
||||||
global VM_ID
|
|
||||||
global VM_IP
|
|
||||||
global do_deploy_chart
|
|
||||||
|
|
||||||
do_deploy_chart = True
|
|
||||||
# 拉起SUT
|
|
||||||
|
|
||||||
def check_job_failed():
|
|
||||||
while True:
|
|
||||||
time.sleep(30)
|
|
||||||
if os.path.exists(SUT_SHARE_PUBLIC_FAIL):
|
|
||||||
logger.error("there is a job failed in current submit")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
sut_url = ""
|
|
||||||
threading.Thread(target=check_job_failed, daemon=True).start()
|
|
||||||
if SHARE_SUT:
|
|
||||||
|
|
||||||
time.sleep(10 * random.random())
|
|
||||||
try:
|
|
||||||
open(SUT_SHARE_LOCK, "x").close()
|
|
||||||
except Exception:
|
|
||||||
do_deploy_chart = False
|
|
||||||
|
|
||||||
start_at = time.time()
|
|
||||||
|
|
||||||
def file_last_updated_at(file: str):
|
|
||||||
return os.stat(file).st_mtime if os.path.exists(file) else start_at
|
|
||||||
|
|
||||||
if not do_deploy_chart:
|
|
||||||
with open(SUT_SHARE_JOB_STATUS, "w") as f:
|
|
||||||
f.write("waiting")
|
|
||||||
while (
|
|
||||||
time.time() - file_last_updated_at(SUT_SHARE_STATUS)
|
|
||||||
<= 60 * 60 * 24
|
|
||||||
):
|
|
||||||
logger.info(
|
|
||||||
"Waiting sut application to be deployed by another job"
|
|
||||||
)
|
|
||||||
time.sleep(10 + random.random())
|
|
||||||
if os.path.exists(SUT_SHARE_STATUS):
|
|
||||||
get_status = False
|
|
||||||
for _ in range(10):
|
|
||||||
try:
|
|
||||||
with open(SUT_SHARE_STATUS, "r") as f:
|
|
||||||
status = json.load(f)
|
|
||||||
get_status = True
|
|
||||||
break
|
|
||||||
except Exception:
|
|
||||||
time.sleep(1 + random.random())
|
|
||||||
continue
|
|
||||||
if not get_status:
|
|
||||||
raise RuntimeError(
|
|
||||||
"Failed to get status of sut application"
|
|
||||||
)
|
|
||||||
assert (
|
|
||||||
status.get("status") != "failed"
|
|
||||||
), "Failed to deploy sut application, \
|
|
||||||
please check other job logs"
|
|
||||||
if status.get("status") == "running":
|
|
||||||
VM_ID = status.get("vmid")
|
|
||||||
VM_IP = status.get("vmip")
|
|
||||||
sut_url = status.get("sut_url")
|
|
||||||
with open(SSH_PUBLIC_KEY_FILE, "w") as fp:
|
|
||||||
fp.write(status.get("pubkey"))
|
|
||||||
with open(SSH_KEY_FILE, "w") as fp:
|
|
||||||
fp.write(status.get("prikey"))
|
|
||||||
logger.info("Successfully get deployed sut application")
|
|
||||||
break
|
|
||||||
|
|
||||||
if do_deploy_chart:
|
|
||||||
try:
|
|
||||||
fcntl.flock(fd_lock, fcntl.LOCK_EX)
|
|
||||||
with open(SUT_SHARE_JOB_STATUS, "w") as f:
|
|
||||||
f.write("waiting")
|
|
||||||
pending = True
|
|
||||||
|
|
||||||
def update_status():
|
|
||||||
while pending:
|
|
||||||
time.sleep(30)
|
|
||||||
if not pending:
|
|
||||||
break
|
|
||||||
with open(SUT_SHARE_STATUS, "w") as f:
|
|
||||||
json.dump({"status": "pending"}, f)
|
|
||||||
|
|
||||||
threading.Thread(target=update_status, daemon=True).start()
|
|
||||||
if vm_type == "windows":
|
|
||||||
sut_url = deploy_windows_sut()
|
|
||||||
else:
|
|
||||||
sut_url = deploy_macos_sut()
|
|
||||||
except Exception:
|
|
||||||
open(SUT_SHARE_PUBLIC_FAIL, "w").close()
|
|
||||||
with open(SUT_SHARE_STATUS, "w") as f:
|
|
||||||
json.dump({"status": "failed"}, f)
|
|
||||||
raise
|
|
||||||
finally:
|
|
||||||
pending = False
|
|
||||||
with open(SUT_SHARE_STATUS, "w") as f:
|
|
||||||
pubkey = ""
|
|
||||||
with open(SSH_PUBLIC_KEY_FILE, "r") as fp:
|
|
||||||
pubkey = fp.read().rstrip()
|
|
||||||
prikey = ""
|
|
||||||
with open(SSH_KEY_FILE, "r") as fp:
|
|
||||||
prikey = fp.read()
|
|
||||||
json.dump(
|
|
||||||
{
|
|
||||||
"status": "running",
|
|
||||||
"vmid": VM_ID,
|
|
||||||
"vmip": VM_IP,
|
|
||||||
"pubkey": pubkey,
|
|
||||||
"sut_url": sut_url,
|
|
||||||
"prikey": prikey,
|
|
||||||
},
|
|
||||||
f,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
while True:
|
|
||||||
time.sleep(5 + random.random())
|
|
||||||
try:
|
|
||||||
fcntl.flock(fd_lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
||||||
break
|
|
||||||
except Exception:
|
|
||||||
logger.info("尝试抢占调用sut失败,继续等待 5s ...")
|
|
||||||
|
|
||||||
with open(SUT_SHARE_JOB_STATUS, "w") as f:
|
|
||||||
f.write("running")
|
|
||||||
|
|
||||||
return sut_url
|
|
||||||
|
|
||||||
|
|
||||||
def get_sut_url():
|
|
||||||
if SUT_TYPE in ("windows", "macos"):
|
|
||||||
return get_sut_url_vm(SUT_TYPE)
|
|
||||||
|
|
||||||
submit_config_filepath = os.getenv(
|
|
||||||
"SUBMIT_CONFIG_FILEPATH", "./tests/resources/submit_config"
|
|
||||||
)
|
|
||||||
CPU = os.getenv("SUT_CPU", "2")
|
|
||||||
MEMORY = os.getenv("SUT_MEMORY", "4Gi")
|
|
||||||
resource_name = os.getenv("BENCHMARK_NAME")
|
|
||||||
|
|
||||||
# 任务信息
|
|
||||||
# 斯拉夫语族:俄语、波兰语
|
|
||||||
# 日耳曼语族:英语、德语、荷兰语
|
|
||||||
# 拉丁语族(罗曼语族):西班牙语、葡萄牙语、法国语、意大利语
|
|
||||||
# 闪米特语族:阿拉伯语、希伯来语
|
|
||||||
|
|
||||||
# 提交配置 & 启动被测服务
|
|
||||||
if os.getenv("DATASET_FILEPATH", ""):
|
|
||||||
with open(submit_config_filepath, "r") as fp:
|
|
||||||
st_config = yaml.safe_load(fp)
|
|
||||||
if "values" not in st_config:
|
|
||||||
st_config["values"] = {}
|
|
||||||
st_config["values"]["resources"] = {}
|
|
||||||
st_config["values"]["resources"]["limits"] = {}
|
|
||||||
st_config["values"]["resources"]["limits"]["cpu"] = CPU
|
|
||||||
st_config["values"]["resources"]["limits"]["memory"] = MEMORY
|
|
||||||
# st_config["values"]['resources']['limits']['nvidia.com/gpu'] = '1'
|
|
||||||
# st_config["values"]['resources']['limits']['nvidia.com/gpumem'] = "1843"
|
|
||||||
# st_config["values"]['resources']['limits']['nvidia.com/gpucores'] = "8"
|
|
||||||
st_config["values"]["resources"]["requests"] = {}
|
|
||||||
st_config["values"]["resources"]["requests"]["cpu"] = CPU
|
|
||||||
st_config["values"]["resources"]["requests"]["memory"] = MEMORY
|
|
||||||
# st_config["values"]['resources']['requests']['nvidia.com/gpu'] = '1'
|
|
||||||
# st_config["values"]['resources']['requests']['nvidia.com/gpumem'] = "1843"
|
|
||||||
# st_config["values"]['resources']['requests']['nvidia.com/gpucores'] = "8"
|
|
||||||
# st_config['values']['nodeSelector'] = {}
|
|
||||||
# st_config["values"]["nodeSelector"][
|
|
||||||
# "contest.4pd.io/accelerator"
|
|
||||||
# ] = "A10vgpu"
|
|
||||||
# st_config['values']['tolerations'] = []
|
|
||||||
# toleration_item = {}
|
|
||||||
# toleration_item['key'] = 'hosttype'
|
|
||||||
# toleration_item['operator'] = 'Equal'
|
|
||||||
# toleration_item['value'] = 'vgpu'
|
|
||||||
# toleration_item['effect'] = 'NoSchedule'
|
|
||||||
# st_config['values']['tolerations'].append(toleration_item)
|
|
||||||
if os.getenv("RESOURCE_TYPE", "cpu") == "cpu":
|
|
||||||
values = st_config["values"]
|
|
||||||
limits = values.get("resources", {}).get("limits", {})
|
|
||||||
requests = values.get("resources", {}).get("requests", {})
|
|
||||||
if (
|
|
||||||
"nvidia.com/gpu" in limits
|
|
||||||
or "nvidia.com/gpumem" in limits
|
|
||||||
or "nvidia.com/gpucores" in limits
|
|
||||||
or "nvidia.com/gpu" in requests
|
|
||||||
or "nvidia.com/gpumem" in requests
|
|
||||||
or "nvidia.com/gpucores" in requests
|
|
||||||
):
|
|
||||||
raise Exception("禁止使用GPU!")
|
|
||||||
else:
|
|
||||||
vgpu_num = int(os.getenv("SUT_VGPU", "3"))
|
|
||||||
st_config["values"]["resources"]["limits"]["nvidia.com/gpu"] = (
|
|
||||||
str(vgpu_num)
|
|
||||||
)
|
|
||||||
st_config["values"]["resources"]["limits"][
|
|
||||||
"nvidia.com/gpumem"
|
|
||||||
] = str(1843 * vgpu_num)
|
|
||||||
st_config["values"]["resources"]["limits"][
|
|
||||||
"nvidia.com/gpucores"
|
|
||||||
] = str(8 * vgpu_num)
|
|
||||||
st_config["values"]["resources"]["requests"][
|
|
||||||
"nvidia.com/gpu"
|
|
||||||
] = str(vgpu_num)
|
|
||||||
st_config["values"]["resources"]["requests"][
|
|
||||||
"nvidia.com/gpumem"
|
|
||||||
] = str(1843 * vgpu_num)
|
|
||||||
st_config["values"]["resources"]["requests"][
|
|
||||||
"nvidia.com/gpucores"
|
|
||||||
] = str(8 * vgpu_num)
|
|
||||||
st_config["values"]["nodeSelector"] = {}
|
|
||||||
st_config["values"]["nodeSelector"][
|
|
||||||
"contest.4pd.io/accelerator"
|
|
||||||
] = "A10vgpu"
|
|
||||||
st_config["values"]["tolerations"] = []
|
|
||||||
toleration_item = {}
|
|
||||||
toleration_item["key"] = "hosttype"
|
|
||||||
toleration_item["operator"] = "Equal"
|
|
||||||
toleration_item["value"] = "vgpu"
|
|
||||||
toleration_item["effect"] = "NoSchedule"
|
|
||||||
st_config["values"]["tolerations"].append(toleration_item)
|
|
||||||
if "docker_images" in st_config:
|
|
||||||
sut_url = "ws://172.26.1.75:9827"
|
|
||||||
os.environ["test"] = "1"
|
|
||||||
elif "docker_image" in st_config:
|
|
||||||
sut_url = register_sut(st_config, resource_name)
|
|
||||||
elif UNIT_TEST:
|
|
||||||
sut_url = "ws://172.27.231.36:80"
|
|
||||||
else:
|
|
||||||
logger.error("config 配置错误,没有 docker_image")
|
|
||||||
os._exit(1)
|
|
||||||
return sut_url
|
|
||||||
else:
|
|
||||||
os.environ["test"] = "1"
|
|
||||||
sut_url = "ws://172.27.231.36:80"
|
|
||||||
sut_url = "ws://172.26.1.75:9827"
|
|
||||||
return sut_url
|
|
||||||
|
|
||||||
|
|
||||||
def load_merge_dataset(dataset_filepath: str) -> dict:
|
|
||||||
local_dataset_path = "./dataset"
|
|
||||||
os.makedirs(local_dataset_path, exist_ok=True)
|
|
||||||
with zipfile.ZipFile(dataset_filepath) as zf:
|
|
||||||
zf.extractall(local_dataset_path)
|
|
||||||
|
|
||||||
config = {}
|
|
||||||
sub_datasets = os.listdir(local_dataset_path)
|
|
||||||
for sub_dataset in sub_datasets:
|
|
||||||
if sub_dataset.startswith("asr."):
|
|
||||||
lang = sub_dataset[4:]
|
|
||||||
lang_path = os.path.join(local_dataset_path, lang)
|
|
||||||
os.makedirs(lang_path, exist_ok=True)
|
|
||||||
with zipfile.ZipFile(
|
|
||||||
os.path.join(local_dataset_path, sub_dataset)
|
|
||||||
) as zf:
|
|
||||||
zf.extractall(lang_path)
|
|
||||||
lang_config_path = os.path.join(lang_path, "data.yaml")
|
|
||||||
with open(lang_config_path, "r") as fp:
|
|
||||||
lang_config = yaml.safe_load(fp)
|
|
||||||
audio_lengths = {}
|
|
||||||
for query_item in lang_config.get("query_data", []):
|
|
||||||
audio_path = os.path.join(
|
|
||||||
lang_path,
|
|
||||||
query_item["file"],
|
|
||||||
)
|
|
||||||
query_item["file"] = audio_path
|
|
||||||
audio_lengths[query_item["file"]] = os.path.getsize(
|
|
||||||
audio_path,
|
|
||||||
)
|
|
||||||
lang_config["query_data"] = sorted(
|
|
||||||
lang_config.get("query_data", []),
|
|
||||||
key=lambda x: audio_lengths[x["file"]],
|
|
||||||
reverse=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
idx = 0
|
|
||||||
length = 0.0
|
|
||||||
for query_item in lang_config["query_data"]:
|
|
||||||
audio_length = audio_lengths[query_item["file"]]
|
|
||||||
length += audio_length / 32000
|
|
||||||
idx += 1
|
|
||||||
# 每个语言限制半个小时长度
|
|
||||||
if length >= 30 * 60:
|
|
||||||
break
|
|
||||||
|
|
||||||
lang_config["query_data"] = lang_config["query_data"][:idx]
|
|
||||||
config[lang] = lang_config
|
|
||||||
|
|
||||||
config["query_data"] = []
|
|
||||||
for lang, lang_config in config.items():
|
|
||||||
if lang == "query_data":
|
|
||||||
continue
|
|
||||||
for query_item in lang_config["query_data"]:
|
|
||||||
config["query_data"].append(
|
|
||||||
{
|
|
||||||
**query_item,
|
|
||||||
"lang": lang,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
random.Random(0).shuffle(config["query_data"])
|
|
||||||
|
|
||||||
return config
|
|
||||||
|
|
||||||
|
|
||||||
def postprocess_failed():
|
|
||||||
open(SUT_SHARE_PUBLIC_FAIL, "w").close()
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
dataset_filepath = os.getenv(
|
|
||||||
"DATASET_FILEPATH",
|
|
||||||
"/Users/4paradigm/Projects/dataset/asr/de.zip",
|
|
||||||
# "./tests/resources/en.zip",
|
|
||||||
)
|
|
||||||
result_filepath = os.getenv("RESULT_FILEPATH", "./out/result")
|
|
||||||
bad_cases_filepath = os.getenv("BAD_CASES_FILEPATH", "./out/badcase")
|
|
||||||
detail_cases_filepath = os.getenv(
|
|
||||||
"DETAILED_CASES_FILEPATH", "./out/detailcase.jsonl"
|
|
||||||
)
|
|
||||||
thread_num = int(os.getenv("THREAD_NUM", "1"))
|
|
||||||
|
|
||||||
# 数据集处理
|
|
||||||
config = {}
|
|
||||||
if os.getenv("MERGE_DATASET", "1"):
|
|
||||||
config = load_merge_dataset(dataset_filepath)
|
|
||||||
dataset_query = config["query_data"]
|
|
||||||
else:
|
|
||||||
local_dataset_path = "./dataset"
|
|
||||||
os.makedirs(local_dataset_path, exist_ok=True)
|
|
||||||
with zipfile.ZipFile(dataset_filepath) as zf:
|
|
||||||
zf.extractall(local_dataset_path)
|
|
||||||
config_path = os.path.join(local_dataset_path, "data.yaml")
|
|
||||||
with open(config_path, "r") as fp:
|
|
||||||
dataset_config = yaml.safe_load(fp)
|
|
||||||
# 读取所有的音频,进而获得音频的总长度,最后按照音频长度对 query_data 进行降序排序
|
|
||||||
lang = os.getenv("lang")
|
|
||||||
if lang is None:
|
|
||||||
lang = dataset_config.get("global", {}).get("lang", "en")
|
|
||||||
audio_lengths = []
|
|
||||||
for query_item in dataset_config.get("query_data", []):
|
|
||||||
query_item["lang"] = lang
|
|
||||||
audio_path = os.path.join(local_dataset_path, query_item["file"])
|
|
||||||
query_item["file"] = audio_path
|
|
||||||
audio_lengths.append(os.path.getsize(audio_path) / 1024 / 1024)
|
|
||||||
dataset_config["query_data"] = sorted(
|
|
||||||
dataset_config.get("query_data", []),
|
|
||||||
key=lambda x: audio_lengths[dataset_config["query_data"].index(x)],
|
|
||||||
reverse=True,
|
|
||||||
)
|
|
||||||
# 数据集信息
|
|
||||||
# dataset_global_config = dataset_config.get("global", {})
|
|
||||||
dataset_query = dataset_config.get("query_data", {})
|
|
||||||
config[lang] = dataset_config
|
|
||||||
|
|
||||||
# sut url
|
|
||||||
sut_url = get_sut_url()
|
|
||||||
|
|
||||||
try:
|
|
||||||
# 开始测试
|
|
||||||
logger.info("开始执行")
|
|
||||||
evaluator = BaseEvaluator()
|
|
||||||
future_list = []
|
|
||||||
with ThreadPoolExecutor(max_workers=thread_num) as executor:
|
|
||||||
for idx, query_item in enumerate(dataset_query):
|
|
||||||
context = ASRContext(
|
|
||||||
**config[query_item["lang"]].get("global", {}),
|
|
||||||
)
|
|
||||||
context.lang = query_item["lang"]
|
|
||||||
context.file_path = query_item["file"]
|
|
||||||
context.append_labels(query_item["voice"])
|
|
||||||
future = executor.submit(
|
|
||||||
ClientAsync(sut_url, context, idx).action
|
|
||||||
)
|
|
||||||
future_list.append(future)
|
|
||||||
for future in concurrent.futures.as_completed(future_list):
|
|
||||||
context = future.result()
|
|
||||||
evaluator.evaluate(context)
|
|
||||||
detail_case = evaluator.gen_detail_case()
|
|
||||||
with open(detail_cases_filepath, "a") as fp:
|
|
||||||
fp.write(
|
|
||||||
json.dumps(
|
|
||||||
detail_case.to_dict(),
|
|
||||||
ensure_ascii=False,
|
|
||||||
)
|
|
||||||
+ "\n",
|
|
||||||
)
|
|
||||||
del context
|
|
||||||
gc.collect()
|
|
||||||
|
|
||||||
evaluator.post_evaluate()
|
|
||||||
output_result = evaluator.gen_result()
|
|
||||||
logger.info("执行完成")
|
|
||||||
|
|
||||||
with open(result_filepath, "w") as fp:
|
|
||||||
json.dump(output_result, fp, indent=2, ensure_ascii=False)
|
|
||||||
with open(bad_cases_filepath, "w") as fp:
|
|
||||||
fp.write("当前榜单不存在 Bad Case\n")
|
|
||||||
|
|
||||||
if SHARE_SUT:
|
|
||||||
with open(SUT_SHARE_JOB_STATUS, "w") as f:
|
|
||||||
f.write("success")
|
|
||||||
|
|
||||||
fcntl.flock(fd_lock, fcntl.LOCK_UN)
|
|
||||||
fd_lock.close()
|
|
||||||
while SHARE_SUT and do_deploy_chart:
|
|
||||||
time.sleep(30)
|
|
||||||
success_num = 0
|
|
||||||
for job_status_file in glob.glob(dirname + "/job_status.*"):
|
|
||||||
with open(job_status_file, "r") as f:
|
|
||||||
job_status = f.read()
|
|
||||||
success_num += job_status == "success"
|
|
||||||
if success_num == int(DATASET_NUM):
|
|
||||||
break
|
|
||||||
logger.info("Waiting for all jobs to complete")
|
|
||||||
except Exception:
|
|
||||||
if SHARE_SUT:
|
|
||||||
postprocess_failed()
|
|
||||||
raise
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,90 +0,0 @@
|
|||||||
import os
|
|
||||||
from copy import deepcopy
|
|
||||||
from typing import Dict, List, Optional
|
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
from schemas.stream import StreamDataModel
|
|
||||||
|
|
||||||
|
|
||||||
class LabelContext(BaseModel):
|
|
||||||
start: float
|
|
||||||
end: float
|
|
||||||
answer: str
|
|
||||||
|
|
||||||
|
|
||||||
class PredContext(BaseModel):
|
|
||||||
recognition_results: StreamDataModel
|
|
||||||
recv_time: Optional[float] = Field(None)
|
|
||||||
send_time: Optional[float] = Field(None)
|
|
||||||
|
|
||||||
|
|
||||||
class ASRContext:
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
self.bits = kwargs.get("bits", 16)
|
|
||||||
self.channel = kwargs.get("channel", 1)
|
|
||||||
self.sample_rate = kwargs.get("sample_rate", 16000)
|
|
||||||
self.audio_format = kwargs.get("format", "wav")
|
|
||||||
self.enable_words = kwargs.get("enable_words", True)
|
|
||||||
self.char_contains_rate = kwargs.get("char_contains_rate", 0.8)
|
|
||||||
self.lang = os.getenv("lang")
|
|
||||||
if self.lang is None:
|
|
||||||
self.lang = kwargs.get("lang", "en")
|
|
||||||
self.stream = kwargs.get("stream", True)
|
|
||||||
|
|
||||||
self.wait_time = float(os.getenv("wait_time", 0.1))
|
|
||||||
self.chunk_size = self.sample_rate * self.bits / 8 * self.wait_time
|
|
||||||
if int(os.getenv('chunk_size_set', 0)):
|
|
||||||
self.chunk_size = int(os.getenv('chunk_size_set', 0))
|
|
||||||
|
|
||||||
self.audio_length = 0
|
|
||||||
self.file_path = ""
|
|
||||||
|
|
||||||
self.labels: List[LabelContext] = kwargs.get("labels", [])
|
|
||||||
self.preds: List[PredContext] = kwargs.get("preds", [])
|
|
||||||
|
|
||||||
self.label_sentences: List[str] = []
|
|
||||||
self.pred_sentences: List[str] = []
|
|
||||||
|
|
||||||
self.send_time_start_end = []
|
|
||||||
self.recv_time_start_end = []
|
|
||||||
|
|
||||||
self.fail = False
|
|
||||||
self.fail_char_contains_rate_num = 0
|
|
||||||
|
|
||||||
self.punctuation_num = 0
|
|
||||||
self.pred_punctuation_num = 0
|
|
||||||
|
|
||||||
def append_labels(self, voices: List[Dict]):
|
|
||||||
for voice_data in voices:
|
|
||||||
label_context = LabelContext(**voice_data)
|
|
||||||
self.labels.append(label_context)
|
|
||||||
|
|
||||||
def append_preds(
|
|
||||||
self,
|
|
||||||
predict_data: List[StreamDataModel],
|
|
||||||
send_time: List[float],
|
|
||||||
recv_time: List[float],
|
|
||||||
):
|
|
||||||
self.send_time_start_end = [send_time[0], send_time[-1]] if len(send_time) > 0 else []
|
|
||||||
self.recv_time_start_end = [recv_time[0], recv_time[-1]] if len(recv_time) > 0 else []
|
|
||||||
for pred_item, send_time_item, recv_time_item in zip(predict_data, send_time, recv_time):
|
|
||||||
pred_item = deepcopy(pred_item)
|
|
||||||
pred_context = PredContext(recognition_results=pred_item.model_dump())
|
|
||||||
pred_context.send_time = send_time_item
|
|
||||||
pred_context.recv_time = recv_time_item
|
|
||||||
self.preds.append(pred_context)
|
|
||||||
|
|
||||||
def to_dict(self):
|
|
||||||
return {
|
|
||||||
"bits": self.bits,
|
|
||||||
"channel": self.channel,
|
|
||||||
"sample_rate": self.sample_rate,
|
|
||||||
"audio_format": self.audio_format,
|
|
||||||
"enable_words": self.enable_words,
|
|
||||||
"stream": self.stream,
|
|
||||||
"wait_time": self.wait_time,
|
|
||||||
"chunk_size": self.chunk_size,
|
|
||||||
"labels": [item.model_dump_json() for item in self.labels],
|
|
||||||
"preds": [item.model_dump_json() for item in self.preds],
|
|
||||||
}
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
from typing import List
|
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
|
|
||||||
class QueryDataSentence(BaseModel):
|
|
||||||
answer: str = Field(description="文本label")
|
|
||||||
start: float = Field(description="句子开始时间")
|
|
||||||
end: float = Field(description="句子结束时间")
|
|
||||||
|
|
||||||
|
|
||||||
class QueryData(BaseModel):
|
|
||||||
lang: str = Field(description="语言")
|
|
||||||
file: str = Field(description="音频文件位置")
|
|
||||||
duration: float = Field(description="音频长度")
|
|
||||||
voice: List[QueryDataSentence] = Field(
|
|
||||||
description="音频文件的文本label内容"
|
|
||||||
)
|
|
||||||
@@ -1,66 +0,0 @@
|
|||||||
from typing import List
|
|
||||||
|
|
||||||
from pydantic import BaseModel, ValidationError, field_validator
|
|
||||||
from pydantic import model_validator
|
|
||||||
|
|
||||||
|
|
||||||
class StreamWordsModel(BaseModel):
|
|
||||||
text: str
|
|
||||||
start_time: float
|
|
||||||
end_time: float
|
|
||||||
|
|
||||||
@model_validator(mode="after")
|
|
||||||
def check_result(self):
|
|
||||||
if self.end_time < self.start_time:
|
|
||||||
raise ValidationError("end-time 小于 start-time, error")
|
|
||||||
return self
|
|
||||||
|
|
||||||
|
|
||||||
class StreamDataModel(BaseModel):
|
|
||||||
text: str
|
|
||||||
language: str
|
|
||||||
final_result: bool
|
|
||||||
para_seq: int
|
|
||||||
start_time: float
|
|
||||||
end_time: float
|
|
||||||
words: List[StreamWordsModel]
|
|
||||||
|
|
||||||
@model_validator(mode="after")
|
|
||||||
def check_result(self):
|
|
||||||
if self.end_time < self.start_time:
|
|
||||||
raise ValidationError("end-time 小于 start-time, error")
|
|
||||||
return self
|
|
||||||
|
|
||||||
|
|
||||||
class StreamResultModel(BaseModel):
|
|
||||||
asr_results: StreamDataModel
|
|
||||||
|
|
||||||
@field_validator('asr_results', mode="after")
|
|
||||||
def convert_to_seconds(cls, v: StreamDataModel, values):
|
|
||||||
# 在这里处理除以1000的逻辑
|
|
||||||
v.end_time = v.end_time / 1000
|
|
||||||
v.start_time = v.start_time / 1000
|
|
||||||
for word in v.words:
|
|
||||||
word.start_time /= 1000
|
|
||||||
word.end_time /= 1000
|
|
||||||
return v
|
|
||||||
|
|
||||||
class Config:
|
|
||||||
validate_assignment = True
|
|
||||||
|
|
||||||
|
|
||||||
class NonStreamDataModel(BaseModel):
|
|
||||||
text: str
|
|
||||||
para_seq: int
|
|
||||||
start_time: float
|
|
||||||
end_time: float
|
|
||||||
|
|
||||||
@model_validator(mode="after")
|
|
||||||
def check_result(self):
|
|
||||||
if self.end_time < self.start_time:
|
|
||||||
raise ValidationError("end-time 小于 start-time, error")
|
|
||||||
return self
|
|
||||||
|
|
||||||
|
|
||||||
class NonStreamResultModel(BaseModel):
|
|
||||||
contents: List[NonStreamDataModel]
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
import os
|
|
||||||
import sys
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
|
|
||||||
def main(dataset_dir):
|
|
||||||
dirs = os.listdir(dataset_dir)
|
|
||||||
dirs = list(
|
|
||||||
filter(lambda x: os.path.isdir(os.path.join(dataset_dir, x)), dirs)
|
|
||||||
)
|
|
||||||
|
|
||||||
problem_dirs = set()
|
|
||||||
problem_count = defaultdict(int)
|
|
||||||
for dir in dirs:
|
|
||||||
with open(os.path.join(dataset_dir, dir, "data.yaml"), "r") as f:
|
|
||||||
data = yaml.full_load(f)
|
|
||||||
for query_i, query in enumerate(data["query_data"]):
|
|
||||||
voices = sorted(query["voice"], key=lambda x: x["start"])
|
|
||||||
if voices != query["voice"]:
|
|
||||||
print("-----", dir)
|
|
||||||
if voices[0]["start"] > voices[0]["end"]:
|
|
||||||
print(
|
|
||||||
"err1: %s 第%s个query的第%d个voice的start大于end: %s"
|
|
||||||
% (dir, query_i, 0, voices[0]["answer"])
|
|
||||||
)
|
|
||||||
problem_dirs.add(dir)
|
|
||||||
for voice_i in range(1, len(voices)):
|
|
||||||
voice = voices[voice_i]
|
|
||||||
if voice["start"] > voice["end"]:
|
|
||||||
print(
|
|
||||||
"err1: %s 第%s个query的第%d个voice的start大于end: %s"
|
|
||||||
% (dir, query_i, voice_i, voice["answer"])
|
|
||||||
)
|
|
||||||
problem_dirs.add(dir)
|
|
||||||
if voice["start"] < voices[voice_i - 1]["end"]:
|
|
||||||
print(
|
|
||||||
"err2: %s 第%s个query的第%d个voice的start小于前一个voice的end: %s"
|
|
||||||
% (dir, query_i, voice_i, voice["answer"])
|
|
||||||
)
|
|
||||||
problem_dirs.add(dir)
|
|
||||||
problem_count[dir] += 1
|
|
||||||
print(len(dirs))
|
|
||||||
print(problem_dirs)
|
|
||||||
print(problem_count)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
if len(sys.argv) < 2:
|
|
||||||
print("指定 测试数据集文件夹")
|
|
||||||
sys.exit(1)
|
|
||||||
main(sys.argv[1])
|
|
||||||
@@ -1,108 +0,0 @@
|
|||||||
import json
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
import sys
|
|
||||||
import zipfile
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
"""
|
|
||||||
target
|
|
||||||
{
|
|
||||||
"global": {
|
|
||||||
"lang": ""
|
|
||||||
},
|
|
||||||
"query_data": [
|
|
||||||
"file": "",
|
|
||||||
"duration": 2.0,
|
|
||||||
"voice": [
|
|
||||||
{
|
|
||||||
"answer": "",
|
|
||||||
"start": 0.0,
|
|
||||||
"end": 1.0
|
|
||||||
}
|
|
||||||
]
|
|
||||||
]
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def situation_a(meta, dataset_folder, output_folder):
|
|
||||||
"""
|
|
||||||
{
|
|
||||||
"combined": {
|
|
||||||
"en": [
|
|
||||||
{
|
|
||||||
"wav": "*.wav",
|
|
||||||
"transcriptions": [
|
|
||||||
{
|
|
||||||
"text": "",
|
|
||||||
"start": 0.0,
|
|
||||||
"end": 1.0
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"duration": 2.0
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
meta = meta["combined"]
|
|
||||||
|
|
||||||
for lang, arr in meta.items():
|
|
||||||
print("processing", lang)
|
|
||||||
assert len(lang) == 2
|
|
||||||
lang_folder = os.path.join(output_folder, lang)
|
|
||||||
os.makedirs(lang_folder, exist_ok=True)
|
|
||||||
data = {"global": {"lang": lang}, "query_data": []}
|
|
||||||
query_data = data["query_data"]
|
|
||||||
for item in arr:
|
|
||||||
os.makedirs(
|
|
||||||
os.path.join(lang_folder, os.path.dirname(item["wav"])),
|
|
||||||
exist_ok=True,
|
|
||||||
)
|
|
||||||
mp3_file = item["wav"][:-4] + ".mp3"
|
|
||||||
shutil.copyfile(
|
|
||||||
os.path.join(dataset_folder, mp3_file),
|
|
||||||
os.path.join(lang_folder, mp3_file),
|
|
||||||
)
|
|
||||||
query_data_item = {
|
|
||||||
"file": mp3_file,
|
|
||||||
"duration": float(item["duration"]),
|
|
||||||
"voice": [],
|
|
||||||
}
|
|
||||||
query_data.append(query_data_item)
|
|
||||||
voice = query_data_item["voice"]
|
|
||||||
for v in item["transcriptions"]:
|
|
||||||
voice.append(
|
|
||||||
{
|
|
||||||
"answer": v["text"],
|
|
||||||
"start": float(v["start"]),
|
|
||||||
"end": float(v["end"]),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
with open(os.path.join(lang_folder, "data.yaml"), "w") as f:
|
|
||||||
yaml.dump(data, f, indent=2, allow_unicode=True, encoding="utf-8")
|
|
||||||
with zipfile.ZipFile(
|
|
||||||
os.path.join(output_folder, lang + ".zip"), "w"
|
|
||||||
) as ziper:
|
|
||||||
dirname = lang_folder
|
|
||||||
for path, _, files in os.walk(dirname):
|
|
||||||
for file in files:
|
|
||||||
ziper.write(
|
|
||||||
os.path.join(path, file),
|
|
||||||
os.path.join(path[len(dirname) :], file),
|
|
||||||
zipfile.ZIP_DEFLATED,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
if len(sys.argv) < 3:
|
|
||||||
print("指定 数据集文件夹路径 输出路径")
|
|
||||||
sys.exit(1)
|
|
||||||
dataset_folder = sys.argv[1]
|
|
||||||
output_folder = sys.argv[2]
|
|
||||||
|
|
||||||
with open(os.path.join(dataset_folder, "meta.json")) as f:
|
|
||||||
meta = json.load(f)
|
|
||||||
situation_a(meta, dataset_folder, output_folder)
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
import json
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from schemas.dataset import QueryData
|
|
||||||
from schemas.stream import StreamDataModel
|
|
||||||
from utils.evaluator_plus import evaluate_editops
|
|
||||||
|
|
||||||
|
|
||||||
def main(detailcase_file: str):
|
|
||||||
with open(detailcase_file) as f:
|
|
||||||
d = json.load(f)[0]
|
|
||||||
preds = d["preds"]
|
|
||||||
preds = list(map(lambda x: StreamDataModel(**x), preds))
|
|
||||||
preds = list(filter(lambda x: x.final_result, preds))
|
|
||||||
label = d["label"]
|
|
||||||
label = QueryData(**label)
|
|
||||||
print(evaluate_editops(label, preds))
|
|
||||||
|
|
||||||
|
|
||||||
def evaluate_from_record(detailcase_file: str, record_path: str):
|
|
||||||
with open(detailcase_file) as f:
|
|
||||||
d = json.load(f)[0]
|
|
||||||
label = d["label"]
|
|
||||||
label = QueryData(**label)
|
|
||||||
with open(record_path) as f:
|
|
||||||
record = json.load(f)
|
|
||||||
tokens_pred = record["tokens_pred"]
|
|
||||||
tokens_label = record["tokens_label"]
|
|
||||||
recognition_results = record["recognition_results"]
|
|
||||||
recognition_results = list(
|
|
||||||
map(lambda x: StreamDataModel(**x), recognition_results)
|
|
||||||
)
|
|
||||||
a, b = [], []
|
|
||||||
for i, rr in enumerate(recognition_results):
|
|
||||||
if rr.final_result:
|
|
||||||
a.append(tokens_pred[i])
|
|
||||||
b.append(rr)
|
|
||||||
tokens_pred = a
|
|
||||||
recognition_results = b
|
|
||||||
|
|
||||||
print(
|
|
||||||
evaluate_editops(
|
|
||||||
label,
|
|
||||||
recognition_results,
|
|
||||||
tokens_pred,
|
|
||||||
tokens_label,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
if len(sys.argv) < 2:
|
|
||||||
print("请指定 detailcase 文件路径")
|
|
||||||
sys.exit(1)
|
|
||||||
main(sys.argv[1])
|
|
||||||
# evaluate_from_record(sys.argv[1], sys.argv[2])
|
|
||||||
BIN
ssh-keygen
BIN
ssh-keygen
Binary file not shown.
@@ -1,11 +0,0 @@
|
|||||||
FROM harbor.4pd.io/inf/base-python3.8-ubuntu:1.1.0
|
|
||||||
|
|
||||||
WORKDIR /workspace
|
|
||||||
|
|
||||||
ADD ./requirements.txt /workspace
|
|
||||||
RUN pip install -r ./requirements.txt -i https://nexus.4pd.io/repository/pypi-all/simple --trusted-host nexus.4pd.io --extra-index-url https://mirrors.aliyun.com/pypi/simple/ \
|
|
||||||
&& pip cache purge
|
|
||||||
|
|
||||||
ADD . /workspace
|
|
||||||
|
|
||||||
CMD ["python", "main.py"]
|
|
||||||
@@ -1,313 +0,0 @@
|
|||||||
import logging
|
|
||||||
import os
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import flask
|
|
||||||
import requests
|
|
||||||
from werkzeug.datastructures import FileStorage
|
|
||||||
|
|
||||||
app = flask.Flask(__name__)
|
|
||||||
heartbeat_active = False
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
log.propagate = False
|
|
||||||
|
|
||||||
level = logging.INFO
|
|
||||||
|
|
||||||
log.setLevel(level)
|
|
||||||
|
|
||||||
formatter = logging.Formatter(
|
|
||||||
"[%(asctime)s] %(levelname)s : %(pathname)s:%(lineno)d - %(message)s",
|
|
||||||
"%Y-%m-%d %H:%M:%S",
|
|
||||||
)
|
|
||||||
|
|
||||||
streamHandler = logging.StreamHandler()
|
|
||||||
streamHandler.setLevel(level)
|
|
||||||
streamHandler.setFormatter(formatter)
|
|
||||||
log.addHandler(streamHandler)
|
|
||||||
|
|
||||||
|
|
||||||
def heartbeat(url):
|
|
||||||
global heartbeat_active
|
|
||||||
if heartbeat_active:
|
|
||||||
return
|
|
||||||
heartbeat_active = True
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
requests.post(url, json={"status": "RUNNING"})
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
time.sleep(10)
|
|
||||||
|
|
||||||
|
|
||||||
def asr(
|
|
||||||
audio_file: FileStorage,
|
|
||||||
language: Optional[str],
|
|
||||||
progressCallbackUrl: str,
|
|
||||||
taskId: str,
|
|
||||||
):
|
|
||||||
"""TODO: 读取audio_file, 调用语音识别服务, 实时返回识别结果"""
|
|
||||||
|
|
||||||
# ignore BEGIN
|
|
||||||
# 此处为榜单本地测试使用
|
|
||||||
if os.getenv("LOCAL_TEST"):
|
|
||||||
return local_test(progressCallbackUrl, taskId)
|
|
||||||
# ignore END
|
|
||||||
|
|
||||||
language = "de"
|
|
||||||
# 某一次识别返回
|
|
||||||
requests.post(
|
|
||||||
progressCallbackUrl,
|
|
||||||
json={
|
|
||||||
"taskId": taskId,
|
|
||||||
"status": "RUNNING",
|
|
||||||
"recognition_results": { # 传增量结果, status如果是FINISHED, 或者ERROR, 这个字段请不要传值
|
|
||||||
"text": "最先启动的还是",
|
|
||||||
"final_result": True,
|
|
||||||
"para_seq": 0,
|
|
||||||
"language": language,
|
|
||||||
"start_time": 6300,
|
|
||||||
"end_time": 6421,
|
|
||||||
"words": [
|
|
||||||
{
|
|
||||||
"text": "最",
|
|
||||||
"start_time": 6300,
|
|
||||||
"end_time": 6321,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "先",
|
|
||||||
"start_time": 6321,
|
|
||||||
"end_time": 6345,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "启",
|
|
||||||
"start_time": 6345,
|
|
||||||
"end_time": 6350,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "动",
|
|
||||||
"start_time": 6350,
|
|
||||||
"end_time": 6370,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "的",
|
|
||||||
"start_time": 6370,
|
|
||||||
"end_time": 6386,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "还",
|
|
||||||
"start_time": 6386,
|
|
||||||
"end_time": 6421,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "是",
|
|
||||||
"start_time": 6421,
|
|
||||||
"end_time": 6435,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
# ... 识别结果返回完毕
|
|
||||||
|
|
||||||
# 识别结束
|
|
||||||
requests.post(
|
|
||||||
progressCallbackUrl,
|
|
||||||
json={
|
|
||||||
"taskId": taskId,
|
|
||||||
"status": "FINISHED",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/predict")
|
|
||||||
def predict():
|
|
||||||
body = flask.request.form
|
|
||||||
language = body.get("language")
|
|
||||||
if language is None:
|
|
||||||
"自行判断语种"
|
|
||||||
taskId = body["taskId"]
|
|
||||||
progressCallbackUrl = body["progressCallbackUrl"]
|
|
||||||
heartbeatUrl = body["heartbeatUrl"]
|
|
||||||
|
|
||||||
threading.Thread(
|
|
||||||
target=heartbeat, args=(heartbeatUrl,), daemon=True
|
|
||||||
).start()
|
|
||||||
|
|
||||||
audio_file = flask.request.files["file"]
|
|
||||||
# audio_file.stream # 读取文件流
|
|
||||||
# audio_file.save("audio.mp3") # 保存文件
|
|
||||||
threading.Thread(
|
|
||||||
target=asr,
|
|
||||||
args=(audio_file, language, progressCallbackUrl, taskId),
|
|
||||||
daemon=True,
|
|
||||||
).start()
|
|
||||||
return flask.jsonify({"status": "OK"})
|
|
||||||
|
|
||||||
|
|
||||||
# ignore BEGIN
|
|
||||||
def local_test(progressCallbackUrl: str, taskId: str):
|
|
||||||
"""忽略此方法, 此方法为榜单本地调试使用"""
|
|
||||||
import random
|
|
||||||
import re
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
def callback(content):
|
|
||||||
try:
|
|
||||||
if content is None:
|
|
||||||
requests.post(
|
|
||||||
progressCallbackUrl,
|
|
||||||
json={"taskId": taskId, "status": "FINISHED"},
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
requests.post(
|
|
||||||
progressCallbackUrl,
|
|
||||||
json={
|
|
||||||
"taskId": taskId,
|
|
||||||
"status": "RUNNING",
|
|
||||||
"recognition_results": content,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
with open(
|
|
||||||
os.getenv("LOCAL_TEST_DATA_PATH", "../dataset/out/data.yaml")
|
|
||||||
) as f:
|
|
||||||
data = yaml.full_load(f)
|
|
||||||
|
|
||||||
voices = data["query_data"][0]["voice"]
|
|
||||||
|
|
||||||
# 首次发送
|
|
||||||
first_send_time = random.randint(3, 5)
|
|
||||||
send_interval = random.random() * 0
|
|
||||||
log.info("首次发送%ss 发送间隔%ss" % (first_send_time, send_interval))
|
|
||||||
time.sleep(first_send_time)
|
|
||||||
|
|
||||||
# 将句子拼接到一起
|
|
||||||
if random.random() < 0.3:
|
|
||||||
log.info("将部分句子合并成单句 每次合并的句子不超过3句")
|
|
||||||
rand_idx = 0
|
|
||||||
rand_sep = [0, len(voices) - 1]
|
|
||||||
while rand_sep[rand_idx] + 1 <= rand_sep[rand_idx + 1] - 1:
|
|
||||||
rand_cursep = random.randint(
|
|
||||||
rand_sep[rand_idx] + 1,
|
|
||||||
min(rand_sep[rand_idx + 1] - 1, rand_sep[rand_idx] + 1 + 3),
|
|
||||||
)
|
|
||||||
rand_sep.insert(rand_idx + 1, rand_cursep)
|
|
||||||
rand_idx += 1
|
|
||||||
merged_voices = []
|
|
||||||
for i, cur_sep in enumerate(rand_sep[:-1]):
|
|
||||||
voice = voices[cur_sep]
|
|
||||||
for j in range(cur_sep + 1, rand_sep[i + 1]):
|
|
||||||
voice["answer"] += voices[j]["answer"]
|
|
||||||
voice["end"] = voices[j]["end"]
|
|
||||||
merged_voices.append(voice)
|
|
||||||
merged_voices.append(voices[rand_sep[-1]])
|
|
||||||
voices = merged_voices
|
|
||||||
|
|
||||||
def split_and_keep(text, delimiters):
|
|
||||||
# 构建正则表达式模式,匹配文本或分隔符
|
|
||||||
pattern = "|".join(re.escape(delimiter) for delimiter in delimiters)
|
|
||||||
pattern = f"(?:[^{pattern}]+|[{pattern}])"
|
|
||||||
return re.findall(pattern, text)
|
|
||||||
|
|
||||||
puncs = [",", ".", "?", "!", ";", ":"]
|
|
||||||
|
|
||||||
para_seq = 0
|
|
||||||
for voice in voices:
|
|
||||||
answer: str = voice["answer"]
|
|
||||||
start_time: float = voice["start"]
|
|
||||||
end_time: float = voice["end"]
|
|
||||||
words = split_and_keep(answer, puncs)
|
|
||||||
temp_words = []
|
|
||||||
for i, word in enumerate(words):
|
|
||||||
if i > 0 and i < len(words) - 1 and random.random() < 0.15:
|
|
||||||
log.info("随机删除word")
|
|
||||||
continue
|
|
||||||
temp_words.extend(word.split(" "))
|
|
||||||
if len(temp_words) == 0:
|
|
||||||
temp_words = words[0].split(" ")
|
|
||||||
words = temp_words
|
|
||||||
answer = " ".join(words)
|
|
||||||
words = list(map(lambda x: x.strip(), words))
|
|
||||||
words = list(filter(lambda x: len(x) > 0, words))
|
|
||||||
|
|
||||||
# 将时间均匀分配到每个字上
|
|
||||||
words_withtime = []
|
|
||||||
word_unittime = (end_time - start_time) / len(words)
|
|
||||||
for i, word in enumerate(words):
|
|
||||||
word_start = start_time + word_unittime * i
|
|
||||||
word_end = word_start + word_unittime
|
|
||||||
words_withtime.append(
|
|
||||||
{
|
|
||||||
"text": word,
|
|
||||||
"start_time": word_start * 1000,
|
|
||||||
"end_time": word_end * 1000,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# 将句子首尾的标点符号时间扩展到字上 标点符号时间为瞬间
|
|
||||||
punc_at = 0
|
|
||||||
while punc_at < len(words) and words[punc_at] in puncs:
|
|
||||||
punc_at += 1
|
|
||||||
if punc_at < len(words):
|
|
||||||
words_withtime[punc_at]["start_time"] = words_withtime[0][
|
|
||||||
"start_time"
|
|
||||||
]
|
|
||||||
for i in range(0, punc_at):
|
|
||||||
words_withtime[i]["start_time"] = words_withtime[0]["start_time"]
|
|
||||||
words_withtime[i]["end_time"] = words_withtime[0]["start_time"]
|
|
||||||
punc_at = len(words) - 1
|
|
||||||
while punc_at >= 0 and words[punc_at] in puncs:
|
|
||||||
punc_at -= 1
|
|
||||||
if punc_at >= 0:
|
|
||||||
words_withtime[punc_at]["end_time"] = words_withtime[-1]["end_time"]
|
|
||||||
for i in range(punc_at + 1, len(words)):
|
|
||||||
words_withtime[i]["start_time"] = (
|
|
||||||
words_withtime[-1]["end_time"] + 0.1
|
|
||||||
)
|
|
||||||
words_withtime[i]["end_time"] = words_withtime[-1]["end_time"] + 0.1
|
|
||||||
|
|
||||||
if random.random() < 0.4 and len(words_withtime) > 1:
|
|
||||||
log.info("发送一次final_result=False")
|
|
||||||
rand_idx = random.randint(1, len(words_withtime) - 1)
|
|
||||||
recognition_result = {
|
|
||||||
"text": " ".join(
|
|
||||||
map(lambda x: x["text"], words_withtime[:rand_idx])
|
|
||||||
),
|
|
||||||
"final_result": False,
|
|
||||||
"para_seq": para_seq,
|
|
||||||
"language": "de",
|
|
||||||
"start_time": start_time * 1000,
|
|
||||||
"end_time": end_time * 1000,
|
|
||||||
"words": words_withtime[:rand_idx],
|
|
||||||
}
|
|
||||||
callback(recognition_result)
|
|
||||||
|
|
||||||
recognition_result = {
|
|
||||||
"text": answer,
|
|
||||||
"final_result": True,
|
|
||||||
"para_seq": para_seq,
|
|
||||||
"language": "de",
|
|
||||||
"start_time": start_time * 1000,
|
|
||||||
"end_time": end_time * 1000,
|
|
||||||
"words": words_withtime,
|
|
||||||
}
|
|
||||||
callback(recognition_result)
|
|
||||||
para_seq += 1
|
|
||||||
log.info("send %s" % para_seq)
|
|
||||||
|
|
||||||
time.sleep(send_interval)
|
|
||||||
|
|
||||||
callback(None)
|
|
||||||
|
|
||||||
|
|
||||||
# ignore END
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
app.run(host="0.0.0.0", port=80)
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
flask
|
|
||||||
requests
|
|
||||||
pyyaml
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
import json
|
|
||||||
|
|
||||||
from schemas.dataset import QueryData
|
|
||||||
from schemas.stream import StreamDataModel
|
|
||||||
from utils.evaluator_plus import evaluate_editops
|
|
||||||
|
|
||||||
with open("out/detail_cases.json") as f:
|
|
||||||
detail_cases = json.load(f)
|
|
||||||
|
|
||||||
detail_case = detail_cases[0]
|
|
||||||
preds = []
|
|
||||||
for pred in detail_case["preds"]:
|
|
||||||
preds.append(StreamDataModel.model_validate(pred))
|
|
||||||
label = QueryData.model_validate(detail_case["label"])
|
|
||||||
|
|
||||||
print(evaluate_editops(label, preds))
|
|
||||||
@@ -1,93 +0,0 @@
|
|||||||
"""
|
|
||||||
f(a, b) 计算 a -> b 的编辑距离,使用的方法是之前asr榜单的方法
|
|
||||||
g(a, b) 计算 a -> b 的编辑距离,使用的是原始的编辑距离计算方法
|
|
||||||
test() 是对拍程序
|
|
||||||
"""
|
|
||||||
|
|
||||||
import random
|
|
||||||
import string
|
|
||||||
from copy import deepcopy
|
|
||||||
from typing import List, Tuple
|
|
||||||
|
|
||||||
import Levenshtein
|
|
||||||
|
|
||||||
|
|
||||||
def mapping(gt: str, dt: str):
|
|
||||||
return [i for i in gt], [i for i in dt]
|
|
||||||
|
|
||||||
|
|
||||||
def token_mapping(
|
|
||||||
tokens_gt: List[str], tokens_dt: List[str]
|
|
||||||
) -> Tuple[List[str], List[str]]:
|
|
||||||
arr1 = deepcopy(tokens_gt)
|
|
||||||
arr2 = deepcopy(tokens_dt)
|
|
||||||
operations = Levenshtein.editops(arr1, arr2)
|
|
||||||
for op in operations[::-1]:
|
|
||||||
if op[0] == "insert":
|
|
||||||
arr1.insert(op[1], None)
|
|
||||||
elif op[0] == "delete":
|
|
||||||
arr2.insert(op[2], None)
|
|
||||||
return arr1, arr2
|
|
||||||
|
|
||||||
|
|
||||||
def cer(tokens_gt_mapping: List[str], tokens_dt_mapping: List[str]):
|
|
||||||
"""输入的是经过编辑距离映射后的两个 token 序列,返回 1-cer, token-cnt"""
|
|
||||||
insert = sum(1 for item in tokens_gt_mapping if item is None)
|
|
||||||
delete = sum(1 for item in tokens_dt_mapping if item is None)
|
|
||||||
equal = sum(
|
|
||||||
1
|
|
||||||
for token_gt, token_dt in zip(tokens_gt_mapping, tokens_dt_mapping)
|
|
||||||
if token_gt == token_dt
|
|
||||||
)
|
|
||||||
replace = len(tokens_gt_mapping) - insert - equal # - delete
|
|
||||||
return replace, delete, insert
|
|
||||||
|
|
||||||
|
|
||||||
def f(a, b):
|
|
||||||
return cer(*token_mapping(*mapping(a, b)))
|
|
||||||
|
|
||||||
|
|
||||||
def raw(tokens_gt, tokens_dt):
|
|
||||||
arr1 = deepcopy(tokens_gt)
|
|
||||||
arr2 = deepcopy(tokens_dt)
|
|
||||||
operations = Levenshtein.editops(arr1, arr2)
|
|
||||||
insert = 0
|
|
||||||
delete = 0
|
|
||||||
replace = 0
|
|
||||||
for op in operations:
|
|
||||||
if op[0] == "insert":
|
|
||||||
insert += 1
|
|
||||||
if op[0] == "delete":
|
|
||||||
delete += 1
|
|
||||||
if op[0] == "replace":
|
|
||||||
replace += 1
|
|
||||||
return replace, delete, insert
|
|
||||||
|
|
||||||
|
|
||||||
def g(a, b):
|
|
||||||
return raw(*mapping(a, b))
|
|
||||||
|
|
||||||
|
|
||||||
def check(a, b):
|
|
||||||
ff = f(a, b)
|
|
||||||
gg = g(a, b)
|
|
||||||
if ff != gg:
|
|
||||||
print(ff, gg)
|
|
||||||
return ff == gg
|
|
||||||
|
|
||||||
|
|
||||||
def random_string(length):
|
|
||||||
letters = string.ascii_lowercase
|
|
||||||
return "".join(random.choice(letters) for i in range(length))
|
|
||||||
|
|
||||||
|
|
||||||
def test():
|
|
||||||
for _ in range(10000):
|
|
||||||
a = random_string(30)
|
|
||||||
b = random_string(30)
|
|
||||||
if not check(a, b):
|
|
||||||
print(a, b)
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
test()
|
|
||||||
Reference in New Issue
Block a user