update code

2025-08-22 18:00:46 +08:00
parent a575a38552
commit 1b78ebefdd
44 changed files with 0 additions and 2855 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/49
+++ b/49
@@ -1,49 +0,0 @@
 FROM harbor.4pd.io/inf/base-python3.8-ubuntu:1.1.0
 MAINTAINER shiguangchuan@4paradigm.com
 WORKDIR /workspace
 COPY ssh-keygen /bin
 RUN wget -q ftp://ftp.4pd.io/pub/pico/temp/pynini-2.1.6-cp38-cp38-manylinux_2_31_x86_64.whl && pip install pynini-2.1.6-cp38-cp38-manylinux_2_31_x86_64.whl && rm -f pynini-2.1.6-c    p38-cp38-manylinux_2_31_x86_64.whl
 ADD ./requirements.txt /workspace
 RUN pip install -r ./requirements.txt -i https://nexus.4pd.io/repository/pypi-all/simple --trusted-host nexus.4pd.io --extra-index-url https://mirrors.aliyun.com/pypi/simple/ \
    && pip cache purge \
    && ssh-keygen -f /workspace/ssh-key-ecdsa -t ecdsa -b 521 -q -N ""
 ADD . /workspace
 EXPOSE 80
 CMD ["python3", "run_callback.py"]
 ###########################
 ## Dockerfile（更新后）
 #FROM harbor.4pd.io/lab-platform/inf/python:3.9
 #WORKDIR /app
 ## 安装依赖
 ##RUN pip install torch librosa flask
 ##RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
 ##    pip cache purge && \
 ##    pip --default-timeout=1000 install torch librosa flask
 ## 删除原来的 COPY pytorch_model.bin /app/
 #COPY inference.py /app/  
 # 只需要复制启动脚本
 #EXPOSE 80
 #CMD ["python", "inference.py"]
 ####################
 ##############################更新0731#################################
--- a/helm-chart/.DS_Store
+++ b/helm-chart/.DS_Store
--- a/helm-chart/README.md
+++ b/helm-chart/README.md
@@ -1,77 +0,0 @@
 ## judgeflow chart 的要求
 ### values.yaml 文件必须包含如下字段，并且模板中必须引用 values.yaml 中的如下字段
 ```
 podLabels
 env
 volumeMounts
 volumes
 affinity
 ```
 ### values.yaml 文件必须在 volumeMounts 中声明如下卷
 ```
 workspace
 submit
 datafile
 ```
 ## 被测服务（sut） chart 的要求
 ### values.yaml 文件必须包含如下字段，并且资源模板中必须引用 values.yaml 中的如下字段
 ```
 podLabels
 affinity
 ```
 针对 podLabels 字段，values.yaml 中配置格式如下：
 ```
 podLabels: {}
 ```
 下面给出示例
 podLabels
 values.yaml
 templates/deployment.yaml
 ```
 metadata:
  labels:
    {{- with .Values.podLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
 ```
 affinity
 values.yaml
 ```
 affinity: {}
 ```
 templates/deployment.yaml
 ```
 spec:
  template:
    spec:
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
 ```
 ### 如果需要在 sut 中使用共享存储，则 sut chart 的 values.yaml 也必须包含如下字段，且模板中必须引用 values.yaml 中的如下字段
 ```
 volumeMounts
 volumes
 ```
--- a/helm-chart/asr-tco/.helmignore
+++ b/helm-chart/asr-tco/.helmignore
@@ -1,23 +0,0 @@
 # Patterns to ignore when building packages.
 # This supports shell glob matching, relative path matching, and
 # negation (prefixed with !). Only one pattern per line.
 .DS_Store
 # Common VCS dirs
 .git/
 .gitignore
 .bzr/
 .bzrignore
 .hg/
 .hgignore
 .svn/
 # Common backup files
 *.swp
 *.bak
 *.tmp
 *.orig
 *~
 # Various IDEs
 .project
 .idea/
 *.tmproj
 .vscode/
--- a/helm-chart/asr-tco/Chart.yaml.tmpl
+++ b/helm-chart/asr-tco/Chart.yaml.tmpl
@@ -1,24 +0,0 @@
 apiVersion: v2
 name: ${chartName}
 description: Leaderboard judgeflow helm chart for demo
 # A chart can be either an 'application' or a 'library' chart.
 #
 # Application charts are a collection of templates that can be packaged into versioned archives
 # to be deployed.
 #
 # Library charts provide useful utilities or functions for the chart developer. They're included as
 # a dependency of application charts to inject those utilities and functions into the rendering
 # pipeline. Library charts do not define any templates and therefore cannot be deployed.
 type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
 version: ${version}
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
 appVersion: "${appVersion}"
--- a/helm-chart/asr-tco/templates/_helpers.tpl
+++ b/helm-chart/asr-tco/templates/_helpers.tpl
@@ -1,62 +0,0 @@
 {{/*
 Expand the name of the chart.
 */}}
 {{- define "judgeflow.name" -}}
 {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Create a default fully qualified app name.
 We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
 If release name contains chart name it will be used as a full name.
 */}}
 {{- define "judgeflow.fullname" -}}
 {{- if .Values.fullnameOverride }}
 {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- $name := default .Chart.Name .Values.nameOverride }}
 {{- if contains $name .Release.Name }}
 {{- .Release.Name | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{- end }}
 {{- end }}
 {{/*
 Create chart name and version as used by the chart label.
 */}}
 {{- define "judgeflow.chart" -}}
 {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Common labels
 */}}
 {{- define "judgeflow.labels" -}}
 helm.sh/chart: {{ include "judgeflow.chart" . }}
 {{ include "judgeflow.selectorLabels" . }}
 {{- if .Chart.AppVersion }}
 app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 {{- end }}
 app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{- end }}
 {{/*
 Selector labels
 */}}
 {{- define "judgeflow.selectorLabels" -}}
 app.kubernetes.io/name: {{ include "judgeflow.name" . }}
 app.kubernetes.io/instance: {{ .Release.Name }}
 {{- end }}
 {{/*
 Create the name of the service account to use
 */}}
 {{- define "judgeflow.serviceAccountName" -}}
 {{- if .Values.serviceAccount.create }}
 {{- default (include "judgeflow.fullname" .) .Values.serviceAccount.name }}
 {{- else }}
 {{- default "default" .Values.serviceAccount.name }}
 {{- end }}
 {{- end }}
--- a/helm-chart/asr-tco/templates/hpa.yaml
+++ b/helm-chart/asr-tco/templates/hpa.yaml
@@ -1,32 +0,0 @@
 {{- if .Values.autoscaling.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
  name: {{ include "judgeflow.fullname" . }}
  labels:
    {{- include "judgeflow.labels" . | nindent 4 }}
 spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "judgeflow.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
 {{- end }}
--- a/helm-chart/asr-tco/templates/ingress.yaml
+++ b/helm-chart/asr-tco/templates/ingress.yaml
@@ -1,61 +0,0 @@
 {{- if .Values.ingress.enabled -}}
 {{- $fullName := include "judgeflow.fullname" . -}}
 {{- $svcPort := .Values.service.port -}}
 {{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
  {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
  {{- end }}
 {{- end }}
 {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
 apiVersion: networking.k8s.io/v1
 {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
 apiVersion: networking.k8s.io/v1beta1
 {{- else -}}
 apiVersion: extensions/v1beta1
 {{- end }}
 kind: Ingress
 metadata:
  name: {{ $fullName }}
  labels:
    {{- include "judgeflow.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
 spec:
  {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
            pathType: {{ .pathType }}
            {{- end }}
            backend:
              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
              service:
                name: {{ $fullName }}
                port:
                  number: {{ $svcPort }}
              {{- else }}
              serviceName: {{ $fullName }}
              servicePort: {{ $svcPort }}
              {{- end }}
          {{- end }}
    {{- end }}
 {{- end }}
--- a/helm-chart/asr-tco/templates/job.yaml
+++ b/helm-chart/asr-tco/templates/job.yaml
@@ -1,63 +0,0 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
  name: {{ include "judgeflow.fullname" . }}
  labels:
    {{- include "judgeflow.labels" . | nindent 4 }}
    {{- with .Values.podLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
 spec:
  template:
    metadata:
      labels:
        {{- include "judgeflow.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.priorityclassname }}
      priorityClassName: "{{ . }}"
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          {{- with .Values.env }}
          env:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- if and (hasKey .Values "service") (hasKey .Values.service "ports") }}
          ports:
            {{- range .Values.service.ports }}
              - name: {{ .name }}
                containerPort: {{ .port }}
            {{- end }}
          {{- end }}
          {{- if hasKey .Values "command" }}
          command: {{ .Values.command }}
          {{- end }}
          volumeMounts:
            {{- toYaml .Values.volumeMounts | nindent 12 }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
      restartPolicy: Never
      {{- with .Values.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
  backoffLimit: 0
--- a/helm-chart/asr-tco/templates/priorityclass.yaml
+++ b/helm-chart/asr-tco/templates/priorityclass.yaml
@@ -1,10 +0,0 @@
 {{- if .Values.priorityclassname }}
 apiVersion: scheduling.k8s.io/v1
 kind: PriorityClass
 metadata:
  name: "{{ .Values.priorityclassname }}"
 value: {{ .Values.priorityclassvalue }}
 globalDefault: false
 preemptionPolicy: "Never"
 description: "This is a priority class."
 {{- end }}
--- a/helm-chart/asr-tco/templates/service.yaml
+++ b/helm-chart/asr-tco/templates/service.yaml
@@ -1,22 +0,0 @@
 {{- if and (hasKey .Values "service") (hasKey .Values.service "type") }}
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "judgeflow.fullname" . }}
  labels:
    {{- include "judgeflow.labels" . | nindent 4 }}
    {{- with .Values.podLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
 spec:
  type: {{ .Values.service.type }}
  ports:
    {{- range .Values.service.ports }}
      - port: {{ .port }}
        targetPort: {{ .port }}
        protocol: TCP
        name: {{ .name }}
    {{- end }}
  selector:
    {{- include "judgeflow.selectorLabels" . | nindent 4 }}
 {{- end }}
--- a/helm-chart/asr-tco/templates/serviceaccount.yaml
+++ b/helm-chart/asr-tco/templates/serviceaccount.yaml
@@ -1,13 +0,0 @@
 {{- if .Values.serviceAccount.create -}}
 apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: {{ include "judgeflow.serviceAccountName" . }}
  labels:
    {{- include "judgeflow.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
 automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
 {{- end }}
--- a/helm-chart/asr-tco/templates/tests/test-connection.yaml
+++ b/helm-chart/asr-tco/templates/tests/test-connection.yaml
@@ -1,15 +0,0 @@
 apiVersion: v1
 kind: Pod
 metadata:
  name: {{ include "judgeflow.fullname" . }}-test-connection
  labels:
    {{- include "judgeflow.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
 spec:
  containers:
    - name: wget
      image: busybox
      command: ['wget']
      args: ['{{ include "judgeflow.fullname" . }}:{{ .Values.service.port }}']
  restartPolicy: Never
--- a/helm-chart/asr-tco/values.yaml.tmpl
+++ b/helm-chart/asr-tco/values.yaml.tmpl
@@ -1,124 +0,0 @@
 # Default values for job_demo.
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.
 replicaCount: 1
 image:
  repository: "${imageRepo}"
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "${imageTag}"
 imagePullSecrets: []
 nameOverride: ""
 fullnameOverride: ""
 serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""
 podAnnotations: {}
 podLabels: 
  contest.4pd.io/leaderboard-resource-type: judge_flow
  contest.4pd.io/leaderboard-job-id: "0"
  contest.4pd.io/leaderboard-submit-id: "0"
 podSecurityContext: {}
  # fsGroup: 2000
 securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000
 service:
  type: ClusterIP
  ports:
    - name: http
      port: 80
 ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local
 resources:
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  limits:
    cpu: 3000m
    memory: 16Gi
  requests:
    cpu: 3000m
    memory: 16Gi
 autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80
 nodeSelector:
  juicefs: "on"
  contest.4pd.io/cpu: INTEL-8358
 tolerations: []
 affinity: {}
 env:
  - name: TZ
    value: Asia/Shanghai
  - name: MY_POD_IP
    valueFrom:
      fieldRef:
        fieldPath: status.podIP
 #command: '["python","run.py"]'
 volumeMounts:
  - name: workspace
    mountPath: /tmp/workspace
  - name: datafile
    mountPath: /tmp/datafile
  - name: submit
    mountPath: /tmp/submit_config
  - name: juicefs-pv
    mountPath: /tmp/juicefs
  - name: customer
    mountPath: /tmp/customer
  - name: submit-private
    mountPath: /tmp/submit_private
 volumes:
  - name: juicefs-pv
    persistentVolumeClaim:
      claimName: juicefs-pvc
 priorityclassname: ''
 priorityclassvalue: '0'
--- a/helm-chart/sut/.DS_Store
+++ b/helm-chart/sut/.DS_Store
--- a/helm-chart/sut/.helmignore
+++ b/helm-chart/sut/.helmignore
@@ -1,23 +0,0 @@
 # Patterns to ignore when building packages.
 # This supports shell glob matching, relative path matching, and
 # negation (prefixed with !). Only one pattern per line.
 .DS_Store
 # Common VCS dirs
 .git/
 .gitignore
 .bzr/
 .bzrignore
 .hg/
 .hgignore
 .svn/
 # Common backup files
 *.swp
 *.bak
 *.tmp
 *.orig
 *~
 # Various IDEs
 .project
 .idea/
 *.tmproj
 .vscode/
--- a/helm-chart/sut/Chart.yaml
+++ b/helm-chart/sut/Chart.yaml
@@ -1,24 +0,0 @@
 apiVersion: v2
 name: sut
 description: A Helm chart for Kubernetes
 # A chart can be either an 'application' or a 'library' chart.
 #
 # Application charts are a collection of templates that can be packaged into versioned archives
 # to be deployed.
 #
 # Library charts provide useful utilities or functions for the chart developer. They're included as
 # a dependency of application charts to inject those utilities and functions into the rendering
 # pipeline. Library charts do not define any templates and therefore cannot be deployed.
 type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
 version: 0.1.0
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
 appVersion: "0.1.0"
--- a/helm-chart/sut/templates/_helpers.tpl
+++ b/helm-chart/sut/templates/_helpers.tpl
@@ -1,62 +0,0 @@
 {{/*
 Expand the name of the chart.
 */}}
 {{- define "sut.name" -}}
 {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Create a default fully qualified app name.
 We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
 If release name contains chart name it will be used as a full name.
 */}}
 {{- define "sut.fullname" -}}
 {{- if .Values.fullnameOverride }}
 {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- $name := default .Chart.Name .Values.nameOverride }}
 {{- if contains $name .Release.Name }}
 {{- .Release.Name | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{- end }}
 {{- end }}
 {{/*
 Create chart name and version as used by the chart label.
 */}}
 {{- define "sut.chart" -}}
 {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Common labels
 */}}
 {{- define "sut.labels" -}}
 helm.sh/chart: {{ include "sut.chart" . }}
 {{ include "sut.selectorLabels" . }}
 {{- if .Chart.AppVersion }}
 app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 {{- end }}
 app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{- end }}
 {{/*
 Selector labels
 */}}
 {{- define "sut.selectorLabels" -}}
 app.kubernetes.io/name: {{ include "sut.name" . }}
 app.kubernetes.io/instance: {{ .Release.Name }}
 {{- end }}
 {{/*
 Create the name of the service account to use
 */}}
 {{- define "sut.serviceAccountName" -}}
 {{- if .Values.serviceAccount.create }}
 {{- default (include "sut.fullname" .) .Values.serviceAccount.name }}
 {{- else }}
 {{- default "default" .Values.serviceAccount.name }}
 {{- end }}
 {{- end }}
--- a/helm-chart/sut/templates/deployment.yaml
+++ b/helm-chart/sut/templates/deployment.yaml
@@ -1,94 +0,0 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ include "sut.fullname" . }}
  labels:
    {{- include "sut.labels" . | nindent 4 }}
    {{- with .Values.podLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
 spec:
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "sut.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "sut.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "sut.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      {{- with .Values.priorityclassname }}
      priorityClassName: "{{ . }}"
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          {{- with .Values.env }}
          env:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.service.port }}
              protocol: TCP
          {{- with .Values.command }}
          command: 
            {{- toYaml . | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          {{- with .Values.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.livenessProbe }}
          livenessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.readinessProbe }}
          readinessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.startupProbe }}
          startupProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      volumes:
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      tolerations:
        - key: "hosttype"
          operator: "Equal"
          value: "iluvatar"
          effect: "NoSchedule"
--- a/helm-chart/sut/templates/hpa.yaml
+++ b/helm-chart/sut/templates/hpa.yaml
@@ -1,32 +0,0 @@
 {{- if .Values.autoscaling.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
  name: {{ include "sut.fullname" . }}
  labels:
    {{- include "sut.labels" . | nindent 4 }}
 spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "sut.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
 {{- end }}
--- a/helm-chart/sut/templates/ingress.yaml
+++ b/helm-chart/sut/templates/ingress.yaml
@@ -1,61 +0,0 @@
 {{- if .Values.ingress.enabled -}}
 {{- $fullName := include "sut.fullname" . -}}
 {{- $svcPort := .Values.service.port -}}
 {{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
  {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
  {{- end }}
 {{- end }}
 {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
 apiVersion: networking.k8s.io/v1
 {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
 apiVersion: networking.k8s.io/v1beta1
 {{- else -}}
 apiVersion: extensions/v1beta1
 {{- end }}
 kind: Ingress
 metadata:
  name: {{ $fullName }}
  labels:
    {{- include "sut.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
 spec:
  {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
            pathType: {{ .pathType }}
            {{- end }}
            backend:
              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
              service:
                name: {{ $fullName }}
                port:
                  number: {{ $svcPort }}
              {{- else }}
              serviceName: {{ $fullName }}
              servicePort: {{ $svcPort }}
              {{- end }}
          {{- end }}
    {{- end }}
 {{- end }}
--- a/helm-chart/sut/templates/service.yaml
+++ b/helm-chart/sut/templates/service.yaml
@@ -1,18 +0,0 @@
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "sut.fullname" . }}
  labels:
    {{- include "sut.labels" . | nindent 4 }}
    {{- with .Values.podLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
 spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: socket
  selector:
    {{- include "sut.selectorLabels" . | nindent 4 }}
--- a/helm-chart/sut/templates/serviceaccount.yaml
+++ b/helm-chart/sut/templates/serviceaccount.yaml
@@ -1,13 +0,0 @@
 {{- if .Values.serviceAccount.create -}}
 apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: {{ include "sut.serviceAccountName" . }}
  labels:
    {{- include "sut.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
 automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
 {{- end }}
--- a/helm-chart/sut/templates/tests/test-connection.yaml
+++ b/helm-chart/sut/templates/tests/test-connection.yaml
@@ -1,15 +0,0 @@
 apiVersion: v1
 kind: Pod
 metadata:
  name: "{{ include "sut.fullname" . }}-test-connection"
  labels:
    {{- include "sut.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
 spec:
  containers:
    - name: wget
      image: busybox
      command: ['wget']
      args: ['{{ include "sut.fullname" . }}:{{ .Values.service.port }}']
  restartPolicy: Never
--- a/helm-chart/sut/values.yaml.tmpl
+++ b/helm-chart/sut/values.yaml.tmpl
@@ -1,144 +0,0 @@
 # Default values for sut.
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.
 replicaCount: 1
 image:
  repository: harbor.4pd.io/lab-platform/inf/python
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: 3.9
 imagePullSecrets: []
 nameOverride: ""
 fullnameOverride: ""
 serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""
 podAnnotations: {}
 podLabels: {}
 podSecurityContext: {}
  # fsGroup: 2000
 securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000
 service:
  type: ClusterIP
  port: 80
 ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local
 resources:
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  limits:
    cpu: 1000m
    memory: 4096Mi
  requests:
    cpu: 1000m
    memory: 4096Mi
 autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80
 # Additional volumes on the output Deployment definition.
 volumes: []
 # - name: foo
 #   secret:
 #     secretName: mysecret
 #     optional: false
 # Additional volumeMounts on the output Deployment definition.
 volumeMounts: []
 # - name: foo
 #   mountPath: "/etc/foo"
 #   readOnly: true
 nodeSelector:
  contest.4pd.io/accelerator: iluvatar-BI-V100
 tolerations: 
  - key: hosttype
    operator: Equal
    value: iluvatar
    effect: NoSchedule
 affinity: {}
 readinessProbe:
  failureThreshold: 1000
  httpGet:
    path: /health
    port: 80
    scheme: HTTP
 #readinessProbe:
 #  httpGet:
 #    path: /health
 #    port: 80
 #    scheme: HTTP
 #  initialDelaySeconds: 5   # 应用启动后等待 5 秒再开始探测
 #  failureThreshold: 5      # 连续失败 3 次后标记为未就绪
 #  successThreshold: 1      # 连续成功 1 次后标记为就绪
 env:
  - name: TZ
    value: Asia/Shanghai
  - name: MY_POD_NAME
    valueFrom:
      fieldRef:
        fieldPath: metadata.name
  - name: MY_POD_NAMESPACE
    valueFrom:
      fieldRef:
        fieldPath: metadata.namespace
  - name: MY_POD_IP
    valueFrom:
      fieldRef:
        fieldPath: status.podIP
  - name: MY_NODE_IP
    valueFrom:
      fieldRef:
        fieldPath: status.hostIP
 #command: ''
 priorityclassname: ''
--- a/local_test.py
+++ b/local_test.py
@@ -1,64 +0,0 @@
 import os
 import tempfile
 import shutil
 if os.path.exists("/tmp/submit_private"):
  shutil.rmtree("/tmp/submit_private")
 with tempfile.TemporaryDirectory() as tempdir:
  config_path = os.path.join(tempdir, "config.json")
  assert not os.system(f"ssh-keygen -f {tempdir}/ssh-key-ecdsa -t ecdsa -b 521 -q -N \"\"")
  config = """
  model: whisper
  model_key: whisper
  config.json:
      name: 'faster-whisper-server:latest'
      support_devices:
      - cpu
      model_path: ''
      port: 8080
      other_ports: []
      other_ports_count: 1
      entrypoint: start.bat
      MIN_CHUNK: 2.5
      MIN_ADD_CHUNK: 2.5
      COMPUTE_TYPE: int8
      NUM_WORKERS: 1
      CPU_THREADS: 2
      BEAM_SIZE: 5
      BATCH: 1
      LANG: auto
      DEVICE: cpu
      CHUNK_LENGTH: 5
      CLASS_MODEL: ./models/faster-whisper-base
      EN_MODEL: ./models/faster-whisper-base
      ZH_MODEL: ./models/faster-whisper-base
      RU_MODEL: ./models/faster-whisper-base
      PT_MODEL: ./models/faster-whisper-base
      AR_MODEL: ./models/faster-whisper-base
      NEW_VERSION: 1
      NEED_RESET: 0
  leaderboard_options:
    nfs:
      - name: whisper
        srcRelativePath: leaderboard/pc_asr/en.tar.gz
        mountPoint: /tmp
        source: ceph_customer
  """
  with open(config_path, "w") as f:
    f.write(config)
  os.environ["SSH_KEY_DIR"] = tempdir
  os.environ["SUBMIT_CONFIG_FILEPATH"] = config_path
  os.environ["MODEL_MAPPING"] = '{"whisper": "edge-ml.tar.gz"}'
  from run_async_a10 import get_sut_url_windows
  print(get_sut_url_windows())
  import time
  time.sleep(3600)
--- a/mock_env.sh
+++ b/mock_env.sh
@@ -1,8 +0,0 @@
 #!/bin/bash
 export DATASET_FILEPATH=dataset/formatted1/de.zip
 export RESULT_FILEPATH=out/result.json
 export DETAILED_CASES_FILEPATH=out/detail_cases.json
 export SUBMIT_CONFIG_FILEPATH=
 export BENCHMARK_NAME=
 export MY_POD_IP=127.0.0.1
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,24 +0,0 @@
 [tool.black]
 line-length = 80
 target-version = ['py39']
 [tool.flake8]
 max-line-length = 88
 count=true
 per-file-ignores="./annotation/manager.py:F401"
 exclude=["./label", "__pycache__", "./migrations", "./logs", "./pids", "./resources"]
 ignore=["W503", "E203"]
 enable-extensions="G"
 application-import-names=["flake8-isort", "flake8-logging-format", "flake8-builtins"]
 import-order-style="edited"
 extend-ignore = ["E203", "E701"]
 [tool.isort]
 py_version=39
 profile="black"
 multi_line_output=9
 line_length=80
 group_by_package=true
 case_sensitive=true
 skip_gitignore=true
--- a/run.py
+++ b/run.py
@@ -1,114 +0,0 @@
 import gc
 import json
 import os
 import sys
 import time
 import zipfile
 import yaml
 from schemas.context import ASRContext
 from utils.client import Client
 from utils.evaluator import BaseEvaluator
 from utils.logger import logger
 from utils.service import register_sut
 IN_TEST = os.getenv("SUBMIT_CONFIG_FILEPATH", None) is None
 UNIT_TEST = os.getenv("UNIT_TEST", 0)
 def main():
    logger.info("执行……")
    dataset_filepath = os.getenv(
        "DATASET_FILEPATH",
        "./tests/resources/en.zip",
    )
    submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "./tests/resources/submit_config")
    result_filepath = os.getenv("RESULT_FILEPATH", "./out/result")
    bad_cases_filepath = os.getenv("BAD_CASES_FILEPATH", "./out/badcase")
    detail_cases_filepath = os.getenv("DETAILED_CASES_FILEPATH", "./out/detailcase.jsonl")
    resource_name = os.getenv("BENCHMARK_NAME")
    # 提交配置 & 启动被测服务
    if os.getenv("DATASET_FILEPATH", ""):
        from utils.helm import resource_check
        with open(submit_config_filepath, "r") as fp:
            st_config = yaml.safe_load(fp)
            st_config["values"] = resource_check(st_config.get("values", {}))
        if 'docker_images' in st_config:
            sut_url = "ws://172.26.1.75:9827"
            os.environ['test'] = '1'
        elif 'docker_image' in st_config:
            sut_url = register_sut(st_config, resource_name)
        elif UNIT_TEST:
            sut_url = "ws://172.27.231.36:80"
        else:
            logger.error("config 配置错误，没有 docker_image")
            os._exit(1)
    else:
        os.environ['test'] = '1'
        sut_url = "ws://172.27.231.36:80"
        if UNIT_TEST:
            exit(0)
    """
    # 数据集处理
    local_dataset_path = "./dataset"
    os.makedirs(local_dataset_path, exist_ok=True)
    with zipfile.ZipFile(dataset_filepath) as zf:
        zf.extractall(local_dataset_path)
    config_path = os.path.join(local_dataset_path, "data.yaml")
    with open(config_path, "r") as fp:
        dataset_config = yaml.safe_load(fp)
    # 数据集信息
    dataset_global_config = dataset_config.get("global", {})
    dataset_query = dataset_config.get("query_data", {})
    evaluator = BaseEvaluator()
    # 开始预测
    for idx, query_item in enumerate(dataset_query):
        gc.collect()
        logger.info(f"开始执行 {idx} 条数据")
        context = ASRContext(**dataset_global_config)
        context.lang = query_item.get("lang", context.lang)
        context.file_path = os.path.join(local_dataset_path, query_item["file"])
        # context.audio_length = query_item["audio_length"]
        interactions = Client(sut_url, context).action()
        context.append_labels(query_item["voice"])
        context.append_preds(
            interactions["predict_data"],
            interactions["send_time"],
            interactions["recv_time"],
        )
        context.fail = interactions["fail"]
        if IN_TEST:
            with open('output.txt', 'w') as fp:
                original_stdout = sys.stdout
                sys.stdout = fp
                print(context)
                sys.stdout = original_stdout
        evaluator.evaluate(context)
        detail_case = evaluator.gen_detail_case()
        with open(detail_cases_filepath, "a") as fp:
            fp.write(json.dumps(detail_case.to_dict(), ensure_ascii=False) + "\n")
        time.sleep(4)
    evaluator.post_evaluate()
    output_result = evaluator.gen_result()
    # print(evaluator.__dict__)
    logger.info("执行完成. Result = {output_result}")
    with open(result_filepath, "w") as fp:
        json.dump(output_result, fp, indent=2, ensure_ascii=False)
    with open(bad_cases_filepath, "w") as fp:
        fp.write("当前榜单不存在 Bad Case\n")
    """
 if __name__ == "__main__":
    main()
--- a/run_async_a10.py
+++ b/run_async_a10.py
@@ -1,757 +0,0 @@
 import atexit
 import concurrent.futures
 import fcntl
 import gc
 import glob
 import json
 import os
 import random
 import signal
 import sys
 import tempfile
 import threading
 import time
 import zipfile
 from concurrent.futures import ThreadPoolExecutor
 import yaml
 from fabric import Connection
 from vmplatform import VMOS, Client, VMDataDisk
 from schemas.context import ASRContext
 from utils.client_async import ClientAsync
 from utils.evaluator import BaseEvaluator
 from utils.logger import logger
 from utils.service import register_sut
 IN_TEST = os.getenv("SUBMIT_CONFIG_FILEPATH", None) is None
 UNIT_TEST = os.getenv("UNIT_TEST", 0)
 DATASET_NUM = os.getenv("DATASET_NUM")
 # vm榜单参数
 SUT_TYPE = os.getenv("SUT_TYPE", "kubernetes")
 SHARE_SUT = os.getenv("SHARE_SUT", "true") == "true"
 VM_ID = 0
 VM_IP = ""
 do_deploy_chart = True
 VM_CPU = int(os.getenv("VM_CPU", "2"))
 VM_MEM = int(os.getenv("VM_MEM", "4096"))
 MODEL_BASEPATH = os.getenv("MODEL_BASEPATH", "/tmp/customer/leaderboard/pc_asr")
 MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
 SSH_KEY_DIR = os.getenv("SSH_KEY_DIR", "/workspace")
 SSH_PUBLIC_KEY_FILE = os.path.join(SSH_KEY_DIR, "ssh-key-ecdsa.pub")
 SSH_KEY_FILE = os.path.join(SSH_KEY_DIR, "ssh-key-ecdsa")
 CONNECT_KWARGS = {"key_filename": SSH_KEY_FILE}
 # 共享sut参数
 JOB_ID = os.getenv("JOB_ID")
 dirname = "/tmp/submit_private/sut_share"
 os.makedirs(dirname, exist_ok=True)
 SUT_SHARE_LOCK = os.path.join(dirname, "lock.lock")
 SUT_SHARE_USE_LOCK = os.path.join(dirname, "use.lock")
 SUT_SHARE_STATUS = os.path.join(dirname, "status.json")
 SUT_SHARE_JOB_STATUS = os.path.join(dirname, f"job_status.{JOB_ID}")
 SUT_SHARE_PUBLIC_FAIL = os.path.join(dirname, "one_job_failed")
 fd_lock = open(SUT_SHARE_USE_LOCK, "a")
 def clean_vm_atexit():
    global VM_ID, do_deploy_chart
    if not VM_ID:
        return
    if not do_deploy_chart:
        return
    logger.info("删除vm")
    vmclient = Client()
    err_msg = vmclient.delete_vm(VM_ID)
    if err_msg:
        logger.warning(f"删除vm失败: {err_msg}")
 def put_file_to_vm(c: Connection, local_path: str, remote_path: str):
    logger.info(f"uploading file {local_path} to {remote_path}")
    result = c.put(local_path, remote_path)
    logger.info("uploaded {0.local} to {0.remote}".format(result))
 def deploy_windows_sut():
    global VM_ID
    global VM_IP
    submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "")
    with open(submit_config_filepath, "r") as fp:
        st_config = yaml.safe_load(fp)
    assert "model" in st_config, "未配置model"
    assert "model_key" in st_config, "未配置model_key"
    assert "config.json" in st_config, "未配置config.json"
    nfs = st_config.get("leaderboard_options", {}).get("nfs", [])
    assert len(nfs) > 0, "未配置nfs"
    assert st_config["model"] in MODEL_MAPPING, "提交模型不在可用模型范围内"
    model = st_config["model"]
    model_key = st_config["model_key"]
    model_path = ""
    config = st_config["config.json"]
    exist = False
    for nfs_item in nfs:
        if nfs_item["name"] == model_key:
            exist = True
            if nfs_item["source"] == "ceph_customer":
                model_path = os.path.join(
                    "/tmp/customer",
                    nfs_item["srcRelativePath"],
                )
            else:
                model_path = os.path.join(
                    "/tmp/juicefs",
                    nfs_item["srcRelativePath"],
                )
            break
    if not exist:
        raise RuntimeError(f"未找到nfs配置项 name={model_key}")
    config_path = os.path.join(tempfile.mkdtemp(), "config.json")
    model_dir = os.path.basename(model_path).split(".")[0]
    config["model_path"] = f"E:\\model\\{model_dir}"
    with open(config_path, "w") as fp:
        json.dump(config, fp, ensure_ascii=False, indent=4)
    vmclient = Client()
    with open(SSH_PUBLIC_KEY_FILE, "r") as fp:
        sshpublickey = fp.read().rstrip()
    VM_ID = vmclient.create_vm(
        "amd64",
        VMOS.windows10,
        VM_CPU,
        VM_MEM,
        "leaderboard-%s-submit-%s-job-%s"
        % (
            os.getenv("BENCHMARK_NAME"),
            os.getenv("SUBMIT_ID"),
            os.getenv("JOB_ID"),
        ),
        sshpublickey,
        datadisks=[
            VMDataDisk(
                size=50,
                disk_type="ssd",
                mount_path="/",
                filesystem="NTFS",
            )
        ],
    )
    atexit.register(clean_vm_atexit)
    signal.signal(signal.SIGTERM, lambda signum, _: sys.exit(signum))
    VM_IP = vmclient.wait_until_vm_running(VM_ID)
    logger.info("vm created successfully, vm_ip: %s", VM_IP)
    def sut_startup():
        with Connection(
            VM_IP,
            "administrator",
            connect_kwargs=CONNECT_KWARGS,
        ) as c:
            script_path = "E:\\base\\asr\\faster-whisper\\server"
            script_path = "E:\\install\\asr\\sensevoice\\server"
            bat_filepath = f"{script_path}\\start.bat"
            config_filepath = "E:\\submit\\config.json"
            result = c.run("")
            assert result.ok
            c.run(
                f'cd /d {script_path} & set "EDGE_ML_ENV_HOME=E:\\install" & {bat_filepath} {config_filepath}',
                warn=True,
            )
    with Connection(
        VM_IP,
        "administrator",
        connect_kwargs=CONNECT_KWARGS,
    ) as c:
        model_filepath = os.path.join(MODEL_BASEPATH, MODEL_MAPPING[model])
        filename = os.path.basename(model_filepath)
        put_file_to_vm(c, model_filepath, "/E:/")
        result = c.run("mkdir E:\\base")
        assert result.ok
        result = c.run("mkdir E:\\model")
        assert result.ok
        result = c.run("mkdir E:\\submit")
        assert result.ok
        result = c.run(
            f"tar zxvf E:\\{filename} -C E:\\base --strip-components 1"
        )
        assert result.ok
        result = c.run("E:\\base\\setup-win.bat E:\\install")
        assert result.ok
        put_file_to_vm(c, config_path, "/E:/submit")
        put_file_to_vm(c, model_path, "/E:/model")
        result = c.run(
            f"tar zxvf E:\\model\\{os.path.basename(model_path)} -C E:\\model"
        )
        assert result.ok
        threading.Thread(target=sut_startup, daemon=True).start()
        time.sleep(60)
    return f"ws://{VM_IP}:{config['port']}"
 def deploy_macos_sut():
    global VM_ID
    global VM_IP
    submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "")
    with open(submit_config_filepath, "r") as fp:
        st_config = yaml.safe_load(fp)
    assert "model" in st_config, "未配置model"
    assert "model_key" in st_config, "未配置model_key"
    assert "config.json" in st_config, "未配置config.json"
    nfs = st_config.get("leaderboard_options", {}).get("nfs", [])
    assert len(nfs) > 0, "未配置nfs"
    assert st_config["model"] in MODEL_MAPPING, "提交模型不在可用模型范围内"
    model = st_config["model"]
    model_key = st_config["model_key"]
    model_path = ""
    config = st_config["config.json"]
    exist = False
    for nfs_item in nfs:
        if nfs_item["name"] == model_key:
            exist = True
            if nfs_item["source"] == "ceph_customer":
                model_path = os.path.join(
                    "/tmp/customer",
                    nfs_item["srcRelativePath"],
                )
            else:
                model_path = os.path.join(
                    "/tmp/juicefs",
                    nfs_item["srcRelativePath"],
                )
            break
    if not exist:
        raise RuntimeError(f"未找到nfs配置项 name={model_key}")
    config_path = os.path.join(tempfile.mkdtemp(), "config.json")
    model_dir = os.path.basename(model_path).split(".")[0]
    vmclient = Client()
    with open(SSH_PUBLIC_KEY_FILE, "r") as fp:
        sshpublickey = fp.read().rstrip()
    VM_ID = vmclient.create_vm(
        "amd64",
        VMOS.macos12,
        VM_CPU,
        VM_MEM,
        "leaderboard-%s-submit-%s-job-%s"
        % (
            os.getenv("BENCHMARK_NAME"),
            os.getenv("SUBMIT_ID"),
            os.getenv("JOB_ID"),
        ),
        sshpublickey,
        datadisks=[
            VMDataDisk(
                size=50,
                disk_type="ssd",
                mount_path="/",
                filesystem="apfs",
            )
        ],
    )
    atexit.register(clean_vm_atexit)
    signal.signal(signal.SIGTERM, lambda signum, _: sys.exit(signum))
    VM_IP = vmclient.wait_until_vm_running(VM_ID)
    logger.info("vm created successfully, vm_ip: %s", VM_IP)
    with Connection(
        VM_IP,
        "admin",
        connect_kwargs=CONNECT_KWARGS,
    ) as c:
        result = c.run("ls -d /Volumes/data*")
        assert result.ok
        volume_path = result.stdout.strip()
    config["model_path"] = f"{volume_path}/model/{model_dir}"
    with open(config_path, "w") as fp:
        json.dump(config, fp, ensure_ascii=False, indent=4)
    def sut_startup():
        with Connection(
            VM_IP,
            "admin",
            connect_kwargs=CONNECT_KWARGS,
        ) as c:
            script_path = f"{volume_path}/install/asr/sensevoice/server"
            startsh = f"{script_path}/start.sh"
            config_filepath = f"{volume_path}/submit/config.json"
            c.run(
                f"cd {script_path} && sh {startsh} {config_filepath}",
                warn=True,
            )
    with Connection(
        VM_IP,
        "admin",
        connect_kwargs=CONNECT_KWARGS,
    ) as c:
        model_filepath = os.path.join(MODEL_BASEPATH, MODEL_MAPPING[model])
        filename = os.path.basename(model_filepath)
        put_file_to_vm(c, model_filepath, f"{volume_path}")
        result = c.run(f"mkdir {volume_path}/base")
        assert result.ok
        result = c.run(f"mkdir {volume_path}/model")
        assert result.ok
        result = c.run(f"mkdir {volume_path}/submit")
        assert result.ok
        result = c.run(
            f"tar zxvf {volume_path}/{filename} -C {volume_path}/base --strip-components 1"  # noqa: E501
        )
        assert result.ok
        result = c.run(
            f"sh {volume_path}/base/setup-mac.sh {volume_path}/install x64"
        )
        assert result.ok
        put_file_to_vm(c, config_path, f"{volume_path}/submit")
        put_file_to_vm(c, model_path, f"{volume_path}/model")
        result = c.run(
            f"tar zxvf {volume_path}/model/{os.path.basename(model_path)} -C {volume_path}/model"  # noqa: E501
        )
        assert result.ok
        threading.Thread(target=sut_startup, daemon=True).start()
        time.sleep(60)
    return f"ws://{VM_IP}:{config['port']}"
 def get_sut_url_vm(vm_type: str):
    global VM_ID
    global VM_IP
    global do_deploy_chart
    do_deploy_chart = True
    # 拉起SUT
    def check_job_failed():
        while True:
            time.sleep(30)
            if os.path.exists(SUT_SHARE_PUBLIC_FAIL):
                logger.error("there is a job failed in current submit")
                sys.exit(1)
    sut_url = ""
    threading.Thread(target=check_job_failed, daemon=True).start()
    if SHARE_SUT:
        time.sleep(10 * random.random())
        try:
            open(SUT_SHARE_LOCK, "x").close()
        except Exception:
            do_deploy_chart = False
        start_at = time.time()
        def file_last_updated_at(file: str):
            return os.stat(file).st_mtime if os.path.exists(file) else start_at
        if not do_deploy_chart:
            with open(SUT_SHARE_JOB_STATUS, "w") as f:
                f.write("waiting")
            while (
                time.time() - file_last_updated_at(SUT_SHARE_STATUS)
                <= 60 * 60 * 24
            ):
                logger.info(
                    "Waiting sut application to be deployed by another job"
                )
                time.sleep(10 + random.random())
                if os.path.exists(SUT_SHARE_STATUS):
                    get_status = False
                    for _ in range(10):
                        try:
                            with open(SUT_SHARE_STATUS, "r") as f:
                                status = json.load(f)
                                get_status = True
                                break
                        except Exception:
                            time.sleep(1 + random.random())
                            continue
                    if not get_status:
                        raise RuntimeError(
                            "Failed to get status of sut application"
                        )
                    assert (
                        status.get("status") != "failed"
                    ), "Failed to deploy sut application, \
 please check other job logs"
                    if status.get("status") == "running":
                        VM_ID = status.get("vmid")
                        VM_IP = status.get("vmip")
                        sut_url = status.get("sut_url")
                        with open(SSH_PUBLIC_KEY_FILE, "w") as fp:
                            fp.write(status.get("pubkey"))
                        with open(SSH_KEY_FILE, "w") as fp:
                            fp.write(status.get("prikey"))
                        logger.info("Successfully get deployed sut application")
                        break
    if do_deploy_chart:
        try:
            fcntl.flock(fd_lock, fcntl.LOCK_EX)
            with open(SUT_SHARE_JOB_STATUS, "w") as f:
                f.write("waiting")
            pending = True
            def update_status():
                while pending:
                    time.sleep(30)
                    if not pending:
                        break
                    with open(SUT_SHARE_STATUS, "w") as f:
                        json.dump({"status": "pending"}, f)
            threading.Thread(target=update_status, daemon=True).start()
            if vm_type == "windows":
                sut_url = deploy_windows_sut()
            else:
                sut_url = deploy_macos_sut()
        except Exception:
            open(SUT_SHARE_PUBLIC_FAIL, "w").close()
            with open(SUT_SHARE_STATUS, "w") as f:
                json.dump({"status": "failed"}, f)
            raise
        finally:
            pending = False
        with open(SUT_SHARE_STATUS, "w") as f:
            pubkey = ""
            with open(SSH_PUBLIC_KEY_FILE, "r") as fp:
                pubkey = fp.read().rstrip()
            prikey = ""
            with open(SSH_KEY_FILE, "r") as fp:
                prikey = fp.read()
            json.dump(
                {
                    "status": "running",
                    "vmid": VM_ID,
                    "vmip": VM_IP,
                    "pubkey": pubkey,
                    "sut_url": sut_url,
                    "prikey": prikey,
                },
                f,
            )
    else:
        while True:
            time.sleep(5 + random.random())
            try:
                fcntl.flock(fd_lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
                break
            except Exception:
                logger.info("尝试抢占调用sut失败，继续等待 5s ...")
    with open(SUT_SHARE_JOB_STATUS, "w") as f:
        f.write("running")
    return sut_url
 def get_sut_url():
    if SUT_TYPE in ("windows", "macos"):
        return get_sut_url_vm(SUT_TYPE)
    submit_config_filepath = os.getenv(
        "SUBMIT_CONFIG_FILEPATH", "./tests/resources/submit_config"
    )
    CPU = os.getenv("SUT_CPU", "2")
    MEMORY = os.getenv("SUT_MEMORY", "4Gi")
    resource_name = os.getenv("BENCHMARK_NAME")
    # 任务信息
    # 斯拉夫语族：俄语、波兰语
    # 日耳曼语族：英语、德语、荷兰语
    # 拉丁语族（罗曼语族）：西班牙语、葡萄牙语、法国语、意大利语
    # 闪米特语族：阿拉伯语、希伯来语
    # 提交配置 & 启动被测服务
    if os.getenv("DATASET_FILEPATH", ""):
        with open(submit_config_filepath, "r") as fp:
            st_config = yaml.safe_load(fp)
            if "values" not in st_config:
                st_config["values"] = {}
            st_config["values"]["resources"] = {}
            st_config["values"]["resources"]["limits"] = {}
            st_config["values"]["resources"]["limits"]["cpu"] = CPU
            st_config["values"]["resources"]["limits"]["memory"] = MEMORY
            # st_config["values"]['resources']['limits']['nvidia.com/gpu'] = '1'
            # st_config["values"]['resources']['limits']['nvidia.com/gpumem'] = "1843"
            # st_config["values"]['resources']['limits']['nvidia.com/gpucores'] = "8"
            st_config["values"]["resources"]["requests"] = {}
            st_config["values"]["resources"]["requests"]["cpu"] = CPU
            st_config["values"]["resources"]["requests"]["memory"] = MEMORY
            # st_config["values"]['resources']['requests']['nvidia.com/gpu'] = '1'
            # st_config["values"]['resources']['requests']['nvidia.com/gpumem'] = "1843"
            # st_config["values"]['resources']['requests']['nvidia.com/gpucores'] = "8"
            # st_config['values']['nodeSelector'] = {}
            # st_config["values"]["nodeSelector"][
            #     "contest.4pd.io/accelerator"
            # ] = "A10vgpu"
            # st_config['values']['tolerations'] = []
            # toleration_item = {}
            # toleration_item['key'] = 'hosttype'
            # toleration_item['operator'] = 'Equal'
            # toleration_item['value'] = 'vgpu'
            # toleration_item['effect'] = 'NoSchedule'
            # st_config['values']['tolerations'].append(toleration_item)
            if os.getenv("RESOURCE_TYPE", "cpu") == "cpu":
                values = st_config["values"]
                limits = values.get("resources", {}).get("limits", {})
                requests = values.get("resources", {}).get("requests", {})
                if (
                    "nvidia.com/gpu" in limits
                    or "nvidia.com/gpumem" in limits
                    or "nvidia.com/gpucores" in limits
                    or "nvidia.com/gpu" in requests
                    or "nvidia.com/gpumem" in requests
                    or "nvidia.com/gpucores" in requests
                ):
                    raise Exception("禁止使用GPU!")
            else:
                vgpu_num = int(os.getenv("SUT_VGPU", "3"))
                st_config["values"]["resources"]["limits"]["nvidia.com/gpu"] = (
                    str(vgpu_num)
                )
                st_config["values"]["resources"]["limits"][
                    "nvidia.com/gpumem"
                ] = str(1843 * vgpu_num)
                st_config["values"]["resources"]["limits"][
                    "nvidia.com/gpucores"
                ] = str(8 * vgpu_num)
                st_config["values"]["resources"]["requests"][
                    "nvidia.com/gpu"
                ] = str(vgpu_num)
                st_config["values"]["resources"]["requests"][
                    "nvidia.com/gpumem"
                ] = str(1843 * vgpu_num)
                st_config["values"]["resources"]["requests"][
                    "nvidia.com/gpucores"
                ] = str(8 * vgpu_num)
                st_config["values"]["nodeSelector"] = {}
                st_config["values"]["nodeSelector"][
                    "contest.4pd.io/accelerator"
                ] = "A10vgpu"
                st_config["values"]["tolerations"] = []
                toleration_item = {}
                toleration_item["key"] = "hosttype"
                toleration_item["operator"] = "Equal"
                toleration_item["value"] = "vgpu"
                toleration_item["effect"] = "NoSchedule"
                st_config["values"]["tolerations"].append(toleration_item)
        if "docker_images" in st_config:
            sut_url = "ws://172.26.1.75:9827"
            os.environ["test"] = "1"
        elif "docker_image" in st_config:
            sut_url = register_sut(st_config, resource_name)
        elif UNIT_TEST:
            sut_url = "ws://172.27.231.36:80"
        else:
            logger.error("config 配置错误，没有 docker_image")
            os._exit(1)
        return sut_url
    else:
        os.environ["test"] = "1"
        sut_url = "ws://172.27.231.36:80"
        sut_url = "ws://172.26.1.75:9827"
        return sut_url
 def load_merge_dataset(dataset_filepath: str) -> dict:
    local_dataset_path = "./dataset"
    os.makedirs(local_dataset_path, exist_ok=True)
    with zipfile.ZipFile(dataset_filepath) as zf:
        zf.extractall(local_dataset_path)
    config = {}
    sub_datasets = os.listdir(local_dataset_path)
    for sub_dataset in sub_datasets:
        if sub_dataset.startswith("asr."):
            lang = sub_dataset[4:]
            lang_path = os.path.join(local_dataset_path, lang)
            os.makedirs(lang_path, exist_ok=True)
            with zipfile.ZipFile(
                os.path.join(local_dataset_path, sub_dataset)
            ) as zf:
                zf.extractall(lang_path)
            lang_config_path = os.path.join(lang_path, "data.yaml")
            with open(lang_config_path, "r") as fp:
                lang_config = yaml.safe_load(fp)
                audio_lengths = {}
                for query_item in lang_config.get("query_data", []):
                    audio_path = os.path.join(
                        lang_path,
                        query_item["file"],
                    )
                    query_item["file"] = audio_path
                    audio_lengths[query_item["file"]] = os.path.getsize(
                        audio_path,
                    )
                lang_config["query_data"] = sorted(
                    lang_config.get("query_data", []),
                    key=lambda x: audio_lengths[x["file"]],
                    reverse=True,
                )
                idx = 0
                length = 0.0
                for query_item in lang_config["query_data"]:
                    audio_length = audio_lengths[query_item["file"]]
                    length += audio_length / 32000
                    idx += 1
                    # 每个语言限制半个小时长度
                    if length >= 30 * 60:
                        break
                lang_config["query_data"] = lang_config["query_data"][:idx]
                config[lang] = lang_config
    config["query_data"] = []
    for lang, lang_config in config.items():
        if lang == "query_data":
            continue
        for query_item in lang_config["query_data"]:
            config["query_data"].append(
                {
                    **query_item,
                    "lang": lang,
                }
            )
    random.Random(0).shuffle(config["query_data"])
    return config
 def postprocess_failed():
    open(SUT_SHARE_PUBLIC_FAIL, "w").close()
 def main():
    dataset_filepath = os.getenv(
        "DATASET_FILEPATH",
        "/Users/4paradigm/Projects/dataset/asr/de.zip",
        # "./tests/resources/en.zip",
    )
    result_filepath = os.getenv("RESULT_FILEPATH", "./out/result")
    bad_cases_filepath = os.getenv("BAD_CASES_FILEPATH", "./out/badcase")
    detail_cases_filepath = os.getenv(
        "DETAILED_CASES_FILEPATH", "./out/detailcase.jsonl"
    )
    thread_num = int(os.getenv("THREAD_NUM", "1"))
    # 数据集处理
    config = {}
    if os.getenv("MERGE_DATASET", "1"):
        config = load_merge_dataset(dataset_filepath)
        dataset_query = config["query_data"]
    else:
        local_dataset_path = "./dataset"
        os.makedirs(local_dataset_path, exist_ok=True)
        with zipfile.ZipFile(dataset_filepath) as zf:
            zf.extractall(local_dataset_path)
        config_path = os.path.join(local_dataset_path, "data.yaml")
        with open(config_path, "r") as fp:
            dataset_config = yaml.safe_load(fp)
        # 读取所有的音频，进而获得音频的总长度，最后按照音频长度对 query_data 进行降序排序
        lang = os.getenv("lang")
        if lang is None:
            lang = dataset_config.get("global", {}).get("lang", "en")
        audio_lengths = []
        for query_item in dataset_config.get("query_data", []):
            query_item["lang"] = lang
            audio_path = os.path.join(local_dataset_path, query_item["file"])
            query_item["file"] = audio_path
            audio_lengths.append(os.path.getsize(audio_path) / 1024 / 1024)
        dataset_config["query_data"] = sorted(
            dataset_config.get("query_data", []),
            key=lambda x: audio_lengths[dataset_config["query_data"].index(x)],
            reverse=True,
        )
        # 数据集信息
        # dataset_global_config = dataset_config.get("global", {})
        dataset_query = dataset_config.get("query_data", {})
        config[lang] = dataset_config
    # sut url
    sut_url = get_sut_url()
    try:
        # 开始测试
        logger.info("开始执行")
        evaluator = BaseEvaluator()
        future_list = []
        with ThreadPoolExecutor(max_workers=thread_num) as executor:
            for idx, query_item in enumerate(dataset_query):
                context = ASRContext(
                    **config[query_item["lang"]].get("global", {}),
                )
                context.lang = query_item["lang"]
                context.file_path = query_item["file"]
                context.append_labels(query_item["voice"])
                future = executor.submit(
                    ClientAsync(sut_url, context, idx).action
                )
                future_list.append(future)
        for future in concurrent.futures.as_completed(future_list):
            context = future.result()
            evaluator.evaluate(context)
            detail_case = evaluator.gen_detail_case()
            with open(detail_cases_filepath, "a") as fp:
                fp.write(
                    json.dumps(
                        detail_case.to_dict(),
                        ensure_ascii=False,
                    )
                    + "\n",
                )
            del context
            gc.collect()
        evaluator.post_evaluate()
        output_result = evaluator.gen_result()
        logger.info("执行完成")
        with open(result_filepath, "w") as fp:
            json.dump(output_result, fp, indent=2, ensure_ascii=False)
        with open(bad_cases_filepath, "w") as fp:
            fp.write("当前榜单不存在 Bad Case\n")
        if SHARE_SUT:
            with open(SUT_SHARE_JOB_STATUS, "w") as f:
                f.write("success")
            fcntl.flock(fd_lock, fcntl.LOCK_UN)
            fd_lock.close()
            while SHARE_SUT and do_deploy_chart:
                time.sleep(30)
                success_num = 0
                for job_status_file in glob.glob(dirname + "/job_status.*"):
                    with open(job_status_file, "r") as f:
                        job_status = f.read()
                        success_num += job_status == "success"
                if success_num == int(DATASET_NUM):
                    break
                logger.info("Waiting for all jobs to complete")
    except Exception:
        if SHARE_SUT:
            postprocess_failed()
        raise
    sys.exit(0)
 if __name__ == "__main__":
    main()
--- a/schemas/init.py
+++ b/schemas/init.py
--- a/schemas/context.py
+++ b/schemas/context.py
@@ -1,90 +0,0 @@
 import os
 from copy import deepcopy
 from typing import Dict, List, Optional
 from pydantic import BaseModel, Field
 from schemas.stream import StreamDataModel
 class LabelContext(BaseModel):
    start: float
    end: float
    answer: str
 class PredContext(BaseModel):
    recognition_results: StreamDataModel
    recv_time: Optional[float] = Field(None)
    send_time: Optional[float] = Field(None)
 class ASRContext:
    def __init__(self, **kwargs):
        self.bits = kwargs.get("bits", 16)
        self.channel = kwargs.get("channel", 1)
        self.sample_rate = kwargs.get("sample_rate", 16000)
        self.audio_format = kwargs.get("format", "wav")
        self.enable_words = kwargs.get("enable_words", True)
        self.char_contains_rate = kwargs.get("char_contains_rate", 0.8)
        self.lang = os.getenv("lang")
        if self.lang is None:
            self.lang = kwargs.get("lang", "en")
        self.stream = kwargs.get("stream", True)
        self.wait_time = float(os.getenv("wait_time", 0.1))
        self.chunk_size = self.sample_rate * self.bits / 8 * self.wait_time
        if int(os.getenv('chunk_size_set', 0)):
            self.chunk_size = int(os.getenv('chunk_size_set', 0))
        self.audio_length = 0
        self.file_path = ""
        self.labels: List[LabelContext] = kwargs.get("labels", [])
        self.preds: List[PredContext] = kwargs.get("preds", [])
        self.label_sentences: List[str] = []
        self.pred_sentences: List[str] = []
        self.send_time_start_end = []
        self.recv_time_start_end = []
        self.fail = False
        self.fail_char_contains_rate_num = 0
        self.punctuation_num = 0
        self.pred_punctuation_num = 0
    def append_labels(self, voices: List[Dict]):
        for voice_data in voices:
            label_context = LabelContext(**voice_data)
            self.labels.append(label_context)
    def append_preds(
        self,
        predict_data: List[StreamDataModel],
        send_time: List[float],
        recv_time: List[float],
    ):
        self.send_time_start_end = [send_time[0], send_time[-1]] if len(send_time) > 0 else []
        self.recv_time_start_end = [recv_time[0], recv_time[-1]] if len(recv_time) > 0 else []
        for pred_item, send_time_item, recv_time_item in zip(predict_data, send_time, recv_time):
            pred_item = deepcopy(pred_item)
            pred_context = PredContext(recognition_results=pred_item.model_dump())
            pred_context.send_time = send_time_item
            pred_context.recv_time = recv_time_item
            self.preds.append(pred_context)
    def to_dict(self):
        return {
            "bits": self.bits,
            "channel": self.channel,
            "sample_rate": self.sample_rate,
            "audio_format": self.audio_format,
            "enable_words": self.enable_words,
            "stream": self.stream,
            "wait_time": self.wait_time,
            "chunk_size": self.chunk_size,
            "labels": [item.model_dump_json() for item in self.labels],
            "preds": [item.model_dump_json() for item in self.preds],
        }
--- a/schemas/dataset.py
+++ b/schemas/dataset.py
@@ -1,18 +0,0 @@
 from typing import List
 from pydantic import BaseModel, Field
 class QueryDataSentence(BaseModel):
    answer: str = Field(description="文本label")
    start: float = Field(description="句子开始时间")
    end: float = Field(description="句子结束时间")
 class QueryData(BaseModel):
    lang: str = Field(description="语言")
    file: str = Field(description="音频文件位置")
    duration: float = Field(description="音频长度")
    voice: List[QueryDataSentence] = Field(
        description="音频文件的文本label内容"
    )
--- a/schemas/stream.py
+++ b/schemas/stream.py
@@ -1,66 +0,0 @@
 from typing import List
 from pydantic import BaseModel, ValidationError, field_validator
 from pydantic import model_validator
 class StreamWordsModel(BaseModel):
    text: str
    start_time: float
    end_time: float
    @model_validator(mode="after")
    def check_result(self):
        if self.end_time < self.start_time:
            raise ValidationError("end-time 小于 start-time, error")
        return self
 class StreamDataModel(BaseModel):
    text: str
    language: str
    final_result: bool
    para_seq: int
    start_time: float
    end_time: float
    words: List[StreamWordsModel]
    @model_validator(mode="after")
    def check_result(self):
        if self.end_time < self.start_time:
            raise ValidationError("end-time 小于 start-time, error")
        return self
 class StreamResultModel(BaseModel):
    asr_results: StreamDataModel
    @field_validator('asr_results', mode="after")
    def convert_to_seconds(cls, v: StreamDataModel, values):
        # 在这里处理除以1000的逻辑
        v.end_time = v.end_time / 1000
        v.start_time = v.start_time / 1000
        for word in v.words:
            word.start_time /= 1000
            word.end_time /= 1000
        return v
    class Config:
        validate_assignment = True
 class NonStreamDataModel(BaseModel):
    text: str
    para_seq: int
    start_time: float
    end_time: float
    @model_validator(mode="after")
    def check_result(self):
        if self.end_time < self.start_time:
            raise ValidationError("end-time 小于 start-time, error")
        return self
 class NonStreamResultModel(BaseModel):
    contents: List[NonStreamDataModel]
--- a/scripts/check_dataset_time.py
+++ b/scripts/check_dataset_time.py
@@ -1,53 +0,0 @@
 import os
 import sys
 from collections import defaultdict
 import yaml
 def main(dataset_dir):
    dirs = os.listdir(dataset_dir)
    dirs = list(
        filter(lambda x: os.path.isdir(os.path.join(dataset_dir, x)), dirs)
    )
    problem_dirs = set()
    problem_count = defaultdict(int)
    for dir in dirs:
        with open(os.path.join(dataset_dir, dir, "data.yaml"), "r") as f:
            data = yaml.full_load(f)
        for query_i, query in enumerate(data["query_data"]):
            voices = sorted(query["voice"], key=lambda x: x["start"])
            if voices != query["voice"]:
                print("-----", dir)
            if voices[0]["start"] > voices[0]["end"]:
                print(
                    "err1: %s 第%s个query的第%d个voice的start大于end: %s"
                    % (dir, query_i, 0, voices[0]["answer"])
                )
                problem_dirs.add(dir)
            for voice_i in range(1, len(voices)):
                voice = voices[voice_i]
                if voice["start"] > voice["end"]:
                    print(
                        "err1: %s 第%s个query的第%d个voice的start大于end: %s"
                        % (dir, query_i, voice_i, voice["answer"])
                    )
                    problem_dirs.add(dir)
                if voice["start"] < voices[voice_i - 1]["end"]:
                    print(
                        "err2: %s 第%s个query的第%d个voice的start小于前一个voice的end: %s"
                        % (dir, query_i, voice_i, voice["answer"])
                    )
                    problem_dirs.add(dir)
                    problem_count[dir] += 1
    print(len(dirs))
    print(problem_dirs)
    print(problem_count)
 if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("指定 测试数据集文件夹")
        sys.exit(1)
    main(sys.argv[1])
--- a/scripts/convert_callback_dataset.py
+++ b/scripts/convert_callback_dataset.py
@@ -1,108 +0,0 @@
 import json
 import os
 import shutil
 import sys
 import zipfile
 import yaml
 """
 target
 {
    "global": {
        "lang": ""
    },
    "query_data": [
        "file": "",
        "duration": 2.0,
        "voice": [
            {
                "answer": "",
                "start": 0.0,
                "end": 1.0
            }
        ]
    ]
 }
 """
 def situation_a(meta, dataset_folder, output_folder):
    """
    {
        "combined": {
            "en": [
                {
                    "wav": "*.wav",
                    "transcriptions": [
                        {
                            "text": "",
                            "start": 0.0,
                            "end": 1.0
                        }
                    ],
                    "duration": 2.0
                }
            ]
        }
    }
    """
    meta = meta["combined"]
    for lang, arr in meta.items():
        print("processing", lang)
        assert len(lang) == 2
        lang_folder = os.path.join(output_folder, lang)
        os.makedirs(lang_folder, exist_ok=True)
        data = {"global": {"lang": lang}, "query_data": []}
        query_data = data["query_data"]
        for item in arr:
            os.makedirs(
                os.path.join(lang_folder, os.path.dirname(item["wav"])),
                exist_ok=True,
            )
            mp3_file = item["wav"][:-4] + ".mp3"
            shutil.copyfile(
                os.path.join(dataset_folder, mp3_file),
                os.path.join(lang_folder, mp3_file),
            )
            query_data_item = {
                "file": mp3_file,
                "duration": float(item["duration"]),
                "voice": [],
            }
            query_data.append(query_data_item)
            voice = query_data_item["voice"]
            for v in item["transcriptions"]:
                voice.append(
                    {
                        "answer": v["text"],
                        "start": float(v["start"]),
                        "end": float(v["end"]),
                    }
                )
        with open(os.path.join(lang_folder, "data.yaml"), "w") as f:
            yaml.dump(data, f, indent=2, allow_unicode=True, encoding="utf-8")
        with zipfile.ZipFile(
            os.path.join(output_folder, lang + ".zip"), "w"
        ) as ziper:
            dirname = lang_folder
            for path, _, files in os.walk(dirname):
                for file in files:
                    ziper.write(
                        os.path.join(path, file),
                        os.path.join(path[len(dirname) :], file),
                        zipfile.ZIP_DEFLATED,
                    )
 if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("指定 数据集文件夹路径 输出路径")
        sys.exit(1)
    dataset_folder = sys.argv[1]
    output_folder = sys.argv[2]
    with open(os.path.join(dataset_folder, "meta.json")) as f:
        meta = json.load(f)
    situation_a(meta, dataset_folder, output_folder)
--- a/scripts/debug_detailcase.py
+++ b/scripts/debug_detailcase.py
@@ -1,56 +0,0 @@
 import json
 import sys
 from schemas.dataset import QueryData
 from schemas.stream import StreamDataModel
 from utils.evaluator_plus import evaluate_editops
 def main(detailcase_file: str):
    with open(detailcase_file) as f:
        d = json.load(f)[0]
    preds = d["preds"]
    preds = list(map(lambda x: StreamDataModel(**x), preds))
    preds = list(filter(lambda x: x.final_result, preds))
    label = d["label"]
    label = QueryData(**label)
    print(evaluate_editops(label, preds))
 def evaluate_from_record(detailcase_file: str, record_path: str):
    with open(detailcase_file) as f:
        d = json.load(f)[0]
    label = d["label"]
    label = QueryData(**label)
    with open(record_path) as f:
        record = json.load(f)
    tokens_pred = record["tokens_pred"]
    tokens_label = record["tokens_label"]
    recognition_results = record["recognition_results"]
    recognition_results = list(
        map(lambda x: StreamDataModel(**x), recognition_results)
    )
    a, b = [], []
    for i, rr in enumerate(recognition_results):
        if rr.final_result:
            a.append(tokens_pred[i])
            b.append(rr)
    tokens_pred = a
    recognition_results = b
    print(
        evaluate_editops(
            label,
            recognition_results,
            tokens_pred,
            tokens_label,
        )
    )
 if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("请指定 detailcase 文件路径")
        sys.exit(1)
    main(sys.argv[1])
    # evaluate_from_record(sys.argv[1], sys.argv[2])
--- a/BIN
+++ b/BIN
--- a/starting_kit/Dockerfile
+++ b/starting_kit/Dockerfile
@@ -1,11 +0,0 @@
 FROM harbor.4pd.io/inf/base-python3.8-ubuntu:1.1.0
 WORKDIR /workspace
 ADD ./requirements.txt /workspace
 RUN pip install -r ./requirements.txt -i https://nexus.4pd.io/repository/pypi-all/simple --trusted-host nexus.4pd.io --extra-index-url https://mirrors.aliyun.com/pypi/simple/ \
    && pip cache purge
 ADD . /workspace
 CMD ["python", "main.py"]
--- a/starting_kit/main.py
+++ b/starting_kit/main.py
@@ -1,313 +0,0 @@
 import logging
 import os
 import threading
 import time
 from typing import Optional
 import flask
 import requests
 from werkzeug.datastructures import FileStorage
 app = flask.Flask(__name__)
 heartbeat_active = False
 log = logging.getLogger(__name__)
 log.propagate = False
 level = logging.INFO
 log.setLevel(level)
 formatter = logging.Formatter(
    "[%(asctime)s] %(levelname)s : %(pathname)s:%(lineno)d - %(message)s",
    "%Y-%m-%d %H:%M:%S",
 )
 streamHandler = logging.StreamHandler()
 streamHandler.setLevel(level)
 streamHandler.setFormatter(formatter)
 log.addHandler(streamHandler)
 def heartbeat(url):
    global heartbeat_active
    if heartbeat_active:
        return
    heartbeat_active = True
    while True:
        try:
            requests.post(url, json={"status": "RUNNING"})
        except Exception:
            pass
        time.sleep(10)
 def asr(
    audio_file: FileStorage,
    language: Optional[str],
    progressCallbackUrl: str,
    taskId: str,
 ):
    """TODO: 读取audio_file, 调用语音识别服务, 实时返回识别结果"""
    # ignore BEGIN
    # 此处为榜单本地测试使用
    if os.getenv("LOCAL_TEST"):
        return local_test(progressCallbackUrl, taskId)
    # ignore END
    language = "de"
    # 某一次识别返回
    requests.post(
        progressCallbackUrl,
        json={
            "taskId": taskId,
            "status": "RUNNING",
            "recognition_results": {  # 传增量结果, status如果是FINISHED, 或者ERROR, 这个字段请不要传值
                "text": "最先启动的还是",
                "final_result": True,
                "para_seq": 0,
                "language": language,
                "start_time": 6300,
                "end_time": 6421,
                "words": [
                    {
                        "text": "最",
                        "start_time": 6300,
                        "end_time": 6321,
                    },
                    {
                        "text": "先",
                        "start_time": 6321,
                        "end_time": 6345,
                    },
                    {
                        "text": "启",
                        "start_time": 6345,
                        "end_time": 6350,
                    },
                    {
                        "text": "动",
                        "start_time": 6350,
                        "end_time": 6370,
                    },
                    {
                        "text": "的",
                        "start_time": 6370,
                        "end_time": 6386,
                    },
                    {
                        "text": "还",
                        "start_time": 6386,
                        "end_time": 6421,
                    },
                    {
                        "text": "是",
                        "start_time": 6421,
                        "end_time": 6435,
                    },
                ],
            },
        },
    )
    # ... 识别结果返回完毕
    # 识别结束
    requests.post(
        progressCallbackUrl,
        json={
            "taskId": taskId,
            "status": "FINISHED",
        },
    )
@app.post("/predict")
 def predict():
    body = flask.request.form
    language = body.get("language")
    if language is None:
        "自行判断语种"
    taskId = body["taskId"]
    progressCallbackUrl = body["progressCallbackUrl"]
    heartbeatUrl = body["heartbeatUrl"]
    threading.Thread(
        target=heartbeat, args=(heartbeatUrl,), daemon=True
    ).start()
    audio_file = flask.request.files["file"]
    # audio_file.stream  # 读取文件流
    # audio_file.save("audio.mp3")  # 保存文件
    threading.Thread(
        target=asr,
        args=(audio_file, language, progressCallbackUrl, taskId),
        daemon=True,
    ).start()
    return flask.jsonify({"status": "OK"})
 # ignore BEGIN
 def local_test(progressCallbackUrl: str, taskId: str):
    """忽略此方法, 此方法为榜单本地调试使用"""
    import random
    import re
    import yaml
    def callback(content):
        try:
            if content is None:
                requests.post(
                    progressCallbackUrl,
                    json={"taskId": taskId, "status": "FINISHED"},
                )
            else:
                requests.post(
                    progressCallbackUrl,
                    json={
                        "taskId": taskId,
                        "status": "RUNNING",
                        "recognition_results": content,
                    },
                )
        except Exception:
            pass
    with open(
        os.getenv("LOCAL_TEST_DATA_PATH", "../dataset/out/data.yaml")
    ) as f:
        data = yaml.full_load(f)
    voices = data["query_data"][0]["voice"]
    # 首次发送
    first_send_time = random.randint(3, 5)
    send_interval = random.random() * 0
    log.info("首次发送%ss 发送间隔%ss" % (first_send_time, send_interval))
    time.sleep(first_send_time)
    # 将句子拼接到一起
    if random.random() < 0.3:
        log.info("将部分句子合并成单句 每次合并的句子不超过3句")
        rand_idx = 0
        rand_sep = [0, len(voices) - 1]
        while rand_sep[rand_idx] + 1 <= rand_sep[rand_idx + 1] - 1:
            rand_cursep = random.randint(
                rand_sep[rand_idx] + 1,
                min(rand_sep[rand_idx + 1] - 1, rand_sep[rand_idx] + 1 + 3),
            )
            rand_sep.insert(rand_idx + 1, rand_cursep)
            rand_idx += 1
        merged_voices = []
        for i, cur_sep in enumerate(rand_sep[:-1]):
            voice = voices[cur_sep]
            for j in range(cur_sep + 1, rand_sep[i + 1]):
                voice["answer"] += voices[j]["answer"]
                voice["end"] = voices[j]["end"]
            merged_voices.append(voice)
        merged_voices.append(voices[rand_sep[-1]])
        voices = merged_voices
    def split_and_keep(text, delimiters):
        # 构建正则表达式模式，匹配文本或分隔符
        pattern = "|".join(re.escape(delimiter) for delimiter in delimiters)
        pattern = f"(?:[^{pattern}]+|[{pattern}])"
        return re.findall(pattern, text)
    puncs = [",", ".", "?", "!", ";", ":"]
    para_seq = 0
    for voice in voices:
        answer: str = voice["answer"]
        start_time: float = voice["start"]
        end_time: float = voice["end"]
        words = split_and_keep(answer, puncs)
        temp_words = []
        for i, word in enumerate(words):
            if i > 0 and i < len(words) - 1 and random.random() < 0.15:
                log.info("随机删除word")
                continue
            temp_words.extend(word.split(" "))
        if len(temp_words) == 0:
            temp_words = words[0].split(" ")
        words = temp_words
        answer = " ".join(words)
        words = list(map(lambda x: x.strip(), words))
        words = list(filter(lambda x: len(x) > 0, words))
        # 将时间均匀分配到每个字上
        words_withtime = []
        word_unittime = (end_time - start_time) / len(words)
        for i, word in enumerate(words):
            word_start = start_time + word_unittime * i
            word_end = word_start + word_unittime
            words_withtime.append(
                {
                    "text": word,
                    "start_time": word_start * 1000,
                    "end_time": word_end * 1000,
                }
            )
        # 将句子首尾的标点符号时间扩展到字上 标点符号时间为瞬间
        punc_at = 0
        while punc_at < len(words) and words[punc_at] in puncs:
            punc_at += 1
        if punc_at < len(words):
            words_withtime[punc_at]["start_time"] = words_withtime[0][
                "start_time"
            ]
        for i in range(0, punc_at):
            words_withtime[i]["start_time"] = words_withtime[0]["start_time"]
            words_withtime[i]["end_time"] = words_withtime[0]["start_time"]
        punc_at = len(words) - 1
        while punc_at >= 0 and words[punc_at] in puncs:
            punc_at -= 1
        if punc_at >= 0:
            words_withtime[punc_at]["end_time"] = words_withtime[-1]["end_time"]
        for i in range(punc_at + 1, len(words)):
            words_withtime[i]["start_time"] = (
                words_withtime[-1]["end_time"] + 0.1
            )
            words_withtime[i]["end_time"] = words_withtime[-1]["end_time"] + 0.1
        if random.random() < 0.4 and len(words_withtime) > 1:
            log.info("发送一次final_result=False")
            rand_idx = random.randint(1, len(words_withtime) - 1)
            recognition_result = {
                "text": " ".join(
                    map(lambda x: x["text"], words_withtime[:rand_idx])
                ),
                "final_result": False,
                "para_seq": para_seq,
                "language": "de",
                "start_time": start_time * 1000,
                "end_time": end_time * 1000,
                "words": words_withtime[:rand_idx],
            }
            callback(recognition_result)
        recognition_result = {
            "text": answer,
            "final_result": True,
            "para_seq": para_seq,
            "language": "de",
            "start_time": start_time * 1000,
            "end_time": end_time * 1000,
            "words": words_withtime,
        }
        callback(recognition_result)
        para_seq += 1
        log.info("send %s" % para_seq)
        time.sleep(send_interval)
    callback(None)
 # ignore END
 if __name__ == "__main__":
    app.run(host="0.0.0.0", port=80)
--- a/starting_kit/requirements.txt
+++ b/starting_kit/requirements.txt
@@ -1,3 +0,0 @@
 flask
 requests
 pyyaml
--- a/tests/test_callback_editops.py
+++ b/tests/test_callback_editops.py
@@ -1,16 +0,0 @@
 import json
 from schemas.dataset import QueryData
 from schemas.stream import StreamDataModel
 from utils.evaluator_plus import evaluate_editops
 with open("out/detail_cases.json") as f:
    detail_cases = json.load(f)
 detail_case = detail_cases[0]
 preds = []
 for pred in detail_case["preds"]:
    preds.append(StreamDataModel.model_validate(pred))
 label = QueryData.model_validate(detail_case["label"])
 print(evaluate_editops(label, preds))
--- a/tests/test_cer.py
+++ b/tests/test_cer.py
@@ -1,93 +0,0 @@
 """
 f(a, b) 计算 a -> b 的编辑距离，使用的方法是之前asr榜单的方法
 g(a, b) 计算 a -> b 的编辑距离，使用的是原始的编辑距离计算方法
 test() 是对拍程序
 """
 import random
 import string
 from copy import deepcopy
 from typing import List, Tuple
 import Levenshtein
 def mapping(gt: str, dt: str):
    return [i for i in gt], [i for i in dt]
 def token_mapping(
    tokens_gt: List[str], tokens_dt: List[str]
 ) -> Tuple[List[str], List[str]]:
    arr1 = deepcopy(tokens_gt)
    arr2 = deepcopy(tokens_dt)
    operations = Levenshtein.editops(arr1, arr2)
    for op in operations[::-1]:
        if op[0] == "insert":
            arr1.insert(op[1], None)
        elif op[0] == "delete":
            arr2.insert(op[2], None)
    return arr1, arr2
 def cer(tokens_gt_mapping: List[str], tokens_dt_mapping: List[str]):
    """输入的是经过编辑距离映射后的两个 token 序列，返回 1-cer, token-cnt"""
    insert = sum(1 for item in tokens_gt_mapping if item is None)
    delete = sum(1 for item in tokens_dt_mapping if item is None)
    equal = sum(
        1
        for token_gt, token_dt in zip(tokens_gt_mapping, tokens_dt_mapping)
        if token_gt == token_dt
    )
    replace = len(tokens_gt_mapping) - insert - equal  # - delete
    return replace, delete, insert
 def f(a, b):
    return cer(*token_mapping(*mapping(a, b)))
 def raw(tokens_gt, tokens_dt):
    arr1 = deepcopy(tokens_gt)
    arr2 = deepcopy(tokens_dt)
    operations = Levenshtein.editops(arr1, arr2)
    insert = 0
    delete = 0
    replace = 0
    for op in operations:
        if op[0] == "insert":
            insert += 1
        if op[0] == "delete":
            delete += 1
        if op[0] == "replace":
            replace += 1
    return replace, delete, insert
 def g(a, b):
    return raw(*mapping(a, b))
 def check(a, b):
    ff = f(a, b)
    gg = g(a, b)
    if ff != gg:
        print(ff, gg)
    return ff == gg
 def random_string(length):
    letters = string.ascii_lowercase
    return "".join(random.choice(letters) for i in range(length))
 def test():
    for _ in range(10000):
        a = random_string(30)
        b = random_string(30)
        if not check(a, b):
            print(a, b)
            break
 test()