diff --git a/.DS_Store b/.DS_Store index 0a7970e..2a2f595 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..81c6be4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,49 @@ + +FROM harbor.4pd.io/inf/base-python3.8-ubuntu:1.1.0 +MAINTAINER shiguangchuan@4paradigm.com + +WORKDIR /workspace + +COPY ssh-keygen /bin + +RUN wget -q ftp://ftp.4pd.io/pub/pico/temp/pynini-2.1.6-cp38-cp38-manylinux_2_31_x86_64.whl && pip install pynini-2.1.6-cp38-cp38-manylinux_2_31_x86_64.whl && rm -f pynini-2.1.6-c p38-cp38-manylinux_2_31_x86_64.whl + +ADD ./requirements.txt /workspace +RUN pip install -r ./requirements.txt -i https://nexus.4pd.io/repository/pypi-all/simple --trusted-host nexus.4pd.io --extra-index-url https://mirrors.aliyun.com/pypi/simple/ \ + && pip cache purge \ + && ssh-keygen -f /workspace/ssh-key-ecdsa -t ecdsa -b 521 -q -N "" + +ADD . /workspace + +EXPOSE 80 + +CMD ["python3", "run_callback.py"] + + +########################### +## Dockerfile(更新后) +#FROM harbor.4pd.io/lab-platform/inf/python:3.9 + +#WORKDIR /app + +## 安装依赖 +##RUN pip install torch librosa flask + +##RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \ +## pip cache purge && \ +## pip --default-timeout=1000 install torch librosa flask + +## 删除原来的 COPY pytorch_model.bin /app/ + +#COPY inference.py /app/ +# 只需要复制启动脚本 + +#EXPOSE 80 + +#CMD ["python", "inference.py"] +#################### + + +##############################更新0731################################# + + diff --git a/asr-tco_image b/asr-tco_image deleted file mode 160000 index 8f9a14f..0000000 --- a/asr-tco_image +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8f9a14f472d64380854bfefb685abfb76b073074 diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..4ebcc2b --- /dev/null +++ b/config.yaml @@ -0,0 +1,6 @@ +leaderboard_options: + nfs: + - name: sid_model + srcRelativePath: zhoushasha/models/image_models/apple_mobilevit-small 
+ mountPoint: /model + source: ceph_customer \ No newline at end of file diff --git a/helm-chart/.DS_Store b/helm-chart/.DS_Store new file mode 100644 index 0000000..38ac210 Binary files /dev/null and b/helm-chart/.DS_Store differ diff --git a/helm-chart/README.md b/helm-chart/README.md new file mode 100644 index 0000000..90bd7e3 --- /dev/null +++ b/helm-chart/README.md @@ -0,0 +1,77 @@ +## judgeflow chart 的要求 + +### values.yaml 文件必须包含如下字段,并且模板中必须引用 values.yaml 中的如下字段 + +``` +podLabels +env +volumeMounts +volumes +affinity +``` + +### values.yaml 文件必须在 volumeMounts 中声明如下卷 + +``` +workspace +submit +datafile +``` + +## 被测服务(sut) chart 的要求 + +### values.yaml 文件必须包含如下字段,并且资源模板中必须引用 values.yaml 中的如下字段 + +``` +podLabels +affinity +``` + +针对 podLabels 字段,values.yaml 中配置格式如下: + +``` +podLabels: {} +``` + +下面给出示例 + +podLabels + +values.yaml + +templates/deployment.yaml + +``` +metadata: + labels: + {{- with .Values.podLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +``` + +affinity + +values.yaml + +``` +affinity: {} +``` + +templates/deployment.yaml + +``` +spec: + template: + spec: + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} +``` + +### 如果需要在 sut 中使用共享存储,则 sut chart 的 values.yaml 也必须包含如下字段,且模板中必须引用 values.yaml 中的如下字段 + +``` +volumeMounts +volumes +``` diff --git a/helm-chart/asr-tco/.helmignore b/helm-chart/asr-tco/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/helm-chart/asr-tco/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-chart/asr-tco/Chart.yaml.tmpl b/helm-chart/asr-tco/Chart.yaml.tmpl new file mode 100644 index 0000000..35a3153 --- /dev/null +++ b/helm-chart/asr-tco/Chart.yaml.tmpl @@ -0,0 +1,24 @@ +apiVersion: v2 +name: ${chartName} +description: Leaderboard judgeflow helm chart for demo + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: ${version} + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "${appVersion}" diff --git a/helm-chart/asr-tco/templates/_helpers.tpl b/helm-chart/asr-tco/templates/_helpers.tpl new file mode 100644 index 0000000..e373350 --- /dev/null +++ b/helm-chart/asr-tco/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "judgeflow.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "judgeflow.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "judgeflow.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "judgeflow.labels" -}} +helm.sh/chart: {{ include "judgeflow.chart" . }} +{{ include "judgeflow.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "judgeflow.selectorLabels" -}} +app.kubernetes.io/name: {{ include "judgeflow.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "judgeflow.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "judgeflow.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-chart/asr-tco/templates/hpa.yaml b/helm-chart/asr-tco/templates/hpa.yaml new file mode 100644 index 0000000..45ab478 --- /dev/null +++ b/helm-chart/asr-tco/templates/hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "judgeflow.fullname" . }} + labels: + {{- include "judgeflow.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "judgeflow.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/helm-chart/asr-tco/templates/ingress.yaml b/helm-chart/asr-tco/templates/ingress.yaml new file mode 100644 index 0000000..959d442 --- /dev/null +++ b/helm-chart/asr-tco/templates/ingress.yaml @@ -0,0 +1,61 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "judgeflow.fullname" . 
-}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "judgeflow.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm-chart/asr-tco/templates/job.yaml b/helm-chart/asr-tco/templates/job.yaml new file mode 100644 index 0000000..bc8e51a --- /dev/null +++ b/helm-chart/asr-tco/templates/job.yaml @@ -0,0 +1,63 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "judgeflow.fullname" . }} + labels: + {{- include "judgeflow.labels" . | nindent 4 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + template: + metadata: + labels: + {{- include "judgeflow.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.priorityclassname }} + priorityClassName: "{{ . }}" + {{- end }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.env }} + env: + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if and (hasKey .Values "service") (hasKey .Values.service "ports") }} + ports: + {{- range .Values.service.ports }} + - name: {{ .name }} + containerPort: {{ .port }} + {{- end }} + {{- end }} + {{- if hasKey .Values "command" }} + command: {{ .Values.command }} + {{- end }} + volumeMounts: + {{- toYaml .Values.volumeMounts | nindent 12 }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + restartPolicy: Never + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + backoffLimit: 0 diff --git a/helm-chart/asr-tco/templates/priorityclass.yaml b/helm-chart/asr-tco/templates/priorityclass.yaml new file mode 100644 index 0000000..7e1a884 --- /dev/null +++ b/helm-chart/asr-tco/templates/priorityclass.yaml @@ -0,0 +1,10 @@ +{{- if .Values.priorityclassname }} +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: "{{ .Values.priorityclassname }}" +value: {{ .Values.priorityclassvalue }} +globalDefault: false +preemptionPolicy: "Never" +description: "This is a priority class." +{{- end }} diff --git a/helm-chart/asr-tco/templates/service.yaml b/helm-chart/asr-tco/templates/service.yaml new file mode 100644 index 0000000..034a5d1 --- /dev/null +++ b/helm-chart/asr-tco/templates/service.yaml @@ -0,0 +1,22 @@ +{{- if and (hasKey .Values "service") (hasKey .Values.service "type") }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "judgeflow.fullname" . }} + labels: + {{- include "judgeflow.labels" . | nindent 4 }} + {{- with .Values.podLabels }} + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + {{- range .Values.service.ports }} + - port: {{ .port }} + targetPort: {{ .port }} + protocol: TCP + name: {{ .name }} + {{- end }} + selector: + {{- include "judgeflow.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/helm-chart/asr-tco/templates/serviceaccount.yaml b/helm-chart/asr-tco/templates/serviceaccount.yaml new file mode 100644 index 0000000..12df5c8 --- /dev/null +++ b/helm-chart/asr-tco/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "judgeflow.serviceAccountName" . }} + labels: + {{- include "judgeflow.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/helm-chart/asr-tco/templates/tests/test-connection.yaml b/helm-chart/asr-tco/templates/tests/test-connection.yaml new file mode 100644 index 0000000..c351ca2 --- /dev/null +++ b/helm-chart/asr-tco/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "judgeflow.fullname" . }}-test-connection + labels: + {{- include "judgeflow.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "judgeflow.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/helm-chart/asr-tco/values.yaml.tmpl b/helm-chart/asr-tco/values.yaml.tmpl new file mode 100644 index 0000000..0c73595 --- /dev/null +++ b/helm-chart/asr-tco/values.yaml.tmpl @@ -0,0 +1,124 @@ +# Default values for job_demo. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +replicaCount: 1 + +image: + repository: "${imageRepo}" + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "${imageTag}" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +podAnnotations: {} + +podLabels: + contest.4pd.io/leaderboard-resource-type: judge_flow + contest.4pd.io/leaderboard-job-id: "0" + contest.4pd.io/leaderboard-submit-id: "0" + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: ClusterIP + ports: + - name: http + port: 80 + +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ limits: + cpu: 3000m + memory: 16Gi + requests: + cpu: 3000m + memory: 16Gi + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +nodeSelector: + juicefs: "on" + contest.4pd.io/cpu: INTEL-8358 + +tolerations: [] + +affinity: {} + +env: + - name: TZ + value: Asia/Shanghai + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + +#command: '["python","run.py"]' + +volumeMounts: + - name: workspace + mountPath: /tmp/workspace + - name: datafile + mountPath: /tmp/datafile + - name: submit + mountPath: /tmp/submit_config + - name: juicefs-pv + mountPath: /tmp/juicefs + - name: customer + mountPath: /tmp/customer + - name: submit-private + mountPath: /tmp/submit_private + +volumes: + - name: juicefs-pv + persistentVolumeClaim: + claimName: juicefs-pvc + + +priorityclassname: '' +priorityclassvalue: '0' diff --git a/helm-chart/sut/.DS_Store b/helm-chart/sut/.DS_Store new file mode 100644 index 0000000..df37fdf Binary files /dev/null and b/helm-chart/sut/.DS_Store differ diff --git a/helm-chart/sut/.helmignore b/helm-chart/sut/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/helm-chart/sut/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-chart/sut/Chart.yaml b/helm-chart/sut/Chart.yaml new file mode 100644 index 0000000..5f95483 --- /dev/null +++ b/helm-chart/sut/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: sut +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. 
+# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.1.0" diff --git a/helm-chart/sut/templates/_helpers.tpl b/helm-chart/sut/templates/_helpers.tpl new file mode 100644 index 0000000..501d682 --- /dev/null +++ b/helm-chart/sut/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "sut.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "sut.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "sut.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "sut.labels" -}} +helm.sh/chart: {{ include "sut.chart" . }} +{{ include "sut.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "sut.selectorLabels" -}} +app.kubernetes.io/name: {{ include "sut.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "sut.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "sut.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-chart/sut/templates/deployment.yaml b/helm-chart/sut/templates/deployment.yaml new file mode 100644 index 0000000..ecabb4e --- /dev/null +++ b/helm-chart/sut/templates/deployment.yaml @@ -0,0 +1,94 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "sut.fullname" . }} + labels: + {{- include "sut.labels" . | nindent 4 }} + {{- with .Values.podLabels }} + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "sut.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "sut.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "sut.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- with .Values.priorityclassname }} + priorityClassName: "{{ . }}" + {{- end }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.env }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- with .Values.command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.startupProbe }} + startupProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + + volumes: + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + tolerations: + - key: "hosttype" + operator: "Equal" + value: "iluvatar" + effect: "NoSchedule" \ No newline at end of file diff --git a/helm-chart/sut/templates/hpa.yaml b/helm-chart/sut/templates/hpa.yaml new file mode 100644 index 0000000..b3b17a0 --- /dev/null +++ b/helm-chart/sut/templates/hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "sut.fullname" . }} + labels: + {{- include "sut.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "sut.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/helm-chart/sut/templates/ingress.yaml b/helm-chart/sut/templates/ingress.yaml new file mode 100644 index 0000000..4ecfe9b --- /dev/null +++ b/helm-chart/sut/templates/ingress.yaml @@ -0,0 +1,61 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "sut.fullname" . 
-}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "sut.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm-chart/sut/templates/service.yaml b/helm-chart/sut/templates/service.yaml new file mode 100644 index 0000000..0a1e857 --- /dev/null +++ b/helm-chart/sut/templates/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "sut.fullname" . }} + labels: + {{- include "sut.labels" . | nindent 4 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: socket + selector: + {{- include "sut.selectorLabels" . | nindent 4 }} diff --git a/helm-chart/sut/templates/serviceaccount.yaml b/helm-chart/sut/templates/serviceaccount.yaml new file mode 100644 index 0000000..3e9368c --- /dev/null +++ b/helm-chart/sut/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "sut.serviceAccountName" . }} + labels: + {{- include "sut.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/helm-chart/sut/templates/tests/test-connection.yaml b/helm-chart/sut/templates/tests/test-connection.yaml new file mode 100644 index 0000000..d506eb5 --- /dev/null +++ b/helm-chart/sut/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "sut.fullname" . }}-test-connection" + labels: + {{- include "sut.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "sut.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/helm-chart/sut/values.yaml.tmpl b/helm-chart/sut/values.yaml.tmpl new file mode 100644 index 0000000..f5d7123 --- /dev/null +++ b/helm-chart/sut/values.yaml.tmpl @@ -0,0 +1,144 @@ +# Default values for sut. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: harbor.4pd.io/lab-platform/inf/python + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: 3.9 + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: "" + +podAnnotations: {} +podLabels: {} +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: ClusterIP + port: 80 + +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + limits: + cpu: 1000m + memory: 4096Mi + requests: + cpu: 1000m + memory: 4096Mi + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes on the output Deployment definition. +volumes: [] +# - name: foo +# secret: +# secretName: mysecret +# optional: false + +# Additional volumeMounts on the output Deployment definition. 
+volumeMounts: [] +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nodeSelector: + contest.4pd.io/accelerator: iluvatar-BI-V100 + +tolerations: + - key: hosttype + operator: Equal + value: iluvatar + effect: NoSchedule + + +affinity: {} + +readinessProbe: + failureThreshold: 1000 + httpGet: + path: /health + port: 80 + scheme: HTTP + +#readinessProbe: +# httpGet: +# path: /health +# port: 80 +# scheme: HTTP +# initialDelaySeconds: 5 # 应用启动后等待 5 秒再开始探测 +# failureThreshold: 5 # 连续失败 3 次后标记为未就绪 +# successThreshold: 1 # 连续成功 1 次后标记为就绪 + +env: + - name: TZ + value: Asia/Shanghai + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: MY_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + +#command: '' + + +priorityclassname: '' diff --git a/local_test.py b/local_test.py new file mode 100644 index 0000000..323fc39 --- /dev/null +++ b/local_test.py @@ -0,0 +1,64 @@ +import os +import tempfile +import shutil + +if os.path.exists("/tmp/submit_private"): + shutil.rmtree("/tmp/submit_private") + +with tempfile.TemporaryDirectory() as tempdir: + config_path = os.path.join(tempdir, "config.json") + + assert not os.system(f"ssh-keygen -f {tempdir}/ssh-key-ecdsa -t ecdsa -b 521 -q -N \"\"") + + config = """ + model: whisper + model_key: whisper + config.json: + name: 'faster-whisper-server:latest' + support_devices: + - cpu + model_path: '' + port: 8080 + other_ports: [] + other_ports_count: 1 + entrypoint: start.bat + MIN_CHUNK: 2.5 + MIN_ADD_CHUNK: 2.5 + COMPUTE_TYPE: int8 + NUM_WORKERS: 1 + CPU_THREADS: 2 + BEAM_SIZE: 5 + BATCH: 1 + LANG: auto + DEVICE: cpu + CHUNK_LENGTH: 5 + CLASS_MODEL: ./models/faster-whisper-base + EN_MODEL: ./models/faster-whisper-base + ZH_MODEL: ./models/faster-whisper-base + RU_MODEL: ./models/faster-whisper-base + PT_MODEL: 
./models/faster-whisper-base + AR_MODEL: ./models/faster-whisper-base + NEW_VERSION: 1 + NEED_RESET: 0 + leaderboard_options: + nfs: + - name: whisper + srcRelativePath: leaderboard/pc_asr/en.tar.gz + mountPoint: /tmp + source: ceph_customer + """ + + with open(config_path, "w") as f: + f.write(config) + + os.environ["SSH_KEY_DIR"] = tempdir + os.environ["SUBMIT_CONFIG_FILEPATH"] = config_path + os.environ["MODEL_MAPPING"] = '{"whisper": "edge-ml.tar.gz"}' + + from run_async_a10 import get_sut_url_windows + + + print(get_sut_url_windows()) + + import time + time.sleep(3600) \ No newline at end of file diff --git a/mock_env.sh b/mock_env.sh new file mode 100644 index 0000000..d14fd85 --- /dev/null +++ b/mock_env.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +export DATASET_FILEPATH=dataset/formatted1/de.zip +export RESULT_FILEPATH=out/result.json +export DETAILED_CASES_FILEPATH=out/detail_cases.json +export SUBMIT_CONFIG_FILEPATH= +export BENCHMARK_NAME= +export MY_POD_IP=127.0.0.1 diff --git a/model_test_caltech_3.py b/model_test_caltech_3.py new file mode 100644 index 0000000..d41a6b6 --- /dev/null +++ b/model_test_caltech_3.py @@ -0,0 +1,215 @@ +import requests +import json +import torch +from PIL import Image +from io import BytesIO +from transformers import BeitImageProcessor, BeitForImageClassification +# 根据模型实际架构选择类 +from transformers import ViTForImageClassification, BeitForImageClassification +from tqdm import tqdm +from transformers import AutoConfig +from transformers import AutoImageProcessor, AutoModelForImageClassification +import os +import random +import time # 新增导入时间模块 + +# 支持 Iluvatar GPU 加速,若不可用则使用 CPU +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +print(f"当前使用的设备: {device}") # 添加调试信息 + +# 若有多块 GPU,可使用 DataParallel 进行并行计算 +if torch.cuda.device_count() > 1: + print(f"使用 {torch.cuda.device_count()} 块 GPU 进行计算") + +class COCOImageClassifier: + def __init__(self, model_path: str, local_image_paths: list): + """初始化COCO图像分类器""" + 
class COCOImageClassifier:
    """Image classifier over a local HuggingFace checkpoint, evaluated on local files."""

    def __init__(self, model_path: str, local_image_paths: list):
        """Load processor/model from `model_path`; `local_image_paths` lists files to score."""
        self.processor = AutoImageProcessor.from_pretrained(model_path)
        self.model = AutoModelForImageClassification.from_pretrained(model_path)

        # Move the model to the selected device (module-level `device`).
        self.model = self.model.to(device)
        print(f"模型是否在 GPU 上: {next(self.model.parameters()).is_cuda}")

        # Wrap with DataParallel when several GPUs are visible.
        if torch.cuda.device_count() > 1:
            self.model = torch.nn.DataParallel(self.model)

        # DataParallel hides the config behind `.module`.
        self.id2label = self.model.module.config.id2label if hasattr(self.model, 'module') else self.model.config.id2label
        self.local_image_paths = local_image_paths

    def predict_image_path(self, image_path: str, top_k: int = 5) -> dict:
        """Predict the top-k classes for one local image file.

        Returns a dict with `image_path` and `predictions`, or None on error.
        """
        try:
            image = Image.open(image_path).convert("RGB")
            inputs = self.processor(images=image, return_tensors="pt").to(device)

            with torch.no_grad():
                outputs = self.model(**inputs)

            logits = outputs.logits
            probs = torch.nn.functional.softmax(logits, dim=1)
            top_probs, top_indices = probs.topk(top_k, dim=1)

            predictions = []
            for i in range(top_k):
                class_idx = top_indices[0, i].item()
                predictions.append({
                    "class_id": class_idx,
                    "class_name": self.id2label[class_idx],
                    "confidence": top_probs[0, i].item(),
                })

            return {
                "image_path": image_path,
                "predictions": predictions,
            }
        except Exception as e:
            print(f"处理图片文件 {image_path} 时出错: {e}")
            return None

    def batch_predict(self, limit: int = 20, top_k: int = 5) -> list:
        """Predict up to `limit` images, print throughput, and return the results list."""
        results = []
        local_image_paths = self.local_image_paths[:limit]

        print(f"开始预测 {len(local_image_paths)} 张本地图片...")
        start_time = time.time()
        for image_path in tqdm(local_image_paths):
            result = self.predict_image_path(image_path, top_k)
            if result:
                results.append(result)
        total_time = time.time() - start_time

        # FIX: guard against zero elapsed time (e.g. empty input) before dividing.
        images_per_second = len(results) / total_time if total_time > 0 else 0.0
        print(f"模型每秒可以处理 {images_per_second:.2f} 张图片")
        return results

    def save_results(self, results: list, output_file: str = "caltech_predictions.json"):
        """Dump the prediction results to a JSON file."""
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"结果已保存到 {output_file}")


# 主程序
if __name__ == "__main__":
    LOCAL_MODEL_PATH = "/home/zhoushasha/models/microsoft_beit_base_patch16_224_pt22k_ft22k"
    CALTECH_256_PATH = "/home/zhoushasha/models/256ObjectCategoriesNew"

    local_image_paths = []
    true_labels = {}

    # Walk Caltech-256: folders are named "<id>.<class_name>"; sample 3 images per class.
    for folder in os.listdir(CALTECH_256_PATH):
        folder_path = os.path.join(CALTECH_256_PATH, folder)
        # FIX: require the "<id>.<name>" pattern before split() to avoid IndexError
        # on folders without a dot.
        if os.path.isdir(folder_path) and '.' in folder:
            class_name = folder.split('.', 1)[1]
            image_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path)
                           if f.endswith(('.jpg', '.jpeg', '.png'))]
            selected_images = random.sample(image_files, min(3, len(image_files)))
            for image_path in selected_images:
                local_image_paths.append(image_path)
                true_labels[image_path] = class_name

    classifier = COCOImageClassifier(LOCAL_MODEL_PATH, local_image_paths)
    results = classifier.batch_predict(limit=len(local_image_paths), top_k=3)
    classifier.save_results(results)

    print(f"\n处理完成: 成功预测 {len(results)} 张图片")
    if results:
        print("\n示例预测结果:")
        sample = results[0]
        print(f"图片路径: {sample['image_path']}")
        for i, pred in enumerate(sample['predictions'], 1):
            print(f"{i}. {pred['class_name']} (置信度: {pred['confidence']:.2%})")

    correct_count = 0
    total_count = len(results)
    class_actual_count = {}
    class_correct_count = {}

    for prediction in results:
        image_path = prediction['image_path']
        top1_prediction = max(prediction['predictions'], key=lambda x: x['confidence'])
        predicted_class = top1_prediction['class_name'].lower()
        # FIX: default to "" so a missing label cannot raise AttributeError on .lower().
        true_class = true_labels.get(image_path, "").lower()

        class_actual_count[true_class] = class_actual_count.get(true_class, 0) + 1

        # A prediction counts as correct when any word of it contains the true label.
        for word in predicted_class.split():
            if true_class in word:
                correct_count += 1
                class_correct_count[true_class] = class_correct_count.get(true_class, 0) + 1
                break

    # FIX: guard the empty-result case instead of raising ZeroDivisionError.
    accuracy = correct_count / total_count if total_count else 0.0
    print(f"\nAccuracy: {accuracy * 100:.2f}%")

    # Per-class recall: correct predictions / actual samples of that class.
    recall_per_class = {
        class_name: class_correct_count.get(class_name, 0) / count
        for class_name, count in class_actual_count.items()
    }

    average_recall = (sum(recall_per_class.values()) / len(recall_per_class)) if recall_per_class else 0.0
    print(f"\nAverage Recall: {average_recall * 100:.2f}%")
import requests
import json
import torch
from PIL import Image
from io import BytesIO
from transformers import AutoImageProcessor, AutoModelForImageClassification
from tqdm import tqdm
import os
import random
import time

# This variant forces CPU execution.
device = torch.device("cpu")
print(f"当前使用的设备: {device}")


class COCOImageClassifier:
    """CPU-only image classifier evaluated on a Caltech-256-style directory tree."""

    def __init__(self, model_path: str, local_image_paths: list):
        """Load processor/model from `model_path`; `local_image_paths` lists files to score."""
        self.processor = AutoImageProcessor.from_pretrained(model_path)
        self.model = AutoModelForImageClassification.from_pretrained(model_path)
        self.model = self.model.to(device)
        self.id2label = self.model.config.id2label
        self.local_image_paths = local_image_paths

    def predict_image_path(self, image_path: str, top_k: int = 5) -> dict:
        """Top-k prediction for one image file; returns None when the file cannot be scored."""
        try:
            image = Image.open(image_path).convert("RGB")
            inputs = self.processor(images=image, return_tensors="pt").to(device)

            with torch.no_grad():
                outputs = self.model(**inputs)

            probs = torch.nn.functional.softmax(outputs.logits, dim=1)
            top_probs, top_indices = probs.topk(top_k, dim=1)

            predictions = []
            for i in range(top_k):
                class_idx = top_indices[0, i].item()
                predictions.append({
                    "class_id": class_idx,
                    "class_name": self.id2label[class_idx],
                    "confidence": top_probs[0, i].item(),
                })

            return {
                "image_path": image_path,
                "predictions": predictions,
            }
        except Exception as e:
            print(f"处理图片文件 {image_path} 时出错: {e}")
            return None

    def batch_predict(self, limit: int = 20, top_k: int = 5) -> list:
        """Predict up to `limit` images, print throughput, and return the results list."""
        results = []
        local_image_paths = self.local_image_paths[:limit]

        print(f"开始预测 {len(local_image_paths)} 张本地图片...")
        start_time = time.time()
        for image_path in tqdm(local_image_paths):
            result = self.predict_image_path(image_path, top_k)
            if result:
                results.append(result)
        elapsed = time.time() - start_time

        # FIX: avoid ZeroDivisionError when no image was processed / elapsed is 0.
        throughput = len(results) / elapsed if elapsed > 0 else 0.0
        print(f"模型每秒可以处理 {throughput:.2f} 张图片")

        return results

    def save_results(self, results: list, output_file: str = "celtech_cpu_predictions.json"):
        """Dump the prediction results to a JSON file."""
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"结果已保存到 {output_file}")


# 主程序
if __name__ == "__main__":
    LOCAL_MODEL_PATH = "/home/zhoushasha/models/microsoft_beit_base_patch16_224_pt22k_ft22k"
    CALTECH_256_PATH = "/home/zhoushasha/models/256ObjectCategoriesNew"

    local_image_paths = []
    true_labels = {}

    for folder in os.listdir(CALTECH_256_PATH):
        folder_path = os.path.join(CALTECH_256_PATH, folder)
        # FIX: require the "<id>.<name>" pattern before split() to avoid IndexError.
        if os.path.isdir(folder_path) and '.' in folder:
            class_name = folder.split('.', 1)[1]
            image_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path)
                           if f.endswith(('.jpg', '.jpeg', '.png'))]
            for image_path in random.sample(image_files, min(3, len(image_files))):
                local_image_paths.append(image_path)
                true_labels[image_path] = class_name

    classifier = COCOImageClassifier(LOCAL_MODEL_PATH, local_image_paths)
    results = classifier.batch_predict(limit=len(local_image_paths), top_k=3)
    classifier.save_results(results)

    print(f"\n处理完成: 成功预测 {len(results)} 张图片")
    if results:
        print("\n示例预测结果:")
        sample = results[0]
        print(f"图片路径: {sample['image_path']}")
        for i, pred in enumerate(sample['predictions'], 1):
            print(f"{i}. {pred['class_name']} (置信度: {pred['confidence']:.2%})")

    correct_count = 0
    total_count = len(results)
    class_true_positives = {}
    class_false_negatives = {}

    for prediction in results:
        image_path = prediction['image_path']
        top1_prediction = max(prediction['predictions'], key=lambda x: x['confidence'])
        predicted_class = top1_prediction['class_name'].lower()
        # FIX: default to "" so a missing label cannot raise AttributeError on .lower().
        true_class = true_labels.get(image_path, "").lower()

        if true_class not in class_true_positives:
            class_true_positives[true_class] = 0
            class_false_negatives[true_class] = 0

        # for/else: the else branch runs only when no word matched (no break).
        for word in predicted_class.split():
            if true_class in word:
                correct_count += 1
                class_true_positives[true_class] += 1
                break
        else:
            class_false_negatives[true_class] += 1

    # FIX: guard empty result sets before dividing.
    accuracy = correct_count / total_count if total_count else 0.0
    print(f"\nAccuracy: {accuracy * 100:.2f}%")

    # Micro-averaged recall across all classes.
    total_true_positives = sum(class_true_positives.values())
    total_false_negatives = sum(class_false_negatives.values())
    denom = total_true_positives + total_false_negatives
    recall = total_true_positives / denom if denom else 0.0
    print(f"Recall: {recall * 100:.2f}%")
"cpu") +device_cpu = torch.device("cpu") +print(f"当前CUDA设备: {device_cuda}, CPU设备: {device_cpu}") +print(f"CPU核心数设置: {torch.get_num_threads()}") + +class ImageClassifier: + def __init__(self, model_path: str): + self.processor = AutoImageProcessor.from_pretrained(model_path) + + # 分别加载GPU和CPU模型实例 + if device_cuda.type == "cuda": + self.model_cuda = AutoModelForImageClassification.from_pretrained(model_path).to(device_cuda) + else: + self.model_cuda = None # 若没有CUDA,则不加载 + + self.model_cpu = AutoModelForImageClassification.from_pretrained(model_path).to(device_cpu) + + # 保存id2label映射 + self.id2label = self.model_cpu.config.id2label + + def _predict_with_model(self, image, model, device) -> dict: + """使用指定模型和设备执行预测,包含单独计时""" + try: + # 记录开始时间 + start_time = time.perf_counter() # 使用更精确的计时函数 + + # 处理图片并移动到目标设备 + inputs = self.processor(images=image, return_tensors="pt").to(device) + + with torch.no_grad(): + outputs = model(** inputs) + + logits = outputs.logits + probs = torch.nn.functional.softmax(logits, dim=1) + max_prob, max_idx = probs.max(dim=1) + class_idx = max_idx.item() + + # 计算处理时间(秒),保留6位小数 + processing_time = round(time.perf_counter() - start_time, 6) + + return { + "class_id": class_idx, + "class_name": self.id2label[class_idx], + "confidence": float(max_prob.item()), + "device_used": str(device), + "processing_time": processing_time # 处理时间 + } + except Exception as e: + return { + "class_id": -1, + "class_name": "error", + "confidence": 0.0, + "device_used": str(device), + "processing_time": 0.0, + "error": str(e) + } + + def predict_single_image(self, image) -> dict: + """预测单张图片,分别使用GPU和CPU模型""" + results = {"status": "success"} + + # GPU预测(如果可用) + if self.model_cuda is not None: + cuda_result = self._predict_with_model(image, self.model_cuda, device_cuda) + else: + cuda_result = { + "class_id": -1, + "class_name": "error", + "confidence": 0.0, + "device_used": str(device_cuda), + "processing_time": 0.0, + "error": "CUDA设备不可用,未加载CUDA模型" + } + 
results["cuda_prediction"] = cuda_result + + # CPU预测(已限制为4核心) + cpu_result = self._predict_with_model(image, self.model_cpu, device_cpu) + results["cpu_prediction"] = cpu_result + + return results + +# 初始化服务 +app = Flask(__name__) +MODEL_PATH = os.environ.get("MODEL_PATH", "/model") # 模型路径(环境变量或默认路径) +classifier = ImageClassifier(MODEL_PATH) + +@app.route('/v1/private/s782b4996', methods=['POST']) +def predict_single(): + """接收单张图片并返回预测结果及处理时间""" + if 'image' not in request.files: + return jsonify({ + "status": "error", + "cuda_prediction": { + "class_id": -1, + "class_name": "error", + "confidence": 0.0, + "device_used": str(device_cuda), + "processing_time": 0.0, + "error": "请求中未包含图片" + }, + "cpu_prediction": { + "class_id": -1, + "class_name": "error", + "confidence": 0.0, + "device_used": str(device_cpu), + "processing_time": 0.0, + "error": "请求中未包含图片" + } + }), 400 + + image_file = request.files['image'] + try: + image = Image.open(BytesIO(image_file.read())).convert("RGB") + result = classifier.predict_single_image(image) + return jsonify(result) + except Exception as e: + return jsonify({ + "status": "error", + "cuda_prediction": { + "class_id": -1, + "class_name": "error", + "confidence": 0.0, + "device_used": str(device_cuda), + "processing_time": 0.0, + "error": str(e) + }, + "cpu_prediction": { + "class_id": -1, + "class_name": "error", + "confidence": 0.0, + "device_used": str(device_cpu), + "processing_time": 0.0, + "error": str(e) + } + }), 500 + +@app.route('/health', methods=['GET']) +def health_check(): + return jsonify({ + "status": "healthy", + "cuda_available": device_cuda.type == "cuda", + "cuda_device": str(device_cuda), + "cpu_device": str(device_cpu), + "cpu_threads": torch.get_num_threads() # 显示CPU线程数 + }), 200 + +if __name__ == "__main__": + app.run(host='0.0.0.0', port=80, debug=False) diff --git a/model_test_caltech_http_1.py b/model_test_caltech_http_1.py new file mode 100644 index 0000000..12f6d3f --- /dev/null +++ 
import requests
import json
import torch
from PIL import Image
from io import BytesIO
from transformers import AutoImageProcessor, AutoModelForImageClassification
from tqdm import tqdm
import os
import random
import time
from flask import Flask, request, jsonify

# Pick GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"当前使用的设备: {device}")


class COCOImageClassifier:
    """Classifier exposed over HTTP: loads a HF checkpoint and evaluates a dataset on demand."""

    def __init__(self, model_path: str):
        """Load the model once at service start; image paths are supplied per request."""
        self.processor = AutoImageProcessor.from_pretrained(model_path)
        self.model = AutoModelForImageClassification.from_pretrained(model_path)
        self.model = self.model.to(device)

        if torch.cuda.device_count() > 1:
            print(f"使用 {torch.cuda.device_count()} 块GPU")
            self.model = torch.nn.DataParallel(self.model)

        # DataParallel hides the config behind `.module`.
        self.id2label = self.model.module.config.id2label if hasattr(self.model, 'module') else self.model.config.id2label

    def predict_image_path(self, image_path: str, top_k: int = 5) -> dict:
        """Top-k prediction for a single image; returns None on failure."""
        try:
            image = Image.open(image_path).convert("RGB")
            inputs = self.processor(images=image, return_tensors="pt").to(device)

            with torch.no_grad():
                outputs = self.model(**inputs)

            probs = torch.nn.functional.softmax(outputs.logits, dim=1)
            top_probs, top_indices = probs.topk(top_k, dim=1)

            predictions = []
            for i in range(top_k):
                class_idx = top_indices[0, i].item()
                predictions.append({
                    "class_id": class_idx,
                    "class_name": self.id2label[class_idx],
                    "confidence": top_probs[0, i].item(),
                })

            return {
                "image_path": image_path,
                "predictions": predictions,
            }
        except Exception as e:
            print(f"处理图片 {image_path} 出错: {e}")
            return None

    def batch_predict_and_evaluate(self, image_paths: list, true_labels: dict, top_k: int = 3) -> dict:
        """Predict all images, then compute accuracy, average per-class recall and throughput."""
        results = []
        start_time = time.time()

        for image_path in tqdm(image_paths):
            result = self.predict_image_path(image_path, top_k)
            if result:
                results.append(result)

        total_time = time.time() - start_time
        images_per_second = len(results) / total_time if total_time > 0 else 0

        correct_count = 0
        total_count = len(results)
        class_actual_count = {}
        class_correct_count = {}

        for prediction in results:
            image_path = prediction['image_path']
            top1_prediction = max(prediction['predictions'], key=lambda x: x['confidence'])
            predicted_class = top1_prediction['class_name'].lower()
            true_class = true_labels.get(image_path, "").lower()

            class_actual_count[true_class] = class_actual_count.get(true_class, 0) + 1

            # A prediction counts as correct when any word of it contains the true label.
            for word in predicted_class.split():
                if true_class in word:
                    correct_count += 1
                    class_correct_count[true_class] = class_correct_count.get(true_class, 0) + 1
                    break

        accuracy = correct_count / total_count if total_count > 0 else 0
        recall_per_class = {
            class_name: class_correct_count.get(class_name, 0) / count
            for class_name, count in class_actual_count.items()
        }
        average_recall = sum(recall_per_class.values()) / len(recall_per_class) if recall_per_class else 0

        return {
            "status": "success",
            "metrics": {
                "accuracy": round(accuracy * 100, 2),            # percentage
                "average_recall": round(average_recall * 100, 2),  # percentage
                "total_images": total_count,
                "correct_predictions": correct_count,
                "speed_images_per_second": round(images_per_second, 2),
            },
            "sample_predictions": results[:3],
        }


# 初始化Flask服务
app = Flask(__name__)
MODEL_PATH = os.environ.get("MODEL_PATH", "/model")          # model path inside the container
DATASET_PATH = os.environ.get("DATASET_PATH", "/app/dataset")  # dataset path inside the container
classifier = COCOImageClassifier(MODEL_PATH)


@app.route('/v1/private/s782b4996', methods=['POST'])
def evaluate():
    """Run the evaluation over the mounted dataset and return accuracy/recall metrics."""
    try:
        # FIX: request.get_json() returns None when the POST carries no JSON body,
        # which previously crashed on `data.get(...)`. silent=True plus a default
        # dict keeps the route usable for bodiless requests.
        data = request.get_json(silent=True) or {}
        limit = data.get("limit", 20)

        local_image_paths = []
        true_labels = {}
        for folder in os.listdir(DATASET_PATH):
            folder_path = os.path.join(DATASET_PATH, folder)
            # FIX: require the "<id>.<name>" folder pattern before split() to avoid IndexError.
            if os.path.isdir(folder_path) and '.' in folder:
                class_name = folder.split('.', 1)[1]
                image_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path)
                               if f.endswith(('.jpg', '.jpeg', '.png'))]
                for image_path in random.sample(image_files, min(3, len(image_files))):
                    local_image_paths.append(image_path)
                    true_labels[image_path] = class_name

        local_image_paths = local_image_paths[:limit]

        result = classifier.batch_predict_and_evaluate(local_image_paths, true_labels, top_k=3)
        return jsonify(result)

    except Exception as e:
        return jsonify({
            "status": "error",
            "message": str(e),
        }), 500


@app.route('/health', methods=['GET'])
def health_check():
    return jsonify({"status": "healthy", "device": str(device)}), 200


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=8000, debug=False)
self.processor = AutoImageProcessor.from_pretrained(actual_model_path) + self.model = AutoModelForImageClassification.from_pretrained(actual_model_path) + self.model = self.model.to(device) + + if torch.cuda.device_count() > 1: + print(f"使用 {torch.cuda.device_count()} 块GPU") + self.model = torch.nn.DataParallel(self.model) + + self.id2label = self.model.module.config.id2label if hasattr(self.model, 'module') else self.model.config.id2label + + def predict_single_image(self, image) -> dict: + """预测单张图片,返回置信度最高的结果""" + try: + # 处理图片 + inputs = self.processor(images=image, return_tensors="pt").to(device) + + with torch.no_grad(): + outputs = self.model(** inputs) + + logits = outputs.logits + probs = torch.nn.functional.softmax(logits, dim=1) + # 获取置信度最高的预测结果 + max_prob, max_idx = probs.max(dim=1) + class_idx = max_idx.item() + + return { + "status": "success", + "top_prediction": { + "class_id": class_idx, + "class_name": self.id2label[class_idx], + "confidence": max_prob.item() + } + } + except Exception as e: + return { + "status": "error", + "message": str(e) + } + +# 初始化服务 +app = Flask(__name__) +MODEL_PATH = os.environ.get("MODEL_PATH", "/model") # 模型根路径(环境变量或默认路径) +classifier = ImageClassifier(MODEL_PATH) + +@app.route('/v1/private/s782b4996', methods=['POST']) +def predict_single(): + """接收单张图片并返回最高置信度预测结果""" + # 检查是否有图片上传 + if 'image' not in request.files: + return jsonify({"status": "error", "message": "请求中未包含图片"}), 400 + + image_file = request.files['image'] + try: + # 读取图片 + image = Image.open(BytesIO(image_file.read())).convert("RGB") + # 预测 + result = classifier.predict_single_image(image) + return jsonify(result) + except Exception as e: + return jsonify({"status": "error", "message": str(e)}), 500 + +@app.route('/health', methods=['GET']) +def health_check(): + return jsonify({"status": "healthy", "device": str(device)}), 200 + +if __name__ == "__main__": + app.run(host='0.0.0.0', port=8000, debug=False) \ No newline at end of file diff --git 
a/model_test_caltech_http_cuda.py b/model_test_caltech_http_cuda.py new file mode 100644 index 0000000..2fec98c --- /dev/null +++ b/model_test_caltech_http_cuda.py @@ -0,0 +1,80 @@ +import torch +from PIL import Image +from transformers import AutoImageProcessor, AutoModelForImageClassification +import os +from flask import Flask, request, jsonify +from io import BytesIO + +# 设备配置 +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +print(f"当前使用的设备: {device}") + +class ImageClassifier: + def __init__(self, model_path: str): + self.processor = AutoImageProcessor.from_pretrained(model_path) + self.model = AutoModelForImageClassification.from_pretrained(model_path) + self.model = self.model.to(device) + + if torch.cuda.device_count() > 1: + print(f"使用 {torch.cuda.device_count()} 块GPU") + self.model = torch.nn.DataParallel(self.model) + + self.id2label = self.model.module.config.id2label if hasattr(self.model, 'module') else self.model.config.id2label + + def predict_single_image(self, image) -> dict: + """预测单张图片,返回置信度最高的结果""" + try: + # 处理图片 + inputs = self.processor(images=image, return_tensors="pt").to(device) + + with torch.no_grad(): + outputs = self.model(**inputs) + + logits = outputs.logits + probs = torch.nn.functional.softmax(logits, dim=1) + # 获取置信度最高的预测结果 + max_prob, max_idx = probs.max(dim=1) + class_idx = max_idx.item() + + return { + "status": "success", + "top_prediction": { + "class_id": class_idx, + "class_name": self.id2label[class_idx], + "confidence": max_prob.item() + } + } + except Exception as e: + return { + "status": "error", + "message": str(e) + } + +# 初始化服务 +app = Flask(__name__) +MODEL_PATH = os.environ.get("MODEL_PATH", "/model") # 模型路径(环境变量或默认路径) +classifier = ImageClassifier(MODEL_PATH) + +@app.route('/v1/private/s782b4996', methods=['POST']) +def predict_single(): + """接收单张图片并返回最高置信度预测结果""" + # 检查是否有图片上传 + if 'image' not in request.files: + return jsonify({"status": "error", "message": "请求中未包含图片"}), 400 + + 
image_file = request.files['image'] + try: + # 读取图片 + image = Image.open(BytesIO(image_file.read())).convert("RGB") + # 预测 + result = classifier.predict_single_image(image) + return jsonify(result) + except Exception as e: + return jsonify({"status": "error", "message": str(e)}), 500 + +@app.route('/health', methods=['GET']) +def health_check(): + return jsonify({"status": "healthy", "device": str(device)}), 200 + +if __name__ == "__main__": + app.run(host='0.0.0.0', port=80, debug=False) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4959125 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +[tool.black] +line-length = 80 +target-version = ['py39'] + +[tool.flake8] +max-line-length = 88 +count=true +per-file-ignores="./annotation/manager.py:F401" +exclude=["./label", "__pycache__", "./migrations", "./logs", "./pids", "./resources"] +ignore=["W503", "E203"] +enable-extensions="G" +application-import-names=["flake8-isort", "flake8-logging-format", "flake8-builtins"] +import-order-style="edited" +extend-ignore = ["E203", "E701"] + +[tool.isort] +py_version=39 +profile="black" +multi_line_output=9 +line_length=80 +group_by_package=true +case_sensitive=true +skip_gitignore=true + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..84ac0e9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +requests +ruamel.yaml +regex +pyyaml +websocket-client==0.44.0 +pydantic==2.6.4 +pydantic_core==2.16.3 +Levenshtein +numpy +websockets +fabric +vmplatform==0.0.4 +flask diff --git a/run.py b/run.py new file mode 100644 index 0000000..08b7d5e --- /dev/null +++ b/run.py @@ -0,0 +1,114 @@ +import gc +import json +import os +import sys +import time +import zipfile + +import yaml +from schemas.context import ASRContext +from utils.client import Client +from utils.evaluator import BaseEvaluator +from utils.logger import logger +from utils.service import register_sut + +IN_TEST = 
IN_TEST = os.getenv("SUBMIT_CONFIG_FILEPATH", None) is None
# FIX: os.getenv returns a *string*; the previous value made UNIT_TEST="0" truthy.
# Normalize to int so `if UNIT_TEST:` behaves as intended.
UNIT_TEST = int(os.getenv("UNIT_TEST", "0") or "0")


def main():
    """Entry point: load the submit config, deploy/locate the SUT, then run the
    evaluation loop (the loop itself is currently disabled, see the string block below)."""
    logger.info("执行……")

    dataset_filepath = os.getenv(
        "DATASET_FILEPATH",
        "./tests/resources/en.zip",
    )
    submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "./tests/resources/submit_config")
    result_filepath = os.getenv("RESULT_FILEPATH", "./out/result")
    bad_cases_filepath = os.getenv("BAD_CASES_FILEPATH", "./out/badcase")
    detail_cases_filepath = os.getenv("DETAILED_CASES_FILEPATH", "./out/detailcase.jsonl")

    resource_name = os.getenv("BENCHMARK_NAME")

    # 提交配置 & 启动被测服务
    if os.getenv("DATASET_FILEPATH", ""):
        from utils.helm import resource_check

        with open(submit_config_filepath, "r") as fp:
            st_config = yaml.safe_load(fp)
        st_config["values"] = resource_check(st_config.get("values", {}))
        if 'docker_images' in st_config:
            sut_url = "ws://172.26.1.75:9827"
            os.environ['test'] = '1'
        elif 'docker_image' in st_config:
            sut_url = register_sut(st_config, resource_name)
        elif UNIT_TEST:
            sut_url = "ws://172.27.231.36:80"
        else:
            logger.error("config 配置错误,没有 docker_image")
            os._exit(1)  # hard exit on bad config: deliberately skips atexit handlers
    else:
        os.environ['test'] = '1'
        sut_url = "ws://172.27.231.36:80"
    if UNIT_TEST:
        exit(0)

    # NOTE(review): the evaluation loop below is deliberately disabled (wrapped in a
    # string literal). Kept verbatim apart from the f-string fix on the final
    # logger.info, which was missing its f-prefix.
    """
    # 数据集处理
    local_dataset_path = "./dataset"
    os.makedirs(local_dataset_path, exist_ok=True)
    with zipfile.ZipFile(dataset_filepath) as zf:
        zf.extractall(local_dataset_path)
    config_path = os.path.join(local_dataset_path, "data.yaml")
    with open(config_path, "r") as fp:
        dataset_config = yaml.safe_load(fp)

    # 数据集信息
    dataset_global_config = dataset_config.get("global", {})
    dataset_query = dataset_config.get("query_data", {})

    evaluator = BaseEvaluator()

    # 开始预测
    for idx, query_item in enumerate(dataset_query):
        gc.collect()
        logger.info(f"开始执行 {idx} 条数据")

        context = ASRContext(**dataset_global_config)
        context.lang = query_item.get("lang", context.lang)
        context.file_path = os.path.join(local_dataset_path, query_item["file"])
        # context.audio_length = query_item["audio_length"]

        interactions = Client(sut_url, context).action()
        context.append_labels(query_item["voice"])
        context.append_preds(
            interactions["predict_data"],
            interactions["send_time"],
            interactions["recv_time"],
        )
        context.fail = interactions["fail"]
        if IN_TEST:
            with open('output.txt', 'w') as fp:
                original_stdout = sys.stdout
                sys.stdout = fp
                print(context)
                sys.stdout = original_stdout
        evaluator.evaluate(context)
        detail_case = evaluator.gen_detail_case()
        with open(detail_cases_filepath, "a") as fp:
            fp.write(json.dumps(detail_case.to_dict(), ensure_ascii=False) + "\n")
        time.sleep(4)

    evaluator.post_evaluate()
    output_result = evaluator.gen_result()
    logger.info(f"执行完成. Result = {output_result}")

    with open(result_filepath, "w") as fp:
        json.dump(output_result, fp, indent=2, ensure_ascii=False)
    with open(bad_cases_filepath, "w") as fp:
        fp.write("当前榜单不存在 Bad Case\n")
    """

if __name__ == "__main__":
    main()
def clean_vm_atexit():
    """atexit hook: delete the VM created by this job, if we created one.

    A VM is only removed when this process actually deployed it
    (``do_deploy_chart``) and recorded a VM id.
    """
    global VM_ID, do_deploy_chart
    if not VM_ID:
        # This process never created a VM.
        return
    if not do_deploy_chart:
        # The VM belongs to the sibling job that deployed it; leave it.
        return
    logger.info("删除vm")
    vmclient = Client()
    err_msg = vmclient.delete_vm(VM_ID)
    if err_msg:
        logger.warning(f"删除vm失败: {err_msg}")


def put_file_to_vm(c: Connection, local_path: str, remote_path: str):
    """Upload a local file to the VM through the fabric connection."""
    logger.info(f"uploading file {local_path} to {remote_path}")
    result = c.put(local_path, remote_path)
    logger.info("uploaded {0.local} to {0.remote}".format(result))


def deploy_windows_sut():
    """Create a Windows VM, install the SUT runtime and model, start it.

    Reads the submit config, resolves the model archive from the declared
    nfs mounts, provisions a Windows 10 VM, uploads runtime + model +
    config, and launches the serving script from a background thread.

    :returns: websocket URL of the SUT service on the new VM.
    :raises RuntimeError: when no nfs entry matches ``model_key``.
    """
    global VM_ID
    global VM_IP

    submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "")
    with open(submit_config_filepath, "r") as fp:
        st_config = yaml.safe_load(fp)
    assert "model" in st_config, "未配置model"
    assert "model_key" in st_config, "未配置model_key"
    assert "config.json" in st_config, "未配置config.json"
    nfs = st_config.get("leaderboard_options", {}).get("nfs", [])
    assert len(nfs) > 0, "未配置nfs"
    assert st_config["model"] in MODEL_MAPPING, "提交模型不在可用模型范围内"

    model = st_config["model"]
    model_key = st_config["model_key"]
    model_path = ""
    config = st_config["config.json"]
    exist = False
    for nfs_item in nfs:
        if nfs_item["name"] == model_key:
            exist = True
            # Two storage backends map to different local mount roots.
            if nfs_item["source"] == "ceph_customer":
                model_path = os.path.join(
                    "/tmp/customer",
                    nfs_item["srcRelativePath"],
                )
            else:
                model_path = os.path.join(
                    "/tmp/juicefs",
                    nfs_item["srcRelativePath"],
                )
            break
    if not exist:
        raise RuntimeError(f"未找到nfs配置项 name={model_key}")
    config_path = os.path.join(tempfile.mkdtemp(), "config.json")
    model_dir = os.path.basename(model_path).split(".")[0]
    config["model_path"] = f"E:\\model\\{model_dir}"
    with open(config_path, "w") as fp:
        json.dump(config, fp, ensure_ascii=False, indent=4)

    vmclient = Client()
    with open(SSH_PUBLIC_KEY_FILE, "r") as fp:
        sshpublickey = fp.read().rstrip()
    VM_ID = vmclient.create_vm(
        "amd64",
        VMOS.windows10,
        VM_CPU,
        VM_MEM,
        "leaderboard-%s-submit-%s-job-%s"
        % (
            os.getenv("BENCHMARK_NAME"),
            os.getenv("SUBMIT_ID"),
            os.getenv("JOB_ID"),
        ),
        sshpublickey,
        datadisks=[
            VMDataDisk(
                size=50,
                disk_type="ssd",
                mount_path="/",
                filesystem="NTFS",
            )
        ],
    )
    atexit.register(clean_vm_atexit)
    # Translate SIGTERM into a normal exit so atexit cleanup still runs.
    signal.signal(signal.SIGTERM, lambda signum, _: sys.exit(signum))
    VM_IP = vmclient.wait_until_vm_running(VM_ID)
    logger.info("vm created successfully, vm_ip: %s", VM_IP)

    def sut_startup():
        # Daemon thread: blocks for the lifetime of the server process.
        with Connection(
            VM_IP,
            "administrator",
            connect_kwargs=CONNECT_KWARGS,
        ) as c:
            # NOTE: a dead first assignment to script_path (faster-whisper
            # path) was removed; only the sensevoice path was ever used.
            script_path = "E:\\install\\asr\\sensevoice\\server"
            bat_filepath = f"{script_path}\\start.bat"
            config_filepath = "E:\\submit\\config.json"
            # Probe the connection before launching the long-running server.
            result = c.run("")
            assert result.ok
            c.run(
                f'cd /d {script_path} & set "EDGE_ML_ENV_HOME=E:\\install" & {bat_filepath} {config_filepath}',
                warn=True,
            )

    with Connection(
        VM_IP,
        "administrator",
        connect_kwargs=CONNECT_KWARGS,
    ) as c:
        model_filepath = os.path.join(MODEL_BASEPATH, MODEL_MAPPING[model])
        filename = os.path.basename(model_filepath)
        put_file_to_vm(c, model_filepath, "/E:/")

        result = c.run("mkdir E:\\base")
        assert result.ok
        result = c.run("mkdir E:\\model")
        assert result.ok
        result = c.run("mkdir E:\\submit")
        assert result.ok

        # NOTE(review): the extracted source was garbled here ("(unknown)");
        # ``filename`` (the runtime archive uploaded just above, previously
        # computed but unused) is the only plausible tar source — confirm.
        result = c.run(
            f"tar zxvf E:\\{filename} -C E:\\base --strip-components 1"
        )
        assert result.ok

        result = c.run("E:\\base\\setup-win.bat E:\\install")
        assert result.ok

        put_file_to_vm(c, config_path, "/E:/submit")
        put_file_to_vm(c, model_path, "/E:/model")
        result = c.run(
            f"tar zxvf E:\\model\\{os.path.basename(model_path)} -C E:\\model"
        )
        assert result.ok
    threading.Thread(target=sut_startup, daemon=True).start()
    # Give the server a head start before callers try to connect.
    time.sleep(60)

    return f"ws://{VM_IP}:{config['port']}"
def deploy_macos_sut():
    """Create a macOS VM, install the SUT runtime and model, start it.

    Mirrors ``deploy_windows_sut`` for macOS 12: resolves the model
    archive from the submit config's nfs entries, provisions the VM,
    uploads runtime + model + config onto the data volume and launches
    the serving script in a background thread.

    :returns: websocket URL of the SUT service on the new VM.
    :raises RuntimeError: when no nfs entry matches ``model_key``.
    """
    global VM_ID
    global VM_IP

    submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "")
    with open(submit_config_filepath, "r") as fp:
        st_config = yaml.safe_load(fp)
    assert "model" in st_config, "未配置model"
    assert "model_key" in st_config, "未配置model_key"
    assert "config.json" in st_config, "未配置config.json"
    nfs = st_config.get("leaderboard_options", {}).get("nfs", [])
    assert len(nfs) > 0, "未配置nfs"
    assert st_config["model"] in MODEL_MAPPING, "提交模型不在可用模型范围内"

    model = st_config["model"]
    model_key = st_config["model_key"]
    model_path = ""
    config = st_config["config.json"]
    exist = False
    for nfs_item in nfs:
        if nfs_item["name"] == model_key:
            exist = True
            # Two storage backends map to different local mount roots.
            if nfs_item["source"] == "ceph_customer":
                model_path = os.path.join(
                    "/tmp/customer",
                    nfs_item["srcRelativePath"],
                )
            else:
                model_path = os.path.join(
                    "/tmp/juicefs",
                    nfs_item["srcRelativePath"],
                )
            break
    if not exist:
        raise RuntimeError(f"未找到nfs配置项 name={model_key}")
    config_path = os.path.join(tempfile.mkdtemp(), "config.json")
    model_dir = os.path.basename(model_path).split(".")[0]

    vmclient = Client()
    with open(SSH_PUBLIC_KEY_FILE, "r") as fp:
        sshpublickey = fp.read().rstrip()
    VM_ID = vmclient.create_vm(
        "amd64",
        VMOS.macos12,
        VM_CPU,
        VM_MEM,
        "leaderboard-%s-submit-%s-job-%s"
        % (
            os.getenv("BENCHMARK_NAME"),
            os.getenv("SUBMIT_ID"),
            os.getenv("JOB_ID"),
        ),
        sshpublickey,
        datadisks=[
            VMDataDisk(
                size=50,
                disk_type="ssd",
                mount_path="/",
                filesystem="apfs",
            )
        ],
    )
    atexit.register(clean_vm_atexit)
    # Translate SIGTERM into a normal exit so atexit cleanup still runs.
    signal.signal(signal.SIGTERM, lambda signum, _: sys.exit(signum))
    VM_IP = vmclient.wait_until_vm_running(VM_ID)
    logger.info("vm created successfully, vm_ip: %s", VM_IP)

    with Connection(
        VM_IP,
        "admin",
        connect_kwargs=CONNECT_KWARGS,
    ) as c:
        # Locate the mounted data volume (its name varies per VM).
        result = c.run("ls -d /Volumes/data*")
        assert result.ok
        volume_path = result.stdout.strip()

    # The model path can only be rendered once the volume name is known.
    config["model_path"] = f"{volume_path}/model/{model_dir}"
    with open(config_path, "w") as fp:
        json.dump(config, fp, ensure_ascii=False, indent=4)

    def sut_startup():
        # Daemon thread: blocks while the SUT server process runs.
        with Connection(
            VM_IP,
            "admin",
            connect_kwargs=CONNECT_KWARGS,
        ) as c:
            script_path = f"{volume_path}/install/asr/sensevoice/server"
            startsh = f"{script_path}/start.sh"
            config_filepath = f"{volume_path}/submit/config.json"
            c.run(
                f"cd {script_path} && sh {startsh} {config_filepath}",
                warn=True,
            )

    with Connection(
        VM_IP,
        "admin",
        connect_kwargs=CONNECT_KWARGS,
    ) as c:
        model_filepath = os.path.join(MODEL_BASEPATH, MODEL_MAPPING[model])
        filename = os.path.basename(model_filepath)
        put_file_to_vm(c, model_filepath, f"{volume_path}")

        result = c.run(f"mkdir {volume_path}/base")
        assert result.ok
        result = c.run(f"mkdir {volume_path}/model")
        assert result.ok
        result = c.run(f"mkdir {volume_path}/submit")
        assert result.ok

        # NOTE(review): the extracted source was garbled here ("(unknown)");
        # ``filename`` (the runtime archive uploaded just above, previously
        # computed but unused) is the only plausible tar source — confirm.
        result = c.run(
            f"tar zxvf {volume_path}/{filename} -C {volume_path}/base --strip-components 1"  # noqa: E501
        )
        assert result.ok

        result = c.run(
            f"sh {volume_path}/base/setup-mac.sh {volume_path}/install x64"
        )
        assert result.ok

        put_file_to_vm(c, config_path, f"{volume_path}/submit")
        put_file_to_vm(c, model_path, f"{volume_path}/model")
        result = c.run(
            f"tar zxvf {volume_path}/model/{os.path.basename(model_path)} -C {volume_path}/model"  # noqa: E501
        )
        assert result.ok
    threading.Thread(target=sut_startup, daemon=True).start()
    # Give the server a head start before callers try to connect.
    time.sleep(60)

    return f"ws://{VM_IP}:{config['port']}"
def get_sut_url_vm(vm_type: str):
    """Deploy (or attach to) a VM-hosted SUT and return its websocket URL.

    With SHARE_SUT enabled, only one job per submit deploys the VM; the
    other jobs poll a shared status file until the deployer publishes the
    endpoint and ssh keypair.

    :param vm_type: "windows" or "macos" — selects the deploy routine.
    """
    global VM_ID
    global VM_IP
    global do_deploy_chart

    do_deploy_chart = True

    def check_job_failed():
        # Background watchdog: abort this job as soon as any sibling job
        # in the same submit reports failure through the marker file.
        while True:
            time.sleep(30)
            if os.path.exists(SUT_SHARE_PUBLIC_FAIL):
                logger.error("there is a job failed in current submit")
                sys.exit(1)

    sut_url = ""
    threading.Thread(target=check_job_failed, daemon=True).start()
    if SHARE_SUT:
        # Jitter so concurrent jobs don't all race on lock-file creation.
        time.sleep(10 * random.random())
        try:
            # O_EXCL-style creation: the winner deploys, the rest wait.
            open(SUT_SHARE_LOCK, "x").close()
        except Exception:
            do_deploy_chart = False

        start_at = time.time()

        def file_last_updated_at(file: str):
            return os.stat(file).st_mtime if os.path.exists(file) else start_at

        if not do_deploy_chart:
            with open(SUT_SHARE_JOB_STATUS, "w") as f:
                f.write("waiting")
            # Wait up to 24h for the deploying job to publish the endpoint.
            while (
                time.time() - file_last_updated_at(SUT_SHARE_STATUS)
                <= 60 * 60 * 24
            ):
                logger.info(
                    "Waiting sut application to be deployed by another job"
                )
                time.sleep(10 + random.random())
                if os.path.exists(SUT_SHARE_STATUS):
                    # The writer may be mid-write; retry the read briefly.
                    status = None
                    for _ in range(10):
                        try:
                            with open(SUT_SHARE_STATUS, "r") as f:
                                status = json.load(f)
                            break
                        except Exception:
                            time.sleep(1 + random.random())
                            continue
                    if status is None:
                        raise RuntimeError(
                            "Failed to get status of sut application"
                        )
                    assert (
                        status.get("status") != "failed"
                    ), "Failed to deploy sut application, please check other job logs"
                    if status.get("status") == "running":
                        VM_ID = status.get("vmid")
                        VM_IP = status.get("vmip")
                        sut_url = status.get("sut_url")
                        # Reuse the deployer's keypair to reach the VM.
                        with open(SSH_PUBLIC_KEY_FILE, "w") as fp:
                            fp.write(status.get("pubkey"))
                        with open(SSH_KEY_FILE, "w") as fp:
                            fp.write(status.get("prikey"))
                        logger.info("Successfully get deployed sut application")
                        break
            if not sut_url:
                # BUGFIX: the original fell through with an empty URL after
                # the 24h timeout; fail loudly instead.
                raise RuntimeError(
                    "Timed out waiting for shared sut deployment"
                )

    if do_deploy_chart:
        try:
            fcntl.flock(fd_lock, fcntl.LOCK_EX)
            with open(SUT_SHARE_JOB_STATUS, "w") as f:
                f.write("waiting")
            pending = True

            def update_status():
                # Heartbeat "pending" so waiters see a fresh mtime.
                while pending:
                    time.sleep(30)
                    if not pending:
                        break
                    with open(SUT_SHARE_STATUS, "w") as f:
                        json.dump({"status": "pending"}, f)

            threading.Thread(target=update_status, daemon=True).start()
            if vm_type == "windows":
                sut_url = deploy_windows_sut()
            else:
                sut_url = deploy_macos_sut()
        except Exception:
            # Broadcast the failure to sibling jobs before re-raising.
            open(SUT_SHARE_PUBLIC_FAIL, "w").close()
            with open(SUT_SHARE_STATUS, "w") as f:
                json.dump({"status": "failed"}, f)
            raise
        else:
            # BUGFIX: the original published "running" from a finally
            # block, clobbering the "failed" status written in the except
            # path above.  Publish only on success.
            with open(SSH_PUBLIC_KEY_FILE, "r") as fp:
                pubkey = fp.read().rstrip()
            with open(SSH_KEY_FILE, "r") as fp:
                prikey = fp.read()
            with open(SUT_SHARE_STATUS, "w") as f:
                json.dump(
                    {
                        "status": "running",
                        "vmid": VM_ID,
                        "vmip": VM_IP,
                        "pubkey": pubkey,
                        "sut_url": sut_url,
                        "prikey": prikey,
                    },
                    f,
                )
        finally:
            pending = False
    else:
        # Another job deployed the SUT; take turns calling it by polling
        # for the shared advisory lock.
        while True:
            time.sleep(5 + random.random())
            try:
                fcntl.flock(fd_lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
                break
            except Exception:
                logger.info("尝试抢占调用sut失败,继续等待 5s ...")

    with open(SUT_SHARE_JOB_STATUS, "w") as f:
        f.write("running")

    return sut_url


def get_sut_url():
    """Resolve the SUT endpoint for the configured deployment backend.

    VM backends delegate to ``get_sut_url_vm``; the kubernetes path
    injects resource limits (and, for GPU runs, A10 vgpu settings) into
    the submit config before registering the chart.
    """
    if SUT_TYPE in ("windows", "macos"):
        return get_sut_url_vm(SUT_TYPE)

    submit_config_filepath = os.getenv(
        "SUBMIT_CONFIG_FILEPATH", "./tests/resources/submit_config"
    )
    CPU = os.getenv("SUT_CPU", "2")
    MEMORY = os.getenv("SUT_MEMORY", "4Gi")
    resource_name = os.getenv("BENCHMARK_NAME")

    # 提交配置 & 启动被测服务
    if os.getenv("DATASET_FILEPATH", ""):
        with open(submit_config_filepath, "r") as fp:
            st_config = yaml.safe_load(fp)
        if "values" not in st_config:
            st_config["values"] = {}
        # Enforce the benchmark's cpu/memory quota regardless of what the
        # submitter configured (resources are replaced wholesale).
        st_config["values"]["resources"] = {
            "limits": {"cpu": CPU, "memory": MEMORY},
            "requests": {"cpu": CPU, "memory": MEMORY},
        }
        if os.getenv("RESOURCE_TYPE", "cpu") == "cpu":
            # NOTE(review): since resources were just overwritten above,
            # this guard can never trigger; kept for parity with the
            # original control flow.
            values = st_config["values"]
            limits = values.get("resources", {}).get("limits", {})
            requests = values.get("resources", {}).get("requests", {})
            gpu_keys = (
                "nvidia.com/gpu",
                "nvidia.com/gpumem",
                "nvidia.com/gpucores",
            )
            if any(k in limits or k in requests for k in gpu_keys):
                raise Exception("禁止使用GPU!")
        else:
            vgpu_num = int(os.getenv("SUT_VGPU", "3"))
            # Per-vgpu scaling: 1843 MiB memory and 8% cores per unit.
            vgpu_resource = {
                "nvidia.com/gpu": str(vgpu_num),
                "nvidia.com/gpumem": str(1843 * vgpu_num),
                "nvidia.com/gpucores": str(8 * vgpu_num),
            }
            st_config["values"]["resources"]["limits"].update(vgpu_resource)
            st_config["values"]["resources"]["requests"].update(vgpu_resource)
            st_config["values"]["nodeSelector"] = {
                "contest.4pd.io/accelerator": "A10vgpu"
            }
            st_config["values"]["tolerations"] = [
                {
                    "key": "hosttype",
                    "operator": "Equal",
                    "value": "vgpu",
                    "effect": "NoSchedule",
                }
            ]
        if "docker_images" in st_config:
            sut_url = "ws://172.26.1.75:9827"
            os.environ["test"] = "1"
        elif "docker_image" in st_config:
            sut_url = register_sut(st_config, resource_name)
        elif UNIT_TEST:
            sut_url = "ws://172.27.231.36:80"
        else:
            logger.error("config 配置错误,没有 docker_image")
            # sys.exit instead of os._exit so cleanup hooks still run.
            sys.exit(1)
        return sut_url
    else:
        os.environ["test"] = "1"
        # The second of two consecutive assignments always won in the
        # original; keep the effective value and drop the dead store.
        sut_url = "ws://172.26.1.75:9827"
        return sut_url
def load_merge_dataset(dataset_filepath: str) -> dict:
    """Unpack a bundle of per-language ASR datasets and merge the queries.

    The bundle zip contains sub-zips named ``asr.<lang>``.  Each
    language's query list is sorted by audio size (descending) and
    truncated to roughly half an hour of audio; the merged
    ``query_data`` list is shuffled with a fixed seed for
    reproducibility.

    :param dataset_filepath: path to the bundle zip.
    :returns: ``{lang: lang_config, ..., "query_data": merged_list}``.
    """
    local_dataset_path = "./dataset"
    os.makedirs(local_dataset_path, exist_ok=True)
    with zipfile.ZipFile(dataset_filepath) as zf:
        zf.extractall(local_dataset_path)

    config = {}
    for sub_dataset in os.listdir(local_dataset_path):
        if not sub_dataset.startswith("asr."):
            continue
        lang = sub_dataset[4:]
        lang_path = os.path.join(local_dataset_path, lang)
        os.makedirs(lang_path, exist_ok=True)
        with zipfile.ZipFile(
            os.path.join(local_dataset_path, sub_dataset)
        ) as zf:
            zf.extractall(lang_path)
        lang_config_path = os.path.join(lang_path, "data.yaml")
        with open(lang_config_path, "r") as fp:
            lang_config = yaml.safe_load(fp)

        # Rewrite file entries to absolute-ish local paths and record
        # their sizes so queries can be ordered longest-first.
        audio_lengths = {}
        for query_item in lang_config.get("query_data", []):
            audio_path = os.path.join(lang_path, query_item["file"])
            query_item["file"] = audio_path
            audio_lengths[audio_path] = os.path.getsize(audio_path)
        lang_config["query_data"] = sorted(
            lang_config.get("query_data", []),
            key=lambda x: audio_lengths[x["file"]],
            reverse=True,
        )

        idx = 0
        length = 0.0
        for query_item in lang_config["query_data"]:
            # 32000 bytes/s — presumably 16 kHz * 16-bit mono PCM, so
            # file size maps to seconds of audio (TODO confirm).
            length += audio_lengths[query_item["file"]] / 32000
            idx += 1
            # Cap each language at roughly half an hour of audio.
            if length >= 30 * 60:
                break
        lang_config["query_data"] = lang_config["query_data"][:idx]
        config[lang] = lang_config

    # Merge the per-language queries, tagging each with its language.
    config["query_data"] = []
    for lang, lang_config in config.items():
        if lang == "query_data":
            continue
        for query_item in lang_config["query_data"]:
            config["query_data"].append(
                {
                    **query_item,
                    "lang": lang,
                }
            )
    random.Random(0).shuffle(config["query_data"])

    return config


def postprocess_failed():
    """Signal sibling jobs (via a marker file) that this job failed."""
    open(SUT_SHARE_PUBLIC_FAIL, "w").close()


def main():
    """Async evaluation entry point: prepare data, query the SUT, score."""
    dataset_filepath = os.getenv(
        "DATASET_FILEPATH",
        "/Users/4paradigm/Projects/dataset/asr/de.zip",
        # "./tests/resources/en.zip",
    )
    result_filepath = os.getenv("RESULT_FILEPATH", "./out/result")
    bad_cases_filepath = os.getenv("BAD_CASES_FILEPATH", "./out/badcase")
    detail_cases_filepath = os.getenv(
        "DETAILED_CASES_FILEPATH", "./out/detailcase.jsonl"
    )
    thread_num = int(os.getenv("THREAD_NUM", "1"))

    # Dataset preparation.  BUGFIX: the original tested truthiness of the
    # raw env string, so MERGE_DATASET=0 still merged; treat "0" as off.
    config = {}
    if os.getenv("MERGE_DATASET", "1") not in ("", "0"):
        config = load_merge_dataset(dataset_filepath)
        dataset_query = config["query_data"]
    else:
        local_dataset_path = "./dataset"
        os.makedirs(local_dataset_path, exist_ok=True)
        with zipfile.ZipFile(dataset_filepath) as zf:
            zf.extractall(local_dataset_path)
        config_path = os.path.join(local_dataset_path, "data.yaml")
        with open(config_path, "r") as fp:
            dataset_config = yaml.safe_load(fp)
        lang = os.getenv("lang")
        if lang is None:
            lang = dataset_config.get("global", {}).get("lang", "en")
        # Sort queries by audio size (MB), largest first.  This replaces
        # the original O(n^2) `.index()`-keyed sort with a stable pair
        # sort producing the same order.
        sized = []
        for query_item in dataset_config.get("query_data", []):
            query_item["lang"] = lang
            audio_path = os.path.join(local_dataset_path, query_item["file"])
            query_item["file"] = audio_path
            sized.append(
                (os.path.getsize(audio_path) / 1024 / 1024, query_item)
            )
        sized.sort(key=lambda pair: pair[0], reverse=True)
        dataset_config["query_data"] = [item for _, item in sized]
        dataset_query = dataset_config.get("query_data", {})
        config[lang] = dataset_config

    # sut url
    sut_url = get_sut_url()

    try:
        logger.info("开始执行")
        evaluator = BaseEvaluator()
        future_list = []
        with ThreadPoolExecutor(max_workers=thread_num) as executor:
            for idx, query_item in enumerate(dataset_query):
                context = ASRContext(
                    **config[query_item["lang"]].get("global", {}),
                )
                context.lang = query_item["lang"]
                context.file_path = query_item["file"]
                context.append_labels(query_item["voice"])
                future = executor.submit(
                    ClientAsync(sut_url, context, idx).action
                )
                future_list.append(future)
            for future in concurrent.futures.as_completed(future_list):
                context = future.result()
                evaluator.evaluate(context)
                detail_case = evaluator.gen_detail_case()
                with open(detail_cases_filepath, "a") as fp:
                    fp.write(
                        json.dumps(
                            detail_case.to_dict(),
                            ensure_ascii=False,
                        )
                        + "\n",
                    )
                del context
                gc.collect()

        evaluator.post_evaluate()
        output_result = evaluator.gen_result()
        logger.info("执行完成")

        with open(result_filepath, "w") as fp:
            json.dump(output_result, fp, indent=2, ensure_ascii=False)
        with open(bad_cases_filepath, "w") as fp:
            fp.write("当前榜单不存在 Bad Case\n")

        if SHARE_SUT:
            with open(SUT_SHARE_JOB_STATUS, "w") as f:
                f.write("success")

        fcntl.flock(fd_lock, fcntl.LOCK_UN)
        fd_lock.close()
        # The deploying job must outlive its siblings so the shared SUT
        # stays up until every per-language job reports success.
        while SHARE_SUT and do_deploy_chart:
            time.sleep(30)
            success_num = 0
            for job_status_file in glob.glob(dirname + "/job_status.*"):
                with open(job_status_file, "r") as f:
                    job_status = f.read()
                success_num += job_status == "success"
            # NOTE(review): DATASET_NUM must be set whenever SHARE_SUT is
            # on, otherwise int(None) raises — confirm with job config.
            if success_num == int(DATASET_NUM):
                break
            logger.info("Waiting for all jobs to complete")
    except Exception:
        if SHARE_SUT:
            postprocess_failed()
        raise
    sys.exit(0)


if __name__ == "__main__":
    main()
def get_sut_url_kubernetes():
    """Register the SUT chart on kubernetes and return its HTTP base URL.

    The submit config is augmented with a fixed keep-alive container and
    an iluvatar vGPU resource request before being handed to
    ``register_sut``.  Large regions of dead, commented-out variants
    (nvidia resources, volume mounts, tolerations) were removed.
    """
    with open(SUBMIT_CONFIG_FILEPATH, "r") as f:
        submit_config = yaml.safe_load(f)
    assert isinstance(submit_config, dict)

    submit_config.setdefault("values", {})

    # Keep-alive container; the serving process is started separately.
    submit_config["values"]["containers"] = [
        {
            "name": "corex-container",
            "image": "harbor.4pd.io/lab-platform/inf/python:3.9",
            "command": ["sleep"],
            "args": ["3600"],
        }
    ]

    submit_config["values"]["resources"] = {
        "requests": {},
        "limits": {},
    }
    # Renamed from `limits`/`requests`: the latter shadowed the
    # `requests` HTTP module imported later in this file.
    res_limits = submit_config["values"]["resources"]["limits"]
    res_requests = submit_config["values"]["resources"]["requests"]

    # iluvatar vGPU resource, applied to both limits and requests.
    vgpu_resource = {
        "iluvatar.ai/gpu": SUT_VGPU,
        # 若需要其他资源(如显存),按你的K8s配置补充,例如:
        # "iluvatar.ai/gpumem": SUT_VGPU_MEM,
    }
    res_limits.update(vgpu_resource)
    res_requests.update(vgpu_resource)
    # Pin the pod onto the iluvatar accelerator nodes.
    submit_config["values"]["nodeSelector"] = {
        "contest.4pd.io/accelerator": "iluvatar-BI-V100"
    }

    log.info(f"submit_config: {submit_config}")
    log.info(f"RESOURCE_NAME: {RESOURCE_NAME}")

    # register_sut returns a ws:// URL; this SUT speaks plain HTTP.
    return register_sut(submit_config, RESOURCE_NAME).replace(
        "ws://", "http://"
    )
def gen_req_body(apiname, APPId, file_path=None, featureId=None, featureInfo=None, dstFeatureId=None):
    """Build the JSON request body for one voiceprint API call.

    The heavily duplicated header / result-encoding / audio-payload dicts
    of the original are factored into local helpers; the produced
    structures are byte-for-byte identical.

    :param apiname: one of createFeature, createGroup, deleteFeature,
        queryFeatureList, searchFea, searchScoreFea, updateFeature,
        deleteGroup
    :param APPId: application id placed into the request header
    :param file_path: audio file path for the audio-carrying APIs
    :param featureId: feature id (createFeature only)
    :param featureInfo: feature description (createFeature only)
    :param dstFeatureId: comparison target (searchScoreFea only)
    :returns: the request body as a dict
    :raises Exception: when ``apiname`` is not a known API
    """
    def res(key):
        # Result-encoding sub-block shared by every API.
        return {key: {"encoding": "utf8", "compress": "raw", "format": "json"}}

    def audio_payload():
        # Shared audio payload: base64-encoded MP3 ("lame"),
        # 16 kHz / mono / 16-bit.
        with open(file_path, "rb") as f:
            audio = str(base64.b64encode(f.read()), 'UTF-8')
        return {
            "resource": {
                "encoding": "lame",
                "sample_rate": 16000,
                "channels": 1,
                "bit_depth": 16,
                "status": 3,
                "audio": audio,
            }
        }

    header = {"app_id": APPId, "status": 3}

    # NOTE(review): group ids are intentionally inconsistent across APIs
    # in the original (test_voiceprint_e / iFLYTEK_examples_groupId /
    # user_voiceprint_2); preserved verbatim.
    if apiname == 'createFeature':
        param = {
            "func": "createFeature",
            "groupId": "test_voiceprint_e",
            "featureId": featureId,
            "featureInfo": featureInfo,
            **res("createFeatureRes"),
        }
        return {
            "header": header,
            "parameter": {"s782b4996": param},
            "payload": audio_payload(),
        }
    elif apiname == 'createGroup':
        param = {
            "func": "createGroup",
            "groupId": "test_voiceprint_e",
            "groupName": "vip_user",
            "groupInfo": "store_vip_user_voiceprint",
            **res("createGroupRes"),
        }
        return {"header": header, "parameter": {"s782b4996": param}}
    elif apiname == 'deleteFeature':
        param = {
            "func": "deleteFeature",
            "groupId": "iFLYTEK_examples_groupId",
            "featureId": "iFLYTEK_examples_featureId",
            **res("deleteFeatureRes"),
        }
        return {"header": header, "parameter": {"s782b4996": param}}
    elif apiname == 'queryFeatureList':
        param = {
            "func": "queryFeatureList",
            "groupId": "user_voiceprint_2",
            **res("queryFeatureListRes"),
        }
        return {"header": header, "parameter": {"s782b4996": param}}
    elif apiname == 'searchFea':
        param = {
            "func": "searchFea",
            "groupId": "test_voiceprint_e",
            "topK": 1,
            **res("searchFeaRes"),
        }
        return {
            "header": header,
            "parameter": {"s782b4996": param},
            "payload": audio_payload(),
        }
    elif apiname == 'searchScoreFea':
        param = {
            "func": "searchScoreFea",
            "groupId": "test_voiceprint_e",
            "dstFeatureId": dstFeatureId,
            **res("searchScoreFeaRes"),
        }
        return {
            "header": header,
            "parameter": {"s782b4996": param},
            "payload": audio_payload(),
        }
    elif apiname == 'updateFeature':
        param = {
            "func": "updateFeature",
            "groupId": "iFLYTEK_examples_groupId",
            "featureId": "iFLYTEK_examples_featureId",
            "featureInfo": "iFLYTEK_examples_featureInfo_update",
            **res("updateFeatureRes"),
        }
        return {
            "header": header,
            "parameter": {"s782b4996": param},
            "payload": audio_payload(),
        }
    elif apiname == 'deleteGroup':
        param = {
            "func": "deleteGroup",
            "groupId": "iFLYTEK_examples_groupId",
            **res("deleteGroupRes"),
        }
        return {"header": header, "parameter": {"s782b4996": param}}
    else:
        raise Exception(
            "输入的apiname不在[createFeature, createGroup, deleteFeature, queryFeatureList, searchFea, searchScoreFea,updateFeature]内,请检查")
首先测试服务健康检查 + response = requests.get(f"{sut_url}/health") + print(response.status_code, response.text) + + + # 请求头 + headers = {"Content-Type": "application/json"} + # 请求体(可指定限制处理的图片数量) + body = {"limit": 20 } # 可选参数,限制处理的图片总数 + + # 发送POST请求 + response = requests.post( + f"{sut_url}/v1/private/s782b4996", + data=json.dumps(body), + headers=headers + ) + + # 解析响应结果 + if response.status_code == 200: + result = response.json() + print("预测评估结果:") + print(f"准确率: {result['metrics']['accuracy']}%") + print(f"平均召回率: {result['metrics']['average_recall']}%") + print(f"处理图片总数: {result['metrics']['total_images']}") + else: + print(f"请求失败,状态码: {response.status_code}") + print(f"错误信息: {response.text}") + + + + + # 添加基本认证信息 + auth = ('llm', 'Rmf4#LcG(iFZrjU;2J') + #response = requests.post(request_url, data=json.dumps(body), headers=headers, auth=auth) + + #response = requests.post(sut_url + "/predict", data=json.dumps(body), headers=headers, auth=auth) + #response = requests.post(f"{sut_url}/sid/v1/private/s782b4996", data=json.dumps(body), headers=headers, auth=auth) + """ + response = requests.post(f"{sut_url}/v1/private/s782b4996", data=json.dumps(body), headers=headers) + """ + + + + + #print("HTTP状态码:", response.status_code) + #print("原始响应内容:", response.text) # 先打印原始内容 + #print(f"请求URL: {sut_url + '/v1/private/s782b4996'}") + #print(f"请求headers: {headers}") + #print(f"请求body: {body}") + + + + #tempResult = json.loads(response.content.decode('utf-8')) + #print(tempResult) + + """ + # 对text字段进行Base64解码 + if 'payload' in tempResult and 'updateFeatureRes' in tempResult['payload']: + text_encoded = tempResult['payload']['updateFeatureRes']['text'] + text_decoded = base64.b64decode(text_encoded).decode('utf-8') + print(f"Base64解码后的text字段内容: {text_decoded}") + """ + + #text_encoded = tempResult['payload']['updateFeatureRes']['text'] + #text_decoded = base64.b64decode(text_encoded).decode('utf-8') + #print(f"Base64解码后的text字段内容: {text_decoded}") + + + # 获取响应的 JSON 数据 + result = 
response.json() + with open(RESULT_FILEPATH, "w") as f: + json.dump(result, f, indent=4, ensure_ascii=False) + print(f"结果已成功写入 {RESULT_FILEPATH}") + +submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "./tests/resources/submit_config") +result_filepath = os.getenv("RESULT_FILEPATH", "./out/result") +bad_cases_filepath = os.getenv("BAD_CASES_FILEPATH", "./out/badcase") +#detail_cases_filepath = os.getenv("DETAILED_CASES_FILEPATH", "./out/detailcase.jsonl") + +from typing import Any, Dict, List + +def result2file( + result: Dict[str, Any], + detail_cases: List[Dict[str, Any]] = None +): + assert result_filepath is not None + assert bad_cases_filepath is not None + #assert detailed_cases_filepath is not None + + if result is not None: + with open(result_filepath, "w") as f: + json.dump(result, f, indent=4, ensure_ascii=False) + #if LOCAL_TEST: + # logger.info(f'result:\n {json.dumps(result, indent=4)}') + """ + if detail_cases is not None: + with open(detailed_cases_filepath, "w") as f: + json.dump(detail_cases, f, indent=4, ensure_ascii=False) + if LOCAL_TEST: + logger.info(f'result:\n {json.dumps(detail_cases, indent=4)}') + """ + + +def test_image_prediction(sut_url, image_path): + """发送单张图片到服务端预测""" + url = f"{sut_url}/v1/private/s782b4996" + + try: + with open(image_path, 'rb') as f: + files = {'image': f} + response = requests.post(url, files=files, timeout=30) + + result = response.json() + if result.get('status') != 'success': + return None, f"服务端错误: {result.get('message')}" + + return result, None + except Exception as e: + return None, f"请求错误: {str(e)}" + + + +import random +import time +#from tqdm import tqdm +import os +import requests + +if __name__ == '__main__': + + print(f"\n===== main开始请求接口 ===============================================") + # 1. 
首先测试服务健康检查 + + print(f"\n===== 服务健康检查 ===================================================") + response = requests.get(f"{sut_url}/health") + print(response.status_code, response.text) + + """ + # 本地图片路径和真实标签(根据实际情况修改) + image_path = "/path/to/your/test_image.jpg" + true_label = "cat" # 图片的真实标签 + """ + + + """ + # 请求头 + headers = {"Content-Type": "application/json"} + # 请求体(可指定限制处理的图片数量) + body = {"limit": 20 } # 可选参数,限制处理的图片总数 + + # 发送POST请求 + response = requests.post( + f"{sut_url}/v1/private/s782b4996", + data=json.dumps(body), + headers=headers + ) + """ + + """ + # 读取图片文件 + with open(image_path, 'rb') as f: + files = {'image': f} + # 发送POST请求 + response = requests.post(f"{sut_url}/v1/private/s782b4996", files=files) + + + # 解析响应结果 + if response.status_code == 200: + result = response.json() + print("预测评估结果:") + print(f"准确率: {result['metrics']['accuracy']}%") + print(f"平均召回率: {result['metrics']['average_recall']}%") + print(f"处理图片总数: {result['metrics']['total_images']}") + else: + print(f"请求失败,状态码: {response.status_code}") + print(f"错误信息: {response.text}") + """ + + + ############################################################################################### + dataset_root = "/tmp/workspace/256ObjectCategoriesNew" # 数据集根目录 + samples_per_class = 3 # 每个类别抽取的样本数 + image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif') # 支持的图片格式 + + # 结果统计变量 + total_samples = 0 + #correct_predictions = 0 + + # GPU统计 + gpu_true_positives = 0 + gpu_false_positives = 0 + gpu_false_negatives = 0 + gpu_total_processing_time = 0.0 + + # CPU统计 + cpu_true_positives = 0 + cpu_false_positives = 0 + cpu_false_negatives = 0 + cpu_total_processing_time = 0.0 + + """ + # 遍历所有类别文件夹 + for folder_name in tqdm(os.listdir(dataset_root), desc="处理类别"): + folder_path = os.path.join(dataset_root, folder_name) + + + # 提取类别名(从"序号.name"格式中提取name部分) + class_name = folder_name.split('.', 1)[1].strip().lower() + + # 获取文件夹中所有图片 + image_files = [] + for file in os.listdir(folder_path): + if 
file.lower().endswith(image_extensions): + image_files.append(os.path.join(folder_path, file)) + + # 随机抽取指定数量的图片(如果不足则取全部) + selected_images = random.sample( + image_files, + min(samples_per_class, len(image_files)) + ) + + # 处理选中的图片 + for img_path in selected_images: + total_count += 1 + + # 发送预测请求 + prediction, error = test_image_prediction(sut_url, img_path) + if error: + print(f"处理图片 {img_path} 失败: {error}") + continue + + # 解析预测结果 + pred_class = prediction.get('class_name', '').lower() + confidence = prediction.get('confidence', 0) + + # 判断是否预测正确(真实类别是否在预测类别中) + if class_name in pred_class: + correct_predictions += 1 + + + # 可选:打印详细结果 + print(f"图片: {os.path.basename(img_path)} | 真实: {class_name} | 预测: {pred_class} | 置信度: {confidence:.4f} | {'正确' if is_correct else '错误'}") + """ + + # 遍历所有类别文件夹 + for folder_name in os.listdir(dataset_root): + folder_path = os.path.join(dataset_root, folder_name) + + # 跳过非文件夹的项目 + if not os.path.isdir(folder_path): + continue + + # 提取类别名(从"序号.name"格式中提取name部分) + try: + class_name = folder_name.split('.', 1)[1].strip().lower() + except IndexError: + print(f"警告:文件夹 {folder_name} 命名格式不正确,跳过该文件夹") + continue + + # 获取文件夹中所有图片 + image_files = [] + for file in os.listdir(folder_path): + file_path = os.path.join(folder_path, file) + if os.path.isfile(file_path) and file.lower().endswith(image_extensions): + image_files.append(file_path) + + # 随机抽取指定数量的图片(如果不足则取全部) + selected_images = random.sample( + image_files, + min(samples_per_class, len(image_files)) + ) + + for img_path in selected_images: + total_samples += 1 + + # 获取预测结果 + prediction, error = test_image_prediction(sut_url, img_path) + + # 打印test_image_prediction返回的结果 + print(f"test_image_prediction返回的prediction: {prediction}") + print(f"test_image_prediction返回的error: {error}") + + if error: + print(f"处理图片 {img_path} 失败: {error}") + continue + + + + # 解析GPU预测结果 + gpu_pred = prediction.get('cuda_prediction', {}) + gpu_pred_class = gpu_pred.get('class_name', '').lower() + 
gpu_processing_time = gpu_pred.get('processing_time', 0.0) + + # 解析CPU预测结果 + cpu_pred = prediction.get('cpu_prediction', {}) + cpu_pred_class = cpu_pred.get('class_name', '').lower() + cpu_processing_time = cpu_pred.get('processing_time', 0.0) + + # 判断GPU预测是否正确 + gpu_is_correct = class_name in gpu_pred_class + if gpu_is_correct: + gpu_true_positives += 1 + else: + gpu_false_positives += 1 + gpu_false_negatives += 1 + + # 判断CPU预测是否正确 + cpu_is_correct = class_name in cpu_pred_class + if cpu_is_correct: + cpu_true_positives += 1 + else: + cpu_false_positives += 1 + cpu_false_negatives += 1 + + # 累加处理时间 + gpu_total_processing_time += gpu_processing_time + cpu_total_processing_time += cpu_processing_time + + # 打印详细结果 + print(f"图片: {os.path.basename(img_path)} | 真实: {class_name}") + print(f"GPU预测: {gpu_pred_class} | {'正确' if gpu_is_correct else '错误'} | 耗时: {gpu_processing_time:.6f}s") + print(f"CPU预测: {cpu_pred_class} | {'正确' if cpu_is_correct else '错误'} | 耗时: {cpu_processing_time:.6f}s") + print("-" * 50) + + """ + # 计算整体指标(在单标签场景下,准确率=召回率) + if total_samples == 0: + overall_accuracy = 0.0 + overall_recall = 0.0 + else: + overall_accuracy = correct_predictions / total_samples + overall_recall = correct_predictions / total_samples # 整体召回率 + + # 输出统计结果 + print("\n" + "="*50) + print(f"测试总结:") + print(f"总测试样本数: {total_samples}") + print(f"正确预测样本数: {correct_predictions}") + print(f"整体准确率: {overall_accuracy:.4f} ({correct_predictions}/{total_samples})") + print(f"整体召回率: {overall_recall:.4f} ({correct_predictions}/{total_samples})") + print("="*50) + """ + + # 初始化结果字典 + result = { + # GPU指标 + "gpu_accuracy": 0.0, + "gpu_recall": 0.0, + "gpu_running_time": round(gpu_total_processing_time, 6), + "gpu_throughput": 0.0, + + # CPU指标 + "cpu_accuracy": 0.0, + "cpu_recall": 0.0, + "cpu_running_time": round(cpu_total_processing_time, 6), + "cpu_throughput": 0.0 + } + + # 计算GPU指标 + gpu_accuracy = gpu_true_positives / total_samples * 100 + gpu_recall_denominator = gpu_true_positives + 
gpu_false_negatives + gpu_recall = gpu_true_positives / gpu_recall_denominator * 100 if gpu_recall_denominator > 0 else 0 + gpu_throughput = total_samples / gpu_total_processing_time if gpu_total_processing_time > 1e-6 else 0 + + # 计算CPU指标 + cpu_accuracy = cpu_true_positives / total_samples * 100 + cpu_recall_denominator = cpu_true_positives + cpu_false_negatives + cpu_recall = cpu_true_positives / cpu_recall_denominator * 100 if cpu_recall_denominator > 0 else 0 + cpu_throughput = total_samples / cpu_total_processing_time if cpu_total_processing_time > 1e-6 else 0 + + # 更新结果字典 + result.update({ + "gpu_accuracy": round(gpu_accuracy, 6), + "gpu_recall": round(gpu_recall, 6), + "gpu_throughput": round(gpu_throughput, 6), + + "cpu_accuracy": round(cpu_accuracy, 6), + "cpu_recall": round(cpu_recall, 6), + "cpu_throughput": round(cpu_throughput, 6) + }) + + + # 打印最终统计结果 + print("\n" + "="*50) + print(f"总样本数: {total_samples}") + print("\nGPU指标:") + print(f"准确率: {result['gpu_accuracy']:.4f}%") + print(f"召回率: {result['gpu_recall']:.4f}%") + print(f"总运行时间: {result['gpu_running_time']:.6f}s") + print(f"吞吐量: {result['gpu_throughput']:.2f}张/秒") + + print("\nCPU指标:") + print(f"准确率: {result['cpu_accuracy']:.4f}%") + print(f"召回率: {result['cpu_recall']:.4f}%") + print(f"总运行时间: {result['cpu_running_time']:.6f}s") + print(f"吞吐量: {result['cpu_throughput']:.2f}张/秒") + print("="*50) + + + #result = {} + #result['accuracy_1_1'] = 3 + result2file(result) + + if abs(gpu_accuracy - cpu_accuracy) > 3: + log.error(f"gpu与cpu准确率差别超过3%,模型结果不正确") + change_product_unavailable() + + """ + if result['accuracy_1_1'] < 0.9: + log.error(f"1:1正确率未达到90%, 视为产品不可用") + change_product_unavailable() + + + if result['accuracy_1_N'] < 1: + log.error(f"1:N正确率未达到100%, 视为产品不可用") + change_product_unavailable() + if result['1_1_latency'] > 0.5: + log.error(f"1:1平均latency超过0.5s, 视为产品不可用") + change_product_unavailable() + if result['1_N_latency'] > 0.5: + log.error(f"1:N平均latency超过0.5s, 视为产品不可用") + 
change_product_unavailable() + if result['enroll_latency'] > 1: + log.error(f"enroll(入库)平均latency超过1s, 视为产品不可用") + change_product_unavailable() + """ + exit_code = 0 + + diff --git a/run_callback_cuda.py b/run_callback_cuda.py new file mode 100644 index 0000000..a5dc1dc --- /dev/null +++ b/run_callback_cuda.py @@ -0,0 +1,1193 @@ +import json +import os +import sys +import time +import tempfile +import zipfile +import threading +from collections import defaultdict +from typing import Dict, List + +import yaml +from pydantic import ValidationError + +from schemas.dataset import QueryData +from utils.client_callback import ClientCallback, EvaluateResult, StopException +from utils.logger import log +from utils.service import register_sut +from utils.update_submit import change_product_available +from utils.file import dump_json, load_yaml, unzip_dir, load_json, write_file, dump_yaml +from utils.leaderboard import change_product_unavailable + + +lck = threading.Lock() + +# Environment variables by leaderboard +DATASET_FILEPATH = os.environ["DATASET_FILEPATH"] +RESULT_FILEPATH = os.environ["RESULT_FILEPATH"] + +DETAILED_CASES_FILEPATH = os.environ["DETAILED_CASES_FILEPATH"] +SUBMIT_CONFIG_FILEPATH = os.environ["SUBMIT_CONFIG_FILEPATH"] +BENCHMARK_NAME = os.environ["BENCHMARK_NAME"] +TEST_CONCURRENCY = int(os.getenv('TEST_CONCURRENCY', 1)) +THRESHOLD_OMCER = float(os.getenv('THRESHOLD_OMCER', 0.8)) + +log.info(f"DATASET_FILEPATH: {DATASET_FILEPATH}") +workspace_path = "/tmp/workspace" + + +# Environment variables by kubernetes +MY_POD_IP = os.environ["MY_POD_IP"] + +# constants +RESOURCE_NAME = BENCHMARK_NAME + +# Environment variables by judge_flow_config +LANG = os.getenv("lang") +SUT_CPU = os.getenv("SUT_CPU", "2") +SUT_MEMORY = os.getenv("SUT_MEMORY", "4Gi") +SUT_VGPU = os.getenv("SUT_VGPU", "1") +#SUT_VGPU_MEM = os.getenv("SUT_VGPU_MEM", str(1843 * int(SUT_VGPU))) +#SUT_VGPU_CORES = os.getenv("SUT_VGPU_CORES", str(8 * int(SUT_VGPU))) +SUT_VGPU_ACCELERATOR = 
os.getenv("SUT_VGPU_ACCELERATOR", "iluvatar-BI-V100") +RESOURCE_TYPE = os.getenv("RESOURCE_TYPE", "vgpu") +assert RESOURCE_TYPE in [ + "cpu", + "vgpu", +], "benchmark judge_flow_config error: RESOURCE_TYPE should be cpu or vgpu" + + +unzip_dir(DATASET_FILEPATH, workspace_path) + +def get_sut_url_kubernetes(): + with open(SUBMIT_CONFIG_FILEPATH, "r") as f: + submit_config = yaml.safe_load(f) + assert isinstance(submit_config, dict) + + submit_config.setdefault("values", {}) + + submit_config["values"]["containers"] = [ + { + "name": "corex-container", + "image": "harbor.4pd.io/lab-platform/inf/python:3.9", #镜像 + "command": ["sleep"], # 替换为你的模型启动命令,使用python解释器 + "args": ["3600"], # 替换为你的模型参数,运行我的推理脚本 + + # 添加存储卷挂载 + #"volumeMounts": [ + # { + # "name": "model-volume", + # "mountPath": "/model" # 挂载到/model目录 + # } + #] + } + ] + + """ + # 添加存储卷配置 + submit_config["values"]["volumes"] = [ + { + "name": "model-volume", + "persistentVolumeClaim": { + "claimName": "sid-model-pvc" # 使用已有的PVC + } + } + ] + """ + + """ + # Inject specified cpu and memory + resource = { + "cpu": SUT_CPU, + "memory": SUT_MEMORY, + } + """ + submit_config["values"]["resources"] = { + "requests":{}, + "limits": {}, + } + + limits = submit_config["values"]["resources"]["limits"] + requests = submit_config["values"]["resources"]["requests"] + + + """ + # ########## 关键修改:替换为iluvatar GPU配置 ########## + if RESOURCE_TYPE == "vgpu": # 假设你的模型需要GPU + # 替换nvidia资源键为iluvatar.ai/gpu + vgpu_resource = { + "iluvatar.ai/gpu": SUT_VGPU, # 对应你的GPU资源键 + # 若需要其他资源(如显存),按你的K8s配置补充,例如: + # "iluvatar.ai/gpumem": SUT_VGPU_MEM, + } + limits.update(vgpu_resource) + requests.update(vgpu_resource) + # 节点选择器:替换为你的accelerator标签 + submit_config["values"]["nodeSelector"] = { + "contest.4pd.io/accelerator": "iluvatar-BI-V100" # 你的节点标签 + } + # 容忍度:替换为你的tolerations配置 + submit_config["values"]["tolerations"] = [ + { + "key": "hosttype", + "operator": "Equal", + "value": "iluvatar", + "effect": "NoSchedule", + } + ] + # 
######################################### + # 禁止CPU模式下使用GPU资源(保持原逻辑) + else: + if "iluvatar.ai/gpu" in limits or "iluvatar.ai/gpu" in requests: + log.error("禁止在CPU模式下使用GPU资源") + sys.exit(1) + + + + #gpukeys = ["iluvatar.ai/gpu"] # 检查iluvatar GPU键 + #for key in gpukeys: + # if key in limits or key in requests: + # log.error("禁止使用vgpu资源") + # sys.exit(1) + + """ + + # 替换nvidia资源键为iluvatar.ai/gpu + vgpu_resource = { + "iluvatar.ai/gpu": SUT_VGPU, # 对应你的GPU资源键 + # 若需要其他资源(如显存),按你的K8s配置补充,例如: + # "iluvatar.ai/gpumem": SUT_VGPU_MEM, + } + limits.update(vgpu_resource) + requests.update(vgpu_resource) + # 节点选择器:替换为你的accelerator标签 + submit_config["values"]["nodeSelector"] = { + "contest.4pd.io/accelerator": "iluvatar-BI-V100" # 你的节点标签 + } + # 容忍度:替换为你的tolerations配置 + """ + submit_config["values"]["tolerations"] = [ + { + "key": "hosttype", + "operator": "Equal", + "value": "iluvatar", + "effect": "NoSchedule", + }, + { + "key": "hosttype", + "operator": "Equal", + "value": "arm64", + "effect": "NoSchedule", + }, + { + "key": "hosttype", + "operator": "Equal", + "value": "myinit", + "effect": "NoSchedule", + }, + { + "key": "hosttype", + "operator": "Equal", + "value": "middleware", + "effect": "NoSchedule", + } + + ] + """ + """ + { + "key": "node-role.kubernetes.io/master", + "operator": "Exists", + "effect": "NoSchedule", + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + """ + + + log.info(f"submit_config: {submit_config}") + log.info(f"RESOURCE_NAME: {RESOURCE_NAME}") + + return register_sut(submit_config, RESOURCE_NAME).replace( + "ws://", "http://" + ) + + +def get_sut_url(): + return get_sut_url_kubernetes() + +#SUT_URL = get_sut_url() +#os.environ["SUT_URL"] = SUT_URL + +""" +def load_dataset( + dataset_filepath: str, +) -> Dict[str, List[QueryData]]: + 
dataset_path = tempfile.mkdtemp() + + with zipfile.ZipFile(dataset_filepath) as zf: + zf.extractall(dataset_path) + + basename = os.path.basename(dataset_filepath) + datayaml = os.path.join(dataset_path, "data.yaml") + if not os.path.exists(datayaml): + sub_dataset_paths = os.listdir(dataset_path) + dataset = {} + for sub_dataset_path in sub_dataset_paths: + sub_dataset = load_dataset( + os.path.join(dataset_path, sub_dataset_path) + ) + for k, v in sub_dataset.items(): + k = os.path.join(basename, k) + dataset[k] = v + return dataset + + with open(datayaml, "r") as f: + data = yaml.safe_load(f) + assert isinstance(data, dict) + + lang = LANG + data_lang = data.get("global", {}).get("lang") + if lang is None and data_lang is not None: + if data_lang is not None: + # 使用配置中的语言类型 + lang = data_lang + if lang is None and basename.startswith("asr.") and len(basename) == 4 + 2: + # 数据集名称为asr.en 可以认为语言为en + lang = basename[4:] + if lang is None: + log.error( + "数据集错误 通过data.yaml中的 global.lang 或 数据集名称 asr.xx 指定语言类型" + ) + sys.exit(1) + + query_data = data.get("query_data", []) + audio_size_map = {} + for query in query_data: + query["lang"] = lang + query["file"] = os.path.join(dataset_path, query["file"]) + audio_size_map[query["file"]] = os.path.getsize(query["file"]) + # 根据音频大小排序 + query_data = sorted( + query_data, key=lambda x: audio_size_map[x["file"]], reverse=True + ) + valid_query_data = [] + for i_query_data in query_data: + try: + valid_query_data.append(QueryData.model_validate(i_query_data)) + except ValidationError: + log.error("数据集错误 数据中query_data格式错误") + sys.exit(1) + + return { + basename: valid_query_data, + } + + +def merge_query_data(dataset: Dict[str, List[QueryData]]) -> List[QueryData]: + query_datas = [] + for query_data in dataset.values(): + query_datas.extend(query_data) + return query_datas + + +def run_one_predict( + client: ClientCallback, query_data: QueryData, task_id: str +) -> EvaluateResult: + try: + client.predict(None, query_data.file, 
query_data.duration, task_id) + except StopException: + sys.exit(1) + + client.finished.wait() + + if client.error is not None: + sys.exit(1) + + client.app_on = False + + try: + with lck: + ret = client.evaluate(query_data) + return ret + except StopException: + sys.exit(1) +""" + +""" +def predict_task( + client: ClientCallback, task_id: int, query_data: QueryData, test_results: list +): + log.info(f"Task-{task_id}开始评测") + test_results[task_id] = run_one_predict(client, query_data, str(task_id)) + + +def merge_concurrent_result(evaluate_results: List[EvaluateResult]) -> Dict: + cer = 0.0 + align_start = {} + align_end = {} + first_word_distance_sum = 0.0 + last_word_distance_sum = 0.0 + rtf = 0.0 + first_receive_delay: float = 0.0 + query_count: int = 0 + voice_count: int = 0 + pred_punctuation_num: int = 0 + label_punctuation_num: int = 0 + pred_sentence_punctuation_num: int = 0 + label_setence_punctuation_num: int = 0 + + for evalute_result in evaluate_results: + cer += evalute_result.cer + for k, v in evalute_result.align_start.items(): + align_start.setdefault(k, 0) + align_start[k] += v + for k, v in evalute_result.align_end.items(): + align_end.setdefault(k, 0) + align_end[k] += v + first_word_distance_sum += evalute_result.first_word_distance_sum + last_word_distance_sum += evalute_result.last_word_distance_sum + rtf += evalute_result.rtf + first_receive_delay += evalute_result.first_receive_delay + query_count += evalute_result.query_count + voice_count += evalute_result.voice_count + pred_punctuation_num += evalute_result.pred_punctuation_num + label_punctuation_num += evalute_result.label_punctuation_num + pred_sentence_punctuation_num += ( + evalute_result.pred_sentence_punctuation_num + ) + label_setence_punctuation_num += ( + evalute_result.label_setence_punctuation_num + ) + lens = len(evaluate_results) + cer /= lens + for k, v in align_start.items(): + align_start[k] /= voice_count + for k, v in align_end.items(): + align_end[k] /= voice_count + 
first_word_distance = first_word_distance_sum / voice_count + last_word_distance = last_word_distance_sum / voice_count + rtf /= lens + first_receive_delay /= lens + json_result = { + "one_minus_cer": 1 - cer, + "first_word_distance_mean": first_word_distance, + "last_word_distance_mean": last_word_distance, + "query_count": query_count // lens, + "voice_count": voice_count // lens, + "rtf": rtf, + "first_receive_delay": first_receive_delay, + "punctuation_ratio": ( + pred_punctuation_num / label_punctuation_num + if label_punctuation_num > 0 + else 1.0 + ), + "sentence_punctuation_ratio": ( + pred_sentence_punctuation_num / label_setence_punctuation_num + if label_setence_punctuation_num > 0 + else 1.0 + ), + } + for k, v in align_start.items(): + json_result["start_word_%dms_ratio" % k] = v + for k, v in align_end.items(): + json_result["end_word_%dms_ratio" % k] = v + + return json_result + + +def merge_result(result: Dict[str, List[EvaluateResult]]) -> Dict: + json_result = {} + for lang, evaluate_results in result.items(): + if len(evaluate_results) == 0: + continue + cer = 0.0 + align_start = {} + align_end = {} + first_word_distance_sum = 0.0 + last_word_distance_sum = 0.0 + rtf = 0.0 + first_receive_delay: float = 0.0 + query_count: int = 0 + voice_count: int = 0 + pred_punctuation_num: int = 0 + label_punctuation_num: int = 0 + pred_sentence_punctuation_num: int = 0 + label_setence_punctuation_num: int = 0 + for evalute_result in evaluate_results: + cer += evalute_result.cer + for k, v in evalute_result.align_start.items(): + align_start.setdefault(k, 0) + align_start[k] += v + for k, v in evalute_result.align_end.items(): + align_end.setdefault(k, 0) + align_end[k] += v + first_word_distance_sum += evalute_result.first_word_distance_sum + last_word_distance_sum += evalute_result.last_word_distance_sum + rtf += evalute_result.rtf + first_receive_delay += evalute_result.first_receive_delay + query_count += evalute_result.query_count + voice_count += 
evalute_result.voice_count + pred_punctuation_num += evalute_result.pred_punctuation_num + label_punctuation_num += evalute_result.label_punctuation_num + pred_sentence_punctuation_num += ( + evalute_result.pred_sentence_punctuation_num + ) + label_setence_punctuation_num += ( + evalute_result.label_setence_punctuation_num + ) + lens = len(evaluate_results) + cer /= lens + for k, v in align_start.items(): + align_start[k] /= voice_count + for k, v in align_end.items(): + align_end[k] /= voice_count + first_word_distance = first_word_distance_sum / voice_count + last_word_distance = last_word_distance_sum / voice_count + rtf /= lens + first_receive_delay /= lens + lang_result = { + "one_minus_cer": 1 - cer, + "first_word_distance_mean": first_word_distance, + "last_word_distance_mean": last_word_distance, + "query_count": 1, + "voice_count": voice_count, + "rtf": rtf, + "first_receive_delay": first_receive_delay, + "punctuation_ratio": ( + pred_punctuation_num / label_punctuation_num + if label_punctuation_num > 0 + else 1.0 + ), + "sentence_punctuation_ratio": ( + pred_sentence_punctuation_num / label_setence_punctuation_num + if label_setence_punctuation_num > 0 + else 1.0 + ), + } + for k, v in align_start.items(): + lang_result["start_word_%dms_ratio" % k] = v + for k, v in align_end.items(): + lang_result["end_word_%dms_ratio" % k] = v + if lang == "": + json_result.update(lang_result) + else: + json_result[lang] = lang_result + return json_result +""" + +""" +def main(): + log.info(f'{TEST_CONCURRENCY=}, {THRESHOLD_OMCER=}') + dataset = load_dataset(DATASET_FILEPATH) + query_datas = merge_query_data(dataset) + + #获取 ASR 服务 URL(通常从 Kubernetes 配置) + sut_url = get_sut_url() + + #创建多个客户端实例(每个客户端监听不同端口,如 80、81、82...) 
+ port_base = 80 + clients = [ClientCallback(sut_url, port_base + i) for i in range(TEST_CONCURRENCY)] + + #准备测试数据与线程 + detail_cases = [] + # we use the same test data for all requests + query_data = query_datas[0] + + test_results = [None] * len(clients) + test_threads = [threading.Thread(target=predict_task, args=(client, task_id, query_data, test_results)) + for task_id, client in enumerate(clients)] + + #启动并发测试,启动线程并间隔10秒,设置超时时间为1小时 + for t in test_threads: + t.start() + time.sleep(10) + [t.join(timeout=3600) for t in test_threads] + + #合并结果与评估 + final_result = merge_concurrent_result(test_results) + product_avaiable = all([c.product_avaiable for c in clients]) + + final_result['concurrent_req'] = TEST_CONCURRENCY + if final_result['one_minus_cer'] < THRESHOLD_OMCER: + product_avaiable = False + + if not product_avaiable: + final_result['success'] = False + change_product_available() + else: + final_result['success'] = True + + #保存结果, + log.info( + "指标结果为: %s", json.dumps(final_result, indent=2, ensure_ascii=False) + ) + + time.sleep(120) + #打印并保存最终结果到文件 + with open(RESULT_FILEPATH, "w") as f: + json.dump(final_result, f, indent=2, ensure_ascii=False) + #保存详细测试用例结果 + with open(DETAILED_CASES_FILEPATH, "w") as f: + json.dump(detail_cases, f, indent=2, ensure_ascii=False) +""" + +############################################################################# + +import requests +import base64 + +def gen_req_body(apiname, APPId, file_path=None, featureId=None, featureInfo=None, dstFeatureId=None): + """ + 生成请求的body + :param apiname + :param APPId: Appid + :param file_name: 文件路径 + :return: + """ + if apiname == 'createFeature': + + with open(file_path, "rb") as f: + audioBytes = f.read() + body = { + "header": { + "app_id": APPId, + "status": 3 + }, + "parameter": { + "s782b4996": { + "func": "createFeature", + "groupId": "test_voiceprint_e", + "featureId": featureId, + "featureInfo": featureInfo, + "createFeatureRes": { + "encoding": "utf8", + "compress": "raw", + 
"format": "json" + } + } + }, + "payload": { + "resource": { + "encoding": "lame", + "sample_rate": 16000, + "channels": 1, + "bit_depth": 16, + "status": 3, + "audio": str(base64.b64encode(audioBytes), 'UTF-8') + } + } + } + elif apiname == 'createGroup': + + body = { + "header": { + "app_id": APPId, + "status": 3 + }, + "parameter": { + "s782b4996": { + "func": "createGroup", + "groupId": "test_voiceprint_e", + "groupName": "vip_user", + "groupInfo": "store_vip_user_voiceprint", + "createGroupRes": { + "encoding": "utf8", + "compress": "raw", + "format": "json" + } + } + } + } + elif apiname == 'deleteFeature': + + body = { + "header": { + "app_id": APPId, + "status": 3 + + }, + "parameter": { + "s782b4996": { + "func": "deleteFeature", + "groupId": "iFLYTEK_examples_groupId", + "featureId": "iFLYTEK_examples_featureId", + "deleteFeatureRes": { + "encoding": "utf8", + "compress": "raw", + "format": "json" + } + } + } + } + elif apiname == 'queryFeatureList': + + body = { + "header": { + "app_id": APPId, + "status": 3 + }, + "parameter": { + "s782b4996": { + "func": "queryFeatureList", + "groupId": "user_voiceprint_2", + "queryFeatureListRes": { + "encoding": "utf8", + "compress": "raw", + "format": "json" + } + } + } + } + elif apiname == 'searchFea': + + with open(file_path, "rb") as f: + audioBytes = f.read() + body = { + "header": { + "app_id": APPId, + "status": 3 + }, + "parameter": { + "s782b4996": { + "func": "searchFea", + "groupId": "test_voiceprint_e", + "topK": 1, + "searchFeaRes": { + "encoding": "utf8", + "compress": "raw", + "format": "json" + } + } + }, + "payload": { + "resource": { + "encoding": "lame", + "sample_rate": 16000, + "channels": 1, + "bit_depth": 16, + "status": 3, + "audio": str(base64.b64encode(audioBytes), 'UTF-8') + } + } + } + elif apiname == 'searchScoreFea': + + with open(file_path, "rb") as f: + audioBytes = f.read() + body = { + "header": { + "app_id": APPId, + "status": 3 + }, + "parameter": { + "s782b4996": { + "func": 
"searchScoreFea", + "groupId": "test_voiceprint_e", + "dstFeatureId": dstFeatureId, + "searchScoreFeaRes": { + "encoding": "utf8", + "compress": "raw", + "format": "json" + } + } + }, + "payload": { + "resource": { + "encoding": "lame", + "sample_rate": 16000, + "channels": 1, + "bit_depth": 16, + "status": 3, + "audio": str(base64.b64encode(audioBytes), 'UTF-8') + } + } + } + elif apiname == 'updateFeature': + + with open(file_path, "rb") as f: + audioBytes = f.read() + body = { + "header": { + "app_id": APPId, + "status": 3 + }, + "parameter": { + "s782b4996": { + "func": "updateFeature", + "groupId": "iFLYTEK_examples_groupId", + "featureId": "iFLYTEK_examples_featureId", + "featureInfo": "iFLYTEK_examples_featureInfo_update", + "updateFeatureRes": { + "encoding": "utf8", + "compress": "raw", + "format": "json" + } + } + }, + "payload": { + "resource": { + "encoding": "lame", + "sample_rate": 16000, + "channels": 1, + "bit_depth": 16, + "status": 3, + "audio": str(base64.b64encode(audioBytes), 'UTF-8') + } + } + } + elif apiname == 'deleteGroup': + body = { + "header": { + "app_id": APPId, + "status": 3 + }, + "parameter": { + "s782b4996": { + "func": "deleteGroup", + "groupId": "iFLYTEK_examples_groupId", + "deleteGroupRes": { + "encoding": "utf8", + "compress": "raw", + "format": "json" + } + } + } + } + else: + raise Exception( + "输入的apiname不在[createFeature, createGroup, deleteFeature, queryFeatureList, searchFea, searchScoreFea,updateFeature]内,请检查") + return body + + + +log.info(f"开始请求获取到SUT服务URL") +# 获取SUT服务URL +sut_url = get_sut_url() +print(f"获取到的SUT_URL: {sut_url}") # 调试输出 +log.info(f"获取到SUT服务URL: {sut_url}") + +from urllib.parse import urlparse + +# 全局变量 +text_decoded = None + +###################################新增新增################################ +def req_url(api_name, APPId, file_path=None, featureId=None, featureInfo=None, dstFeatureId=None): + """ + 开始请求 + :param APPId: APPID + :param file_path: body里的文件路径 + :return: + """ + + global text_decoded + 
+ body = gen_req_body(apiname=api_name, APPId=APPId, file_path=file_path, featureId=featureId, featureInfo=featureInfo, dstFeatureId=dstFeatureId) + #request_url = 'https://ai-cloud.4paradigm.com:9443/sid/v1/private/s782b4996' + + #request_url = 'https://sut:80/sid/v1/private/s782b4996' + + #headers = {'content-type': "application/json", 'host': 'ai-cloud.4paradigm.com', 'appid': APPId} + + parsed_url = urlparse(sut_url) + headers = {'content-type': "application/json", 'host': parsed_url.hostname, 'appid': APPId} + + # 1. 首先测试服务健康检查 + response = requests.get(f"{sut_url}/health") + print(response.status_code, response.text) + + + # 请求头 + headers = {"Content-Type": "application/json"} + # 请求体(可指定限制处理的图片数量) + body = {"limit": 20 } # 可选参数,限制处理的图片总数 + + # 发送POST请求 + response = requests.post( + f"{sut_url}/v1/private/s782b4996", + data=json.dumps(body), + headers=headers + ) + + # 解析响应结果 + if response.status_code == 200: + result = response.json() + print("预测评估结果:") + print(f"准确率: {result['metrics']['accuracy']}%") + print(f"平均召回率: {result['metrics']['average_recall']}%") + print(f"处理图片总数: {result['metrics']['total_images']}") + else: + print(f"请求失败,状态码: {response.status_code}") + print(f"错误信息: {response.text}") + + + + + # 添加基本认证信息 + auth = ('llm', 'Rmf4#LcG(iFZrjU;2J') + #response = requests.post(request_url, data=json.dumps(body), headers=headers, auth=auth) + + #response = requests.post(sut_url + "/predict", data=json.dumps(body), headers=headers, auth=auth) + #response = requests.post(f"{sut_url}/sid/v1/private/s782b4996", data=json.dumps(body), headers=headers, auth=auth) + """ + response = requests.post(f"{sut_url}/v1/private/s782b4996", data=json.dumps(body), headers=headers) + """ + + + + + #print("HTTP状态码:", response.status_code) + #print("原始响应内容:", response.text) # 先打印原始内容 + #print(f"请求URL: {sut_url + '/v1/private/s782b4996'}") + #print(f"请求headers: {headers}") + #print(f"请求body: {body}") + + + + #tempResult = json.loads(response.content.decode('utf-8')) + 
#print(tempResult) + + """ + # 对text字段进行Base64解码 + if 'payload' in tempResult and 'updateFeatureRes' in tempResult['payload']: + text_encoded = tempResult['payload']['updateFeatureRes']['text'] + text_decoded = base64.b64decode(text_encoded).decode('utf-8') + print(f"Base64解码后的text字段内容: {text_decoded}") + """ + + #text_encoded = tempResult['payload']['updateFeatureRes']['text'] + #text_decoded = base64.b64decode(text_encoded).decode('utf-8') + #print(f"Base64解码后的text字段内容: {text_decoded}") + + + # 获取响应的 JSON 数据 + result = response.json() + with open(RESULT_FILEPATH, "w") as f: + json.dump(result, f, indent=4, ensure_ascii=False) + print(f"结果已成功写入 {RESULT_FILEPATH}") + +submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "./tests/resources/submit_config") +result_filepath = os.getenv("RESULT_FILEPATH", "./out/result") +bad_cases_filepath = os.getenv("BAD_CASES_FILEPATH", "./out/badcase") +#detail_cases_filepath = os.getenv("DETAILED_CASES_FILEPATH", "./out/detailcase.jsonl") + +from typing import Any, Dict, List + +def result2file( + result: Dict[str, Any], + detail_cases: List[Dict[str, Any]] = None +): + assert result_filepath is not None + assert bad_cases_filepath is not None + #assert detailed_cases_filepath is not None + + if result is not None: + with open(result_filepath, "w") as f: + json.dump(result, f, indent=4, ensure_ascii=False) + #if LOCAL_TEST: + # logger.info(f'result:\n {json.dumps(result, indent=4)}') + """ + if detail_cases is not None: + with open(detailed_cases_filepath, "w") as f: + json.dump(detail_cases, f, indent=4, ensure_ascii=False) + if LOCAL_TEST: + logger.info(f'result:\n {json.dumps(detail_cases, indent=4)}') + """ + + +def test_image_prediction(sut_url, image_path): + """发送单张图片到服务端预测""" + url = f"{sut_url}/v1/private/s782b4996" + + try: + with open(image_path, 'rb') as f: + files = {'image': f} + response = requests.post(url, files=files, timeout=30) + + result = response.json() + if result.get('status') != 'success': + 
return None, f"服务端错误: {result.get('message')}" + + return result.get('top_prediction'), None + except Exception as e: + return None, f"请求错误: {str(e)}" + + + +import random +import time +#from tqdm import tqdm +import os +import requests + +if __name__ == '__main__': + + print(f"\n===== main开始请求接口 ===============================================") + # 1. 首先测试服务健康检查 + + print(f"\n===== 服务健康检查 ===================================================") + response = requests.get(f"{sut_url}/health") + print(response.status_code, response.text) + + """ + # 本地图片路径和真实标签(根据实际情况修改) + image_path = "/path/to/your/test_image.jpg" + true_label = "cat" # 图片的真实标签 + """ + + + """ + # 请求头 + headers = {"Content-Type": "application/json"} + # 请求体(可指定限制处理的图片数量) + body = {"limit": 20 } # 可选参数,限制处理的图片总数 + + # 发送POST请求 + response = requests.post( + f"{sut_url}/v1/private/s782b4996", + data=json.dumps(body), + headers=headers + ) + """ + + """ + # 读取图片文件 + with open(image_path, 'rb') as f: + files = {'image': f} + # 发送POST请求 + response = requests.post(f"{sut_url}/v1/private/s782b4996", files=files) + + + # 解析响应结果 + if response.status_code == 200: + result = response.json() + print("预测评估结果:") + print(f"准确率: {result['metrics']['accuracy']}%") + print(f"平均召回率: {result['metrics']['average_recall']}%") + print(f"处理图片总数: {result['metrics']['total_images']}") + else: + print(f"请求失败,状态码: {response.status_code}") + print(f"错误信息: {response.text}") + """ + + + ############################################################################################### + dataset_root = "/tmp/workspace/256ObjectCategoriesNew" # 数据集根目录 + samples_per_class = 3 # 每个类别抽取的样本数 + image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif') # 支持的图片格式 + + # 结果统计变量 + #total_samples = 0 + correct_predictions = 0 + + # 结果统计变量 + total_samples = 0 + true_positives = 0 + false_positives = 0 + false_negatives = 0 + total_processing_time = 0.0 # 总处理时间(秒) + + """ + # 遍历所有类别文件夹 + for folder_name in tqdm(os.listdir(dataset_root), 
desc="处理类别"): + folder_path = os.path.join(dataset_root, folder_name) + + + # 提取类别名(从"序号.name"格式中提取name部分) + class_name = folder_name.split('.', 1)[1].strip().lower() + + # 获取文件夹中所有图片 + image_files = [] + for file in os.listdir(folder_path): + if file.lower().endswith(image_extensions): + image_files.append(os.path.join(folder_path, file)) + + # 随机抽取指定数量的图片(如果不足则取全部) + selected_images = random.sample( + image_files, + min(samples_per_class, len(image_files)) + ) + + # 处理选中的图片 + for img_path in selected_images: + total_count += 1 + + # 发送预测请求 + prediction, error = test_image_prediction(sut_url, img_path) + if error: + print(f"处理图片 {img_path} 失败: {error}") + continue + + # 解析预测结果 + pred_class = prediction.get('class_name', '').lower() + confidence = prediction.get('confidence', 0) + + # 判断是否预测正确(真实类别是否在预测类别中) + if class_name in pred_class: + correct_predictions += 1 + + + # 可选:打印详细结果 + print(f"图片: {os.path.basename(img_path)} | 真实: {class_name} | 预测: {pred_class} | 置信度: {confidence:.4f} | {'正确' if is_correct else '错误'}") + """ + + # 遍历所有类别文件夹 + for folder_name in os.listdir(dataset_root): + folder_path = os.path.join(dataset_root, folder_name) + + # 跳过非文件夹的项目 + if not os.path.isdir(folder_path): + continue + + # 提取类别名(从"序号.name"格式中提取name部分) + try: + class_name = folder_name.split('.', 1)[1].strip().lower() + except IndexError: + print(f"警告:文件夹 {folder_name} 命名格式不正确,跳过该文件夹") + continue + + # 获取文件夹中所有图片 + image_files = [] + for file in os.listdir(folder_path): + file_path = os.path.join(folder_path, file) + if os.path.isfile(file_path) and file.lower().endswith(image_extensions): + image_files.append(file_path) + + # 随机抽取指定数量的图片(如果不足则取全部) + selected_images = random.sample( + image_files, + min(samples_per_class, len(image_files)) + ) + + # 处理该文件夹中的所有图片 + for img_path in selected_images: + total_samples += 1 + start_time = time.time() # 记录开始时间 + # 发送预测请求 + prediction, error = test_image_prediction(sut_url, img_path) + + # 计算单张图片处理时间(包括网络请求和模型预测) + processing_time = 
time.time() - start_time + total_processing_time += processing_time + + if error: + print(f"处理图片 {img_path} 失败: {error}") + # 处理失败的样本视为预测错误 + false_negatives += 1 + continue + + # 解析预测结果 + pred_class = prediction.get('class_name', '').lower() + confidence = prediction.get('confidence', 0) + + # 判断是否预测正确(真实类别是否在预测类别中,不分大小写) + is_correct = class_name in pred_class + + # 更新统计指标 + if is_correct: + true_positives += 1 + else: + false_positives += 1 + false_negatives += 1 + + # 打印详细结果(可选) + print(f"图片: {os.path.basename(img_path)} | 真实: {class_name} | 预测: {pred_class} | 置信度: {confidence:.4f} | {'正确' if is_correct else '错误'}") + + """ + # 计算整体指标(在单标签场景下,准确率=召回率) + if total_samples == 0: + overall_accuracy = 0.0 + overall_recall = 0.0 + else: + overall_accuracy = correct_predictions / total_samples + overall_recall = correct_predictions / total_samples # 整体召回率 + + # 输出统计结果 + print("\n" + "="*50) + print(f"测试总结:") + print(f"总测试样本数: {total_samples}") + print(f"正确预测样本数: {correct_predictions}") + print(f"整体准确率: {overall_accuracy:.4f} ({correct_predictions}/{total_samples})") + print(f"整体召回率: {overall_recall:.4f} ({correct_predictions}/{total_samples})") + print("="*50) + """ + # 初始化结果字典 + result = { + "total_processing_time": round(total_processing_time, 6), + "throughput": 0.0, + "accuracy": 0.0, + "recall": 0.0 + } + + # 计算评估指标 + if total_samples == 0: + print("没有找到任何图片样本") + + + # 准确率 = 正确预测的样本数 / 总预测样本数 + accuracy = true_positives / total_samples * 100 if total_samples > 0 else 0 + + # 召回率 = 正确预测的样本数 / (正确预测的样本数 + 未正确预测的正样本数) + recall_denominator = true_positives + false_negatives + recall = true_positives / recall_denominator * 100 if recall_denominator > 0 else 0 + + # 处理速度计算(每秒钟处理的图片张数) + # 避免除以0(当总时间极短时) + throughput = total_samples / total_processing_time if total_processing_time > 1e-6 else 0 + + # 更新结果字典 + result.update({ + "throughput": round(throughput, 6), + "accuracy": round(accuracy, 6), + "recall": round(recall, 6) + }) + + # 打印最终统计结果 + print("\n" + "="*50) + 
print(f"总样本数: {total_samples}") + print(f"总处理时间: {total_processing_time:.4f}秒") + print(f"处理速度: {throughput:.2f}张/秒") # 新增:每秒钟处理的图片张数 + print(f"正确预测: {true_positives}") + print(f"错误预测: {total_samples - true_positives}") + print(f"准确率: {accuracy:.4f} ({true_positives}/{total_samples})") + print(f"召回率: {recall:.4f} ({true_positives}/{recall_denominator})") + print("="*50) + + + #result = {} + #result['accuracy_1_1'] = 3 + result2file(result) + + """ + if result['accuracy_1_1'] < 0.9: + log.error(f"1:1正确率未达到90%, 视为产品不可用") + change_product_unavailable() + + + if result['accuracy_1_N'] < 1: + log.error(f"1:N正确率未达到100%, 视为产品不可用") + change_product_unavailable() + if result['1_1_latency'] > 0.5: + log.error(f"1:1平均latency超过0.5s, 视为产品不可用") + change_product_unavailable() + if result['1_N_latency'] > 0.5: + log.error(f"1:N平均latency超过0.5s, 视为产品不可用") + change_product_unavailable() + if result['enroll_latency'] > 1: + log.error(f"enroll(入库)平均latency超过1s, 视为产品不可用") + change_product_unavailable() + """ + exit_code = 0 + + diff --git a/run_callback_new.py b/run_callback_new.py new file mode 100644 index 0000000..77225b7 --- /dev/null +++ b/run_callback_new.py @@ -0,0 +1,1296 @@ +import json +import os +import sys +import time +import tempfile +import zipfile +import threading +from collections import defaultdict +from typing import Dict, List + +import yaml +from pydantic import ValidationError + +from schemas.dataset import QueryData +from utils.client_callback import ClientCallback, EvaluateResult, StopException +from utils.logger import log +from utils.service import register_sut +from utils.update_submit import change_product_available +from utils.file import dump_json, load_yaml, unzip_dir, load_json, write_file, dump_yaml +from utils.leaderboard import change_product_unavailable + + +lck = threading.Lock() + +# Environment variables by leaderboard +DATASET_FILEPATH = os.environ["DATASET_FILEPATH"] +RESULT_FILEPATH = os.environ["RESULT_FILEPATH"] + +DETAILED_CASES_FILEPATH = 
DETAILED_CASES_FILEPATH = os.environ["DETAILED_CASES_FILEPATH"]
SUBMIT_CONFIG_FILEPATH = os.environ["SUBMIT_CONFIG_FILEPATH"]
BENCHMARK_NAME = os.environ["BENCHMARK_NAME"]
TEST_CONCURRENCY = int(os.getenv('TEST_CONCURRENCY', 1))
THRESHOLD_OMCER = float(os.getenv('THRESHOLD_OMCER', 0.8))

log.info(f"DATASET_FILEPATH: {DATASET_FILEPATH}")
workspace_path = "/tmp/workspace"


# Environment variables by kubernetes
MY_POD_IP = os.environ["MY_POD_IP"]

# constants
RESOURCE_NAME = BENCHMARK_NAME

# Environment variables by judge_flow_config
LANG = os.getenv("lang")
SUT_CPU = os.getenv("SUT_CPU", "2")
SUT_MEMORY = os.getenv("SUT_MEMORY", "4Gi")
SUT_VGPU = os.getenv("SUT_VGPU", "1")
SUT_VGPU_ACCELERATOR = os.getenv("SUT_VGPU_ACCELERATOR", "iluvatar-BI-V100")
RESOURCE_TYPE = os.getenv("RESOURCE_TYPE", "vgpu")
assert RESOURCE_TYPE in [
    "cpu",
    "vgpu",
], "benchmark judge_flow_config error: RESOURCE_TYPE should be cpu or vgpu"


# Unpack the dataset into the shared workspace before the SUT is registered.
unzip_dir(DATASET_FILEPATH, workspace_path)


def get_sut_url_kubernetes():
    """Register the system-under-test (SUT) and return its HTTP base URL.

    Loads the leaderboard submit config (YAML), injects the evaluation
    container and the iluvatar GPU resource requests/limits plus the
    accelerator node selector, registers the SUT, and rewrites the returned
    websocket URL scheme to http://.
    """
    with open(SUBMIT_CONFIG_FILEPATH, "r") as f:
        submit_config = yaml.safe_load(f)
    assert isinstance(submit_config, dict)

    submit_config.setdefault("values", {})

    # Evaluation container: the image just sleeps; the judge drives it.
    submit_config["values"]["containers"] = [
        {
            "name": "corex-container",
            "image": "harbor.4pd.io/lab-platform/inf/python:3.9",  # image
            "command": ["sleep"],
            "args": ["3600"],
        }
    ]

    submit_config["values"]["resources"] = {
        "requests": {},
        "limits": {},
    }

    limits = submit_config["values"]["resources"]["limits"]
    requests = submit_config["values"]["resources"]["requests"]

    # iluvatar GPU resource key, applied to both requests and limits.
    # NOTE(review): applied unconditionally — RESOURCE_TYPE is not consulted
    # here, matching the previous behavior; gate on "vgpu" if that changes.
    vgpu_resource = {
        "iluvatar.ai/gpu": SUT_VGPU,
    }
    limits.update(vgpu_resource)
    requests.update(vgpu_resource)

    # Pin the SUT onto accelerator nodes.
    submit_config["values"]["nodeSelector"] = {
        "contest.4pd.io/accelerator": "iluvatar-BI-V100"
    }
    # NOTE(review): tolerations for the iluvatar/arm64/myinit/middleware
    # taints were disabled (stringified out) in the original source and
    # remain NOT applied here.

    log.info(f"submit_config: {submit_config}")
    log.info(f"RESOURCE_NAME: {RESOURCE_NAME}")

    return register_sut(submit_config, RESOURCE_NAME).replace(
        "ws://", "http://"
    )


def get_sut_url():
    """Indirection point for the SUT-provisioning backend (kubernetes)."""
    return get_sut_url_kubernetes()


# The legacy ASR evaluation pipeline (load_dataset / merge_query_data /
# run_one_predict / predict_task / merge_concurrent_result / merge_result /
# main) that lived here inside module-level triple-quoted strings was dead
# code (no-op string constants) and has been removed; it remains available
# in version control.
#############################################################################

import base64

# NOTE(review): a duplicate `import requests` previously lived here; the
# module imports requests again further down, so the duplicate was dropped —
# this request-body builder itself has no network dependency.


def gen_req_body(apiname, APPId, file_path=None, featureId=None, featureInfo=None, dstFeatureId=None):
    """Build the JSON request body for one voiceprint-service API call.

    :param apiname: one of createFeature / createGroup / deleteFeature /
        queryFeatureList / searchFea / searchScoreFea / updateFeature /
        deleteGroup
    :param APPId: application id placed in the request header
    :param file_path: audio file path for the APIs that upload audio
    :param featureId: feature id (createFeature)
    :param featureInfo: feature description (createFeature)
    :param dstFeatureId: target feature id (searchScoreFea)
    :return: dict ready to be JSON-serialized into the HTTP body
    :raises ValueError: if apiname is not a supported API name
    """

    def _audio_payload():
        # Shared "payload" section for the audio-carrying APIs: the file at
        # file_path, base64-encoded. Assumes 16kHz/16bit mono mp3 ("lame")
        # audio — TODO confirm against the service contract.
        with open(file_path, "rb") as f:
            audio_bytes = f.read()
        return {
            "resource": {
                "encoding": "lame",
                "sample_rate": 16000,
                "channels": 1,
                "bit_depth": 16,
                "status": 3,
                "audio": str(base64.b64encode(audio_bytes), 'UTF-8'),
            }
        }

    # Sections shared verbatim by every branch (previously duplicated 8x).
    header = {"app_id": APPId, "status": 3}
    res_block = {"encoding": "utf8", "compress": "raw", "format": "json"}

    if apiname == 'createFeature':
        body = {
            "header": header,
            "parameter": {
                "s782b4996": {
                    "func": "createFeature",
                    "groupId": "test_voiceprint_e",
                    "featureId": featureId,
                    "featureInfo": featureInfo,
                    "createFeatureRes": res_block,
                }
            },
            "payload": _audio_payload(),
        }
    elif apiname == 'createGroup':
        body = {
            "header": header,
            "parameter": {
                "s782b4996": {
                    "func": "createGroup",
                    "groupId": "test_voiceprint_e",
                    "groupName": "vip_user",
                    "groupInfo": "store_vip_user_voiceprint",
                    "createGroupRes": res_block,
                }
            },
        }
    elif apiname == 'deleteFeature':
        body = {
            "header": header,
            "parameter": {
                "s782b4996": {
                    "func": "deleteFeature",
                    "groupId": "iFLYTEK_examples_groupId",
                    "featureId": "iFLYTEK_examples_featureId",
                    "deleteFeatureRes": res_block,
                }
            },
        }
    elif apiname == 'queryFeatureList':
        body = {
            "header": header,
            "parameter": {
                "s782b4996": {
                    "func": "queryFeatureList",
                    "groupId": "user_voiceprint_2",
                    "queryFeatureListRes": res_block,
                }
            },
        }
    elif apiname == 'searchFea':
        body = {
            "header": header,
            "parameter": {
                "s782b4996": {
                    "func": "searchFea",
                    "groupId": "test_voiceprint_e",
                    "topK": 1,
                    "searchFeaRes": res_block,
                }
            },
            "payload": _audio_payload(),
        }
    elif apiname == 'searchScoreFea':
        body = {
            "header": header,
            "parameter": {
                "s782b4996": {
                    "func": "searchScoreFea",
                    "groupId": "test_voiceprint_e",
                    "dstFeatureId": dstFeatureId,
                    "searchScoreFeaRes": res_block,
                }
            },
            "payload": _audio_payload(),
        }
    elif apiname == 'updateFeature':
        body = {
            "header": header,
            "parameter": {
                "s782b4996": {
                    "func": "updateFeature",
                    "groupId": "iFLYTEK_examples_groupId",
                    "featureId": "iFLYTEK_examples_featureId",
                    "featureInfo": "iFLYTEK_examples_featureInfo_update",
                    "updateFeatureRes": res_block,
                }
            },
            "payload": _audio_payload(),
        }
    elif apiname == 'deleteGroup':
        body = {
            "header": header,
            "parameter": {
                "s782b4996": {
                    "func": "deleteGroup",
                    "groupId": "iFLYTEK_examples_groupId",
                    "deleteGroupRes": res_block,
                }
            },
        }
    else:
        # BUG FIX: the original message omitted deleteGroup even though it is
        # a supported apiname; ValueError is a subclass of Exception, so
        # callers catching Exception are unaffected.
        raise ValueError(
            "输入的apiname不在[createFeature, createGroup, deleteFeature, queryFeatureList, searchFea, searchScoreFea, updateFeature, deleteGroup]内,请检查")
    return body
log.info(f"开始请求获取到SUT服务URL")
# 获取SUT服务URL  (resolved once at import time; used by req_url and __main__)
sut_url = get_sut_url()
print(f"获取到的SUT_URL: {sut_url}")  # 调试输出
log.info(f"获取到SUT服务URL: {sut_url}")

from urllib.parse import urlparse

# 全局变量
text_decoded = None


###################################新增新增################################
def req_url(api_name, APPId, file_path=None, featureId=None, featureInfo=None, dstFeatureId=None):
    """Send one evaluation request to the SUT and persist the JSON reply.

    :param api_name: API name forwarded to gen_req_body
    :param APPId: APPID placed in the request header
    :param file_path: audio file path consumed while building the body
    :param featureId: optional feature id forwarded to gen_req_body
    :param featureInfo: optional feature info forwarded to gen_req_body
    :param dstFeatureId: optional target feature id forwarded to gen_req_body
    Side effects: GET {sut_url}/health probe, POST to /v1/private/s782b4996,
    and the JSON response is written to RESULT_FILEPATH.
    """
    global text_decoded

    # NOTE(review): this voiceprint body is built (including reading the
    # audio file inside gen_req_body) but then DISCARDED below, where the
    # POST body is overwritten with {"limit": 20}. Behavior is preserved
    # as-is; either use this body or drop the call.
    body = gen_req_body(apiname=api_name, APPId=APPId, file_path=file_path,
                        featureId=featureId, featureInfo=featureInfo,
                        dstFeatureId=dstFeatureId)

    parsed_url = urlparse(sut_url)
    headers = {'content-type': "application/json", 'host': parsed_url.hostname, 'appid': APPId}

    # 1. 首先测试服务健康检查
    response = requests.get(f"{sut_url}/health")
    print(response.status_code, response.text)

    # 请求头 / 请求体 — these intentionally override the values built above
    # (see NOTE). The body may limit the number of images processed.
    headers = {"Content-Type": "application/json"}
    body = {"limit": 20}  # 可选参数,限制处理的图片总数

    # 发送POST请求
    response = requests.post(
        f"{sut_url}/v1/private/s782b4996",
        data=json.dumps(body),
        headers=headers
    )

    # 解析响应结果
    if response.status_code == 200:
        result = response.json()
        print("预测评估结果:")
        print(f"准确率: {result['metrics']['accuracy']}%")
        print(f"平均召回率: {result['metrics']['average_recall']}%")
        print(f"处理图片总数: {result['metrics']['total_images']}")
    else:
        print(f"请求失败,状态码: {response.status_code}")
        print(f"错误信息: {response.text}")

    # SECURITY: an unused hard-coded basic-auth credential pair previously
    # lived here; it has been removed from source (it was never sent).

    # 获取响应的 JSON 数据 (assumes the reply is JSON — a non-JSON reply
    # raises here, same as before)
    result = response.json()
    with open(RESULT_FILEPATH, "w") as f:
        json.dump(result, f, indent=4, ensure_ascii=False)
    print(f"结果已成功写入 {RESULT_FILEPATH}")


submit_config_filepath = os.getenv("SUBMIT_CONFIG_FILEPATH", "./tests/resources/submit_config")
result_filepath = os.getenv("RESULT_FILEPATH", "./out/result")
bad_cases_filepath = os.getenv("BAD_CASES_FILEPATH", "./out/badcase")

from typing import Any, Dict, List


def result2file(
    result: Dict[str, Any],
    detail_cases: List[Dict[str, Any]] = None
):
    """Write the aggregated result dict to result_filepath.

    detail_cases is currently unused (reserved for detailed case dumps).
    """
    assert result_filepath is not None
    assert bad_cases_filepath is not None

    if result is not None:
        with open(result_filepath, "w") as f:
            json.dump(result, f, indent=4, ensure_ascii=False)


def test_image_prediction(sut_url, image_path):
    """发送单张图片到服务端预测; returns (top_prediction, error_message)."""
    url = f"{sut_url}/v1/private/s782b4996"

    try:
        with open(image_path, 'rb') as f:
            files = {'image': f}
            response = requests.post(url, files=files, timeout=30)

        result = response.json()
        if result.get('status') != 'success':
            return None, f"服务端错误: {result.get('message')}"

        return result.get('top_prediction'), None
    except Exception as e:
        return None, f"请求错误: {str(e)}"


import random
import time
import os
import requests

if __name__ == '__main__':

    print(f"\n===== main开始请求接口 ===============================================")
    # 1.
首先测试服务健康检查 + + print(f"\n===== 服务健康检查 ===================================================") + response = requests.get(f"{sut_url}/health") + print(response.status_code, response.text) + + """ + # 本地图片路径和真实标签(根据实际情况修改) + image_path = "/path/to/your/test_image.jpg" + true_label = "cat" # 图片的真实标签 + """ + + + """ + # 请求头 + headers = {"Content-Type": "application/json"} + # 请求体(可指定限制处理的图片数量) + body = {"limit": 20 } # 可选参数,限制处理的图片总数 + + # 发送POST请求 + response = requests.post( + f"{sut_url}/v1/private/s782b4996", + data=json.dumps(body), + headers=headers + ) + """ + + """ + # 读取图片文件 + with open(image_path, 'rb') as f: + files = {'image': f} + # 发送POST请求 + response = requests.post(f"{sut_url}/v1/private/s782b4996", files=files) + + + # 解析响应结果 + if response.status_code == 200: + result = response.json() + print("预测评估结果:") + print(f"准确率: {result['metrics']['accuracy']}%") + print(f"平均召回率: {result['metrics']['average_recall']}%") + print(f"处理图片总数: {result['metrics']['total_images']}") + else: + print(f"请求失败,状态码: {response.status_code}") + print(f"错误信息: {response.text}") + """ + + + ############################################################################################### + dataset_root = "/tmp/workspace/256ObjectCategoriesNew" # 数据集根目录 + samples_per_class = 3 # 每个类别抽取的样本数 + image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif') # 支持的图片格式 + + # 结果统计变量 + #total_samples = 0 + correct_predictions = 0 + + # 结果统计变量 + total_samples = 0 + + # CUDA统计 + cuda_true_positives = 0 + cuda_false_positives = 0 + cuda_false_negatives = 0 + cuda_total_processing_time = 0.0 + + # CPU统计 + cpu_true_positives = 0 + cpu_false_positives = 0 + cpu_false_negatives = 0 + cpu_total_processing_time = 0.0 + + + + true_positives = 0 + false_positives = 0 + false_negatives = 0 + total_processing_time = 0.0 # 总处理时间(秒) + + """ + # 遍历所有类别文件夹 + for folder_name in tqdm(os.listdir(dataset_root), desc="处理类别"): + folder_path = os.path.join(dataset_root, folder_name) + + + # 提取类别名(从"序号.name"格式中提取name部分) + 
class_name = folder_name.split('.', 1)[1].strip().lower() + + # 获取文件夹中所有图片 + image_files = [] + for file in os.listdir(folder_path): + if file.lower().endswith(image_extensions): + image_files.append(os.path.join(folder_path, file)) + + # 随机抽取指定数量的图片(如果不足则取全部) + selected_images = random.sample( + image_files, + min(samples_per_class, len(image_files)) + ) + + # 处理选中的图片 + for img_path in selected_images: + total_count += 1 + + # 发送预测请求 + prediction, error = test_image_prediction(sut_url, img_path) + if error: + print(f"处理图片 {img_path} 失败: {error}") + continue + + # 解析预测结果 + pred_class = prediction.get('class_name', '').lower() + confidence = prediction.get('confidence', 0) + + # 判断是否预测正确(真实类别是否在预测类别中) + if class_name in pred_class: + correct_predictions += 1 + + + # 可选:打印详细结果 + print(f"图片: {os.path.basename(img_path)} | 真实: {class_name} | 预测: {pred_class} | 置信度: {confidence:.4f} | {'正确' if is_correct else '错误'}") + """ + + # 遍历所有类别文件夹 + for folder_name in os.listdir(dataset_root): + folder_path = os.path.join(dataset_root, folder_name) + + # 跳过非文件夹的项目 + if not os.path.isdir(folder_path): + continue + + # 提取类别名(从"序号.name"格式中提取name部分) + try: + class_name = folder_name.split('.', 1)[1].strip().lower() + except IndexError: + print(f"警告:文件夹 {folder_name} 命名格式不正确,跳过该文件夹") + continue + + # 获取文件夹中所有图片 + image_files = [] + for file in os.listdir(folder_path): + file_path = os.path.join(folder_path, file) + if os.path.isfile(file_path) and file.lower().endswith(image_extensions): + image_files.append(file_path) + + # 随机抽取指定数量的图片(如果不足则取全部) + selected_images = random.sample( + image_files, + min(samples_per_class, len(image_files)) + ) + + # 处理该文件夹中的所有图片 + for img_path in selected_images: + total_samples += 1 + start_time = time.time() # 记录开始时间 + # 发送预测请求 + #prediction, error = test_image_prediction(sut_url, img_path) + + # 获取cuda和cpu的预测结果及处理时间 + cuda_pred, cpu_pred, error, processing_time = test_image_prediction(sut_url, img_path) + + # 计算单张图片处理时间(包括网络请求和模型预测) + 
#processing_time = time.time() - start_time + #total_processing_time += processing_time + + # 累加处理时间(单次请求的时间同时用于cuda和cpu统计) + cuda_total_processing_time += processing_time + cpu_total_processing_time += processing_time + + if error: + print(f"处理图片 {img_path} 失败: {error}") + # 处理失败的样本视为预测错误 + #false_negatives += 1 + # 处理失败时两种设备都记为错误 + cuda_false_negatives += 1 + cpu_false_negatives += 1 + continue + + # 解析预测结果 + #pred_class = prediction.get('class_name', '').lower() + #confidence = prediction.get('confidence', 0) + + # 判断是否预测正确(真实类别是否在预测类别中,不分大小写) + #is_correct = class_name in pred_class + + # 更新统计指标 + #if is_correct: + # true_positives += 1 + #else: + # false_positives += 1 + # false_negatives += 1 + + # 处理CUDA预测结果 + if cuda_pred: + cuda_pred_class = cuda_pred.get('class_name', '').lower() + cuda_is_correct = class_name in cuda_pred_class + + if cuda_is_correct: + cuda_true_positives += 1 + else: + cuda_false_positives += 1 + cuda_false_negatives += 1 + + # 处理CPU预测结果 + if cpu_pred: + cpu_pred_class = cpu_pred.get('class_name', '').lower() + cpu_is_correct = class_name in cpu_pred_class + + if cpu_is_correct: + cpu_true_positives += 1 + else: + cpu_false_positives += 1 + cpu_false_negatives += 1 + + # 打印详细结果(可选) + #print(f"图片: {os.path.basename(img_path)} | 真实: {class_name} | 预测: {pred_class} | 置信度: {confidence:.4f} | {'正确' if is_correct else '错误'}") + print(f"图片: {os.path.basename(img_path)} | 真实: {class_name}") + print(f"CUDA预测: {cuda_pred_class} | {'正确' if cuda_is_correct else '错误'}") + print(f"CPU预测: {cpu_pred_class} | {'正确' if cpu_is_correct else '错误'}\n") + + """ + # 计算整体指标(在单标签场景下,准确率=召回率) + if total_samples == 0: + overall_accuracy = 0.0 + overall_recall = 0.0 + else: + overall_accuracy = correct_predictions / total_samples + overall_recall = correct_predictions / total_samples # 整体召回率 + + # 输出统计结果 + print("\n" + "="*50) + print(f"测试总结:") + print(f"总测试样本数: {total_samples}") + print(f"正确预测样本数: {correct_predictions}") + print(f"整体准确率: {overall_accuracy:.4f} 
({correct_predictions}/{total_samples})") + print(f"整体召回率: {overall_recall:.4f} ({correct_predictions}/{total_samples})") + print("="*50) + """ + # 初始化结果字典 + """ + result = { + "total_processing_time": round(total_processing_time, 6), + "throughput": 0.0, + "accuracy": 0.0, + "recall": 0.0 + } + + # 计算评估指标 + if total_samples == 0: + print("没有找到任何图片样本") + + + # 准确率 = 正确预测的样本数 / 总预测样本数 + accuracy = true_positives / total_samples * 100 if total_samples > 0 else 0 + + # 召回率 = 正确预测的样本数 / (正确预测的样本数 + 未正确预测的正样本数) + recall_denominator = true_positives + false_negatives + recall = true_positives / recall_denominator * 100 if recall_denominator > 0 else 0 + + # 处理速度计算(每秒钟处理的图片张数) + # 避免除以0(当总时间极短时) + throughput = total_samples / total_processing_time if total_processing_time > 1e-6 else 0 + + # 更新结果字典 + result.update({ + "throughput": round(throughput, 6), + "accuracy": round(accuracy, 6), + "recall": round(recall, 6) + }) + """ + + + # 初始化结果字典 + result = { + # CUDA指标 + "cuda_total_processing_time": round(cuda_total_processing_time, 6), + "cuda_throughput": 0.0, + "cuda_accuracy": 0.0, + "cuda_recall": 0.0, + + # CPU指标 + "cpu_total_processing_time": round(cpu_total_processing_time, 6), + "cpu_throughput": 0.0, + "cpu_accuracy": 0.0, + "cpu_recall": 0.0, + + + } + + # 计算CUDA指标 + cuda_accuracy = cuda_true_positives / total_samples * 100 if total_samples > 0 else 0 + cuda_recall_denominator = cuda_true_positives + cuda_false_negatives + cuda_recall = cuda_true_positives / cuda_recall_denominator * 100 if cuda_recall_denominator > 0 else 0 + cuda_throughput = total_samples / cuda_total_processing_time if cuda_total_processing_time > 1e-6 else 0 + + # 计算CPU指标 + cpu_accuracy = cpu_true_positives / total_samples * 100 if total_samples > 0 else 0 + cpu_recall_denominator = cpu_true_positives + cpu_false_negatives + cpu_recall = cpu_true_positives / cpu_recall_denominator * 100 if cpu_recall_denominator > 0 else 0 + cpu_throughput = total_samples / cpu_total_processing_time if 
cpu_total_processing_time > 1e-6 else 0 + + # 更新结果字典 + result.update({ + # CUDA指标 + "cuda_throughput": round(cuda_throughput, 6), + "cuda_accuracy": round(cuda_accuracy, 6), + "cuda_recall": round(cuda_recall, 6), + + # CPU指标 + "cpu_throughput": round(cpu_throughput, 6), + "cpu_accuracy": round(cpu_accuracy, 6), + "cpu_recall": round(cpu_recall, 6) + }) + + # 打印最终统计结果 + print("\n" + "="*50) + print(f"总样本数: {total_samples}") + + print("\nCUDA 统计:") + print(f"总处理时间: {cuda_total_processing_time:.4f}秒") + print(f"处理速度: {result['cuda_throughput']:.2f}张/秒") + print(f"正确预测: {cuda_true_positives}") + print(f"准确率: {result['cuda_accuracy']:.4f}%") + print(f"召回率: {result['cuda_recall']:.4f}%") + + print("\nCPU 统计:") + print(f"总处理时间: {cpu_total_processing_time:.4f}秒") + print(f"处理速度: {result['cpu_throughput']:.2f}张/秒") + print(f"正确预测: {cpu_true_positives}") + print(f"准确率: {result['cpu_accuracy']:.4f}%") + print(f"召回率: {result['cpu_recall']:.4f}%") + print("="*50) + + + #result = {} + #result['accuracy_1_1'] = 3 + result2file(result) + + """ + if result['accuracy_1_1'] < 0.9: + log.error(f"1:1正确率未达到90%, 视为产品不可用") + change_product_unavailable() + + + if result['accuracy_1_N'] < 1: + log.error(f"1:N正确率未达到100%, 视为产品不可用") + change_product_unavailable() + if result['1_1_latency'] > 0.5: + log.error(f"1:1平均latency超过0.5s, 视为产品不可用") + change_product_unavailable() + if result['1_N_latency'] > 0.5: + log.error(f"1:N平均latency超过0.5s, 视为产品不可用") + change_product_unavailable() + if result['enroll_latency'] > 1: + log.error(f"enroll(入库)平均latency超过1s, 视为产品不可用") + change_product_unavailable() + """ + exit_code = 0 + + diff --git a/schemas/__init__.py b/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/schemas/context.py b/schemas/context.py new file mode 100644 index 0000000..adff8d6 --- /dev/null +++ b/schemas/context.py @@ -0,0 +1,90 @@ +import os +from copy import deepcopy +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field + +from 
schemas.stream import StreamDataModel + + +class LabelContext(BaseModel): + start: float + end: float + answer: str + + +class PredContext(BaseModel): + recognition_results: StreamDataModel + recv_time: Optional[float] = Field(None) + send_time: Optional[float] = Field(None) + + +class ASRContext: + def __init__(self, **kwargs): + self.bits = kwargs.get("bits", 16) + self.channel = kwargs.get("channel", 1) + self.sample_rate = kwargs.get("sample_rate", 16000) + self.audio_format = kwargs.get("format", "wav") + self.enable_words = kwargs.get("enable_words", True) + self.char_contains_rate = kwargs.get("char_contains_rate", 0.8) + self.lang = os.getenv("lang") + if self.lang is None: + self.lang = kwargs.get("lang", "en") + self.stream = kwargs.get("stream", True) + + self.wait_time = float(os.getenv("wait_time", 0.1)) + self.chunk_size = self.sample_rate * self.bits / 8 * self.wait_time + if int(os.getenv('chunk_size_set', 0)): + self.chunk_size = int(os.getenv('chunk_size_set', 0)) + + self.audio_length = 0 + self.file_path = "" + + self.labels: List[LabelContext] = kwargs.get("labels", []) + self.preds: List[PredContext] = kwargs.get("preds", []) + + self.label_sentences: List[str] = [] + self.pred_sentences: List[str] = [] + + self.send_time_start_end = [] + self.recv_time_start_end = [] + + self.fail = False + self.fail_char_contains_rate_num = 0 + + self.punctuation_num = 0 + self.pred_punctuation_num = 0 + + def append_labels(self, voices: List[Dict]): + for voice_data in voices: + label_context = LabelContext(**voice_data) + self.labels.append(label_context) + + def append_preds( + self, + predict_data: List[StreamDataModel], + send_time: List[float], + recv_time: List[float], + ): + self.send_time_start_end = [send_time[0], send_time[-1]] if len(send_time) > 0 else [] + self.recv_time_start_end = [recv_time[0], recv_time[-1]] if len(recv_time) > 0 else [] + for pred_item, send_time_item, recv_time_item in zip(predict_data, send_time, recv_time): + pred_item 
= deepcopy(pred_item) + pred_context = PredContext(recognition_results=pred_item.model_dump()) + pred_context.send_time = send_time_item + pred_context.recv_time = recv_time_item + self.preds.append(pred_context) + + def to_dict(self): + return { + "bits": self.bits, + "channel": self.channel, + "sample_rate": self.sample_rate, + "audio_format": self.audio_format, + "enable_words": self.enable_words, + "stream": self.stream, + "wait_time": self.wait_time, + "chunk_size": self.chunk_size, + "labels": [item.model_dump_json() for item in self.labels], + "preds": [item.model_dump_json() for item in self.preds], + } diff --git a/schemas/dataset.py b/schemas/dataset.py new file mode 100644 index 0000000..2c940c7 --- /dev/null +++ b/schemas/dataset.py @@ -0,0 +1,18 @@ +from typing import List + +from pydantic import BaseModel, Field + + +class QueryDataSentence(BaseModel): + answer: str = Field(description="文本label") + start: float = Field(description="句子开始时间") + end: float = Field(description="句子结束时间") + + +class QueryData(BaseModel): + lang: str = Field(description="语言") + file: str = Field(description="音频文件位置") + duration: float = Field(description="音频长度") + voice: List[QueryDataSentence] = Field( + description="音频文件的文本label内容" + ) diff --git a/schemas/stream.py b/schemas/stream.py new file mode 100644 index 0000000..44f8111 --- /dev/null +++ b/schemas/stream.py @@ -0,0 +1,66 @@ +from typing import List + +from pydantic import BaseModel, ValidationError, field_validator +from pydantic import model_validator + + +class StreamWordsModel(BaseModel): + text: str + start_time: float + end_time: float + + @model_validator(mode="after") + def check_result(self): + if self.end_time < self.start_time: + raise ValidationError("end-time 小于 start-time, error") + return self + + +class StreamDataModel(BaseModel): + text: str + language: str + final_result: bool + para_seq: int + start_time: float + end_time: float + words: List[StreamWordsModel] + + @model_validator(mode="after") + 
def check_result(self): + if self.end_time < self.start_time: + raise ValidationError("end-time 小于 start-time, error") + return self + + +class StreamResultModel(BaseModel): + asr_results: StreamDataModel + + @field_validator('asr_results', mode="after") + def convert_to_seconds(cls, v: StreamDataModel, values): + # 在这里处理除以1000的逻辑 + v.end_time = v.end_time / 1000 + v.start_time = v.start_time / 1000 + for word in v.words: + word.start_time /= 1000 + word.end_time /= 1000 + return v + + class Config: + validate_assignment = True + + +class NonStreamDataModel(BaseModel): + text: str + para_seq: int + start_time: float + end_time: float + + @model_validator(mode="after") + def check_result(self): + if self.end_time < self.start_time: + raise ValidationError("end-time 小于 start-time, error") + return self + + +class NonStreamResultModel(BaseModel): + contents: List[NonStreamDataModel] diff --git a/scripts/check_dataset_time.py b/scripts/check_dataset_time.py new file mode 100644 index 0000000..ef07b9f --- /dev/null +++ b/scripts/check_dataset_time.py @@ -0,0 +1,53 @@ +import os +import sys +from collections import defaultdict + +import yaml + + +def main(dataset_dir): + dirs = os.listdir(dataset_dir) + dirs = list( + filter(lambda x: os.path.isdir(os.path.join(dataset_dir, x)), dirs) + ) + + problem_dirs = set() + problem_count = defaultdict(int) + for dir in dirs: + with open(os.path.join(dataset_dir, dir, "data.yaml"), "r") as f: + data = yaml.full_load(f) + for query_i, query in enumerate(data["query_data"]): + voices = sorted(query["voice"], key=lambda x: x["start"]) + if voices != query["voice"]: + print("-----", dir) + if voices[0]["start"] > voices[0]["end"]: + print( + "err1: %s 第%s个query的第%d个voice的start大于end: %s" + % (dir, query_i, 0, voices[0]["answer"]) + ) + problem_dirs.add(dir) + for voice_i in range(1, len(voices)): + voice = voices[voice_i] + if voice["start"] > voice["end"]: + print( + "err1: %s 第%s个query的第%d个voice的start大于end: %s" + % (dir, query_i, 
voice_i, voice["answer"]) + ) + problem_dirs.add(dir) + if voice["start"] < voices[voice_i - 1]["end"]: + print( + "err2: %s 第%s个query的第%d个voice的start小于前一个voice的end: %s" + % (dir, query_i, voice_i, voice["answer"]) + ) + problem_dirs.add(dir) + problem_count[dir] += 1 + print(len(dirs)) + print(problem_dirs) + print(problem_count) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("指定 测试数据集文件夹") + sys.exit(1) + main(sys.argv[1]) diff --git a/scripts/convert_callback_dataset.py b/scripts/convert_callback_dataset.py new file mode 100644 index 0000000..1f52a33 --- /dev/null +++ b/scripts/convert_callback_dataset.py @@ -0,0 +1,108 @@ +import json +import os +import shutil +import sys +import zipfile + +import yaml + +""" +target +{ + "global": { + "lang": "" + }, + "query_data": [ + "file": "", + "duration": 2.0, + "voice": [ + { + "answer": "", + "start": 0.0, + "end": 1.0 + } + ] + ] +} +""" + + +def situation_a(meta, dataset_folder, output_folder): + """ + { + "combined": { + "en": [ + { + "wav": "*.wav", + "transcriptions": [ + { + "text": "", + "start": 0.0, + "end": 1.0 + } + ], + "duration": 2.0 + } + ] + } + } + """ + meta = meta["combined"] + + for lang, arr in meta.items(): + print("processing", lang) + assert len(lang) == 2 + lang_folder = os.path.join(output_folder, lang) + os.makedirs(lang_folder, exist_ok=True) + data = {"global": {"lang": lang}, "query_data": []} + query_data = data["query_data"] + for item in arr: + os.makedirs( + os.path.join(lang_folder, os.path.dirname(item["wav"])), + exist_ok=True, + ) + mp3_file = item["wav"][:-4] + ".mp3" + shutil.copyfile( + os.path.join(dataset_folder, mp3_file), + os.path.join(lang_folder, mp3_file), + ) + query_data_item = { + "file": mp3_file, + "duration": float(item["duration"]), + "voice": [], + } + query_data.append(query_data_item) + voice = query_data_item["voice"] + for v in item["transcriptions"]: + voice.append( + { + "answer": v["text"], + "start": float(v["start"]), + "end": 
float(v["end"]), + } + ) + with open(os.path.join(lang_folder, "data.yaml"), "w") as f: + yaml.dump(data, f, indent=2, allow_unicode=True, encoding="utf-8") + with zipfile.ZipFile( + os.path.join(output_folder, lang + ".zip"), "w" + ) as ziper: + dirname = lang_folder + for path, _, files in os.walk(dirname): + for file in files: + ziper.write( + os.path.join(path, file), + os.path.join(path[len(dirname) :], file), + zipfile.ZIP_DEFLATED, + ) + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print("指定 数据集文件夹路径 输出路径") + sys.exit(1) + dataset_folder = sys.argv[1] + output_folder = sys.argv[2] + + with open(os.path.join(dataset_folder, "meta.json")) as f: + meta = json.load(f) + situation_a(meta, dataset_folder, output_folder) diff --git a/scripts/debug_detailcase.py b/scripts/debug_detailcase.py new file mode 100644 index 0000000..28b43dc --- /dev/null +++ b/scripts/debug_detailcase.py @@ -0,0 +1,56 @@ +import json +import sys + +from schemas.dataset import QueryData +from schemas.stream import StreamDataModel +from utils.evaluator_plus import evaluate_editops + + +def main(detailcase_file: str): + with open(detailcase_file) as f: + d = json.load(f)[0] + preds = d["preds"] + preds = list(map(lambda x: StreamDataModel(**x), preds)) + preds = list(filter(lambda x: x.final_result, preds)) + label = d["label"] + label = QueryData(**label) + print(evaluate_editops(label, preds)) + + +def evaluate_from_record(detailcase_file: str, record_path: str): + with open(detailcase_file) as f: + d = json.load(f)[0] + label = d["label"] + label = QueryData(**label) + with open(record_path) as f: + record = json.load(f) + tokens_pred = record["tokens_pred"] + tokens_label = record["tokens_label"] + recognition_results = record["recognition_results"] + recognition_results = list( + map(lambda x: StreamDataModel(**x), recognition_results) + ) + a, b = [], [] + for i, rr in enumerate(recognition_results): + if rr.final_result: + a.append(tokens_pred[i]) + b.append(rr) + 
tokens_pred = a + recognition_results = b + + print( + evaluate_editops( + label, + recognition_results, + tokens_pred, + tokens_label, + ) + ) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("请指定 detailcase 文件路径") + sys.exit(1) + main(sys.argv[1]) + # evaluate_from_record(sys.argv[1], sys.argv[2]) diff --git a/ssh-keygen b/ssh-keygen new file mode 100755 index 0000000..3b76d1b Binary files /dev/null and b/ssh-keygen differ diff --git a/starting_kit/Dockerfile b/starting_kit/Dockerfile new file mode 100644 index 0000000..a1f0693 --- /dev/null +++ b/starting_kit/Dockerfile @@ -0,0 +1,11 @@ +FROM harbor.4pd.io/inf/base-python3.8-ubuntu:1.1.0 + +WORKDIR /workspace + +ADD ./requirements.txt /workspace +RUN pip install -r ./requirements.txt -i https://nexus.4pd.io/repository/pypi-all/simple --trusted-host nexus.4pd.io --extra-index-url https://mirrors.aliyun.com/pypi/simple/ \ + && pip cache purge + +ADD . /workspace + +CMD ["python", "main.py"] diff --git a/starting_kit/main.py b/starting_kit/main.py new file mode 100644 index 0000000..9662fd2 --- /dev/null +++ b/starting_kit/main.py @@ -0,0 +1,313 @@ +import logging +import os +import threading +import time +from typing import Optional + +import flask +import requests +from werkzeug.datastructures import FileStorage + +app = flask.Flask(__name__) +heartbeat_active = False + +log = logging.getLogger(__name__) + +log.propagate = False + +level = logging.INFO + +log.setLevel(level) + +formatter = logging.Formatter( + "[%(asctime)s] %(levelname)s : %(pathname)s:%(lineno)d - %(message)s", + "%Y-%m-%d %H:%M:%S", +) + +streamHandler = logging.StreamHandler() +streamHandler.setLevel(level) +streamHandler.setFormatter(formatter) +log.addHandler(streamHandler) + + +def heartbeat(url): + global heartbeat_active + if heartbeat_active: + return + heartbeat_active = True + while True: + try: + requests.post(url, json={"status": "RUNNING"}) + except Exception: + pass + time.sleep(10) + + +def asr( + audio_file: 
FileStorage, + language: Optional[str], + progressCallbackUrl: str, + taskId: str, +): + """TODO: 读取audio_file, 调用语音识别服务, 实时返回识别结果""" + + # ignore BEGIN + # 此处为榜单本地测试使用 + if os.getenv("LOCAL_TEST"): + return local_test(progressCallbackUrl, taskId) + # ignore END + + language = "de" + # 某一次识别返回 + requests.post( + progressCallbackUrl, + json={ + "taskId": taskId, + "status": "RUNNING", + "recognition_results": { # 传增量结果, status如果是FINISHED, 或者ERROR, 这个字段请不要传值 + "text": "最先启动的还是", + "final_result": True, + "para_seq": 0, + "language": language, + "start_time": 6300, + "end_time": 6421, + "words": [ + { + "text": "最", + "start_time": 6300, + "end_time": 6321, + }, + { + "text": "先", + "start_time": 6321, + "end_time": 6345, + }, + { + "text": "启", + "start_time": 6345, + "end_time": 6350, + }, + { + "text": "动", + "start_time": 6350, + "end_time": 6370, + }, + { + "text": "的", + "start_time": 6370, + "end_time": 6386, + }, + { + "text": "还", + "start_time": 6386, + "end_time": 6421, + }, + { + "text": "是", + "start_time": 6421, + "end_time": 6435, + }, + ], + }, + }, + ) + # ... 
识别结果返回完毕 + + # 识别结束 + requests.post( + progressCallbackUrl, + json={ + "taskId": taskId, + "status": "FINISHED", + }, + ) + + +@app.post("/predict") +def predict(): + body = flask.request.form + language = body.get("language") + if language is None: + "自行判断语种" + taskId = body["taskId"] + progressCallbackUrl = body["progressCallbackUrl"] + heartbeatUrl = body["heartbeatUrl"] + + threading.Thread( + target=heartbeat, args=(heartbeatUrl,), daemon=True + ).start() + + audio_file = flask.request.files["file"] + # audio_file.stream # 读取文件流 + # audio_file.save("audio.mp3") # 保存文件 + threading.Thread( + target=asr, + args=(audio_file, language, progressCallbackUrl, taskId), + daemon=True, + ).start() + return flask.jsonify({"status": "OK"}) + + +# ignore BEGIN +def local_test(progressCallbackUrl: str, taskId: str): + """忽略此方法, 此方法为榜单本地调试使用""" + import random + import re + + import yaml + + def callback(content): + try: + if content is None: + requests.post( + progressCallbackUrl, + json={"taskId": taskId, "status": "FINISHED"}, + ) + else: + requests.post( + progressCallbackUrl, + json={ + "taskId": taskId, + "status": "RUNNING", + "recognition_results": content, + }, + ) + except Exception: + pass + + with open( + os.getenv("LOCAL_TEST_DATA_PATH", "../dataset/out/data.yaml") + ) as f: + data = yaml.full_load(f) + + voices = data["query_data"][0]["voice"] + + # 首次发送 + first_send_time = random.randint(3, 5) + send_interval = random.random() * 0 + log.info("首次发送%ss 发送间隔%ss" % (first_send_time, send_interval)) + time.sleep(first_send_time) + + # 将句子拼接到一起 + if random.random() < 0.3: + log.info("将部分句子合并成单句 每次合并的句子不超过3句") + rand_idx = 0 + rand_sep = [0, len(voices) - 1] + while rand_sep[rand_idx] + 1 <= rand_sep[rand_idx + 1] - 1: + rand_cursep = random.randint( + rand_sep[rand_idx] + 1, + min(rand_sep[rand_idx + 1] - 1, rand_sep[rand_idx] + 1 + 3), + ) + rand_sep.insert(rand_idx + 1, rand_cursep) + rand_idx += 1 + merged_voices = [] + for i, cur_sep in enumerate(rand_sep[:-1]): 
+ voice = voices[cur_sep] + for j in range(cur_sep + 1, rand_sep[i + 1]): + voice["answer"] += voices[j]["answer"] + voice["end"] = voices[j]["end"] + merged_voices.append(voice) + merged_voices.append(voices[rand_sep[-1]]) + voices = merged_voices + + def split_and_keep(text, delimiters): + # 构建正则表达式模式,匹配文本或分隔符 + pattern = "|".join(re.escape(delimiter) for delimiter in delimiters) + pattern = f"(?:[^{pattern}]+|[{pattern}])" + return re.findall(pattern, text) + + puncs = [",", ".", "?", "!", ";", ":"] + + para_seq = 0 + for voice in voices: + answer: str = voice["answer"] + start_time: float = voice["start"] + end_time: float = voice["end"] + words = split_and_keep(answer, puncs) + temp_words = [] + for i, word in enumerate(words): + if i > 0 and i < len(words) - 1 and random.random() < 0.15: + log.info("随机删除word") + continue + temp_words.extend(word.split(" ")) + if len(temp_words) == 0: + temp_words = words[0].split(" ") + words = temp_words + answer = " ".join(words) + words = list(map(lambda x: x.strip(), words)) + words = list(filter(lambda x: len(x) > 0, words)) + + # 将时间均匀分配到每个字上 + words_withtime = [] + word_unittime = (end_time - start_time) / len(words) + for i, word in enumerate(words): + word_start = start_time + word_unittime * i + word_end = word_start + word_unittime + words_withtime.append( + { + "text": word, + "start_time": word_start * 1000, + "end_time": word_end * 1000, + } + ) + + # 将句子首尾的标点符号时间扩展到字上 标点符号时间为瞬间 + punc_at = 0 + while punc_at < len(words) and words[punc_at] in puncs: + punc_at += 1 + if punc_at < len(words): + words_withtime[punc_at]["start_time"] = words_withtime[0][ + "start_time" + ] + for i in range(0, punc_at): + words_withtime[i]["start_time"] = words_withtime[0]["start_time"] + words_withtime[i]["end_time"] = words_withtime[0]["start_time"] + punc_at = len(words) - 1 + while punc_at >= 0 and words[punc_at] in puncs: + punc_at -= 1 + if punc_at >= 0: + words_withtime[punc_at]["end_time"] = words_withtime[-1]["end_time"] + 
for i in range(punc_at + 1, len(words)): + words_withtime[i]["start_time"] = ( + words_withtime[-1]["end_time"] + 0.1 + ) + words_withtime[i]["end_time"] = words_withtime[-1]["end_time"] + 0.1 + + if random.random() < 0.4 and len(words_withtime) > 1: + log.info("发送一次final_result=False") + rand_idx = random.randint(1, len(words_withtime) - 1) + recognition_result = { + "text": " ".join( + map(lambda x: x["text"], words_withtime[:rand_idx]) + ), + "final_result": False, + "para_seq": para_seq, + "language": "de", + "start_time": start_time * 1000, + "end_time": end_time * 1000, + "words": words_withtime[:rand_idx], + } + callback(recognition_result) + + recognition_result = { + "text": answer, + "final_result": True, + "para_seq": para_seq, + "language": "de", + "start_time": start_time * 1000, + "end_time": end_time * 1000, + "words": words_withtime, + } + callback(recognition_result) + para_seq += 1 + log.info("send %s" % para_seq) + + time.sleep(send_interval) + + callback(None) + + +# ignore END + +if __name__ == "__main__": + app.run(host="0.0.0.0", port=80) diff --git a/starting_kit/requirements.txt b/starting_kit/requirements.txt new file mode 100644 index 0000000..09a313b --- /dev/null +++ b/starting_kit/requirements.txt @@ -0,0 +1,3 @@ +flask +requests +pyyaml diff --git a/tests/test_callback_editops.py b/tests/test_callback_editops.py new file mode 100644 index 0000000..90ddefd --- /dev/null +++ b/tests/test_callback_editops.py @@ -0,0 +1,16 @@ +import json + +from schemas.dataset import QueryData +from schemas.stream import StreamDataModel +from utils.evaluator_plus import evaluate_editops + +with open("out/detail_cases.json") as f: + detail_cases = json.load(f) + +detail_case = detail_cases[0] +preds = [] +for pred in detail_case["preds"]: + preds.append(StreamDataModel.model_validate(pred)) +label = QueryData.model_validate(detail_case["label"]) + +print(evaluate_editops(label, preds)) diff --git a/tests/test_cer.py b/tests/test_cer.py new file mode 
100644 index 0000000..6c5a66f --- /dev/null +++ b/tests/test_cer.py @@ -0,0 +1,93 @@ +""" +f(a, b) 计算 a -> b 的编辑距离,使用的方法是之前asr榜单的方法 +g(a, b) 计算 a -> b 的编辑距离,使用的是原始的编辑距离计算方法 +test() 是对拍程序 +""" + +import random +import string +from copy import deepcopy +from typing import List, Tuple + +import Levenshtein + + +def mapping(gt: str, dt: str): + return [i for i in gt], [i for i in dt] + + +def token_mapping( + tokens_gt: List[str], tokens_dt: List[str] +) -> Tuple[List[str], List[str]]: + arr1 = deepcopy(tokens_gt) + arr2 = deepcopy(tokens_dt) + operations = Levenshtein.editops(arr1, arr2) + for op in operations[::-1]: + if op[0] == "insert": + arr1.insert(op[1], None) + elif op[0] == "delete": + arr2.insert(op[2], None) + return arr1, arr2 + + +def cer(tokens_gt_mapping: List[str], tokens_dt_mapping: List[str]): + """输入的是经过编辑距离映射后的两个 token 序列,返回 1-cer, token-cnt""" + insert = sum(1 for item in tokens_gt_mapping if item is None) + delete = sum(1 for item in tokens_dt_mapping if item is None) + equal = sum( + 1 + for token_gt, token_dt in zip(tokens_gt_mapping, tokens_dt_mapping) + if token_gt == token_dt + ) + replace = len(tokens_gt_mapping) - insert - equal # - delete + return replace, delete, insert + + +def f(a, b): + return cer(*token_mapping(*mapping(a, b))) + + +def raw(tokens_gt, tokens_dt): + arr1 = deepcopy(tokens_gt) + arr2 = deepcopy(tokens_dt) + operations = Levenshtein.editops(arr1, arr2) + insert = 0 + delete = 0 + replace = 0 + for op in operations: + if op[0] == "insert": + insert += 1 + if op[0] == "delete": + delete += 1 + if op[0] == "replace": + replace += 1 + return replace, delete, insert + + +def g(a, b): + return raw(*mapping(a, b)) + + +def check(a, b): + ff = f(a, b) + gg = g(a, b) + if ff != gg: + print(ff, gg) + return ff == gg + + +def random_string(length): + letters = string.ascii_lowercase + return "".join(random.choice(letters) for i in range(length)) + + +def test(): + for _ in range(10000): + a = random_string(30) + b = 
random_string(30) + if not check(a, b): + print(a, b) + break + + +test() diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/asr_ter.py b/utils/asr_ter.py new file mode 100644 index 0000000..4d74829 --- /dev/null +++ b/utils/asr_ter.py @@ -0,0 +1,57 @@ +# copy from +# https://gitlab.4pd.io/scene_lab/leaderboard/judge_flows/foundamental_capability/blob/master/utils/asr_ter.py + + +def calc_ter_speechio(pred, ref, language="zh"): + assert language == "zh", "Unsupported language %s" % language + assert ref is not None and ref != "", "Reference script cannot be empty" + if language == "zh": + from .speechio import error_rate_zh as error_rate + from .speechio import textnorm_zh as textnorm + + normalizer = textnorm.TextNorm( + to_banjiao=True, + to_upper=True, + to_lower=False, + remove_fillers=True, + remove_erhua=True, + check_chars=False, + remove_space=False, + cc_mode="", + ) + norm_pred = normalizer(pred if pred is not None else "") + norm_ref = normalizer(ref) + tokenizer = "char" + alignment, score = error_rate.EditDistance( + error_rate.tokenize_text(norm_ref, tokenizer), + error_rate.tokenize_text(norm_pred, tokenizer), + ) + c, s, i, d = error_rate.CountEdits(alignment) + ter = error_rate.ComputeTokenErrorRate(c, s, i, d) / 100.0 + return {"ter": ter, "err_token_cnt": s + d + i, "ref_all_token_cnt": s + d + c} + assert False, "Bug, not reachable" + + +def calc_ter_wjs(pred, ref, language="zh"): + assert language == "zh", "Unsupported language %s" % language + assert ref is not None and ref != "", "Reference script cannot be empty" + from . 
import wjs_asr_wer + + ignore_words = set() + case_sensitive = False + split = None + calculator = wjs_asr_wer.Calculator() + norm_pred = wjs_asr_wer.normalize( + wjs_asr_wer.characterize(pred if pred is not None else ""), + ignore_words, + case_sensitive, + split, + ) + norm_ref = wjs_asr_wer.normalize(wjs_asr_wer.characterize(ref), ignore_words, case_sensitive, split) + result = calculator.calculate(norm_pred, norm_ref) + ter = ((result["ins"] + result["sub"] + result["del"]) * 1.0 / result["all"]) if result["all"] != 0 else 1.0 + return { + "ter": ter, + "err_token_cnt": result["ins"] + result["sub"] + result["del"], + "ref_all_token_cnt": result["all"], + } diff --git a/utils/client.py b/utils/client.py new file mode 100644 index 0000000..3c47df9 --- /dev/null +++ b/utils/client.py @@ -0,0 +1,224 @@ +import json +import os +import threading +import time +import traceback +from copy import deepcopy +from typing import Any, List + +import websocket +from pydantic_core import ValidationError +from websocket import create_connection + +from schemas.context import ASRContext +from schemas.stream import StreamDataModel, StreamResultModel +from utils.logger import logger + +IN_TEST = os.getenv("SUBMIT_CONFIG_FILEPATH", None) is None + + +class Client: + def __init__(self, sut_url: str, context: ASRContext) -> None: + # base_url = "ws://127.0.0.1:5003" + self.base_url = sut_url + "/recognition" + logger.info(f"{self.base_url}") + self.context: ASRContext = deepcopy(context) + # if not os.getenv("DATASET_FILEPATH", ""): + # self.base_url = "wss://speech.4paradigm.com/aibuds/api/v1/recognition" + # self.base_url = "ws://localhost:5003/recognition" + self.connect_num = 0 + self.exception = False + self.close_time = 10**50 + self.send_time: List[float] = [] + self.recv_time: List[float] = [] + self.predict_data: List[Any] = [] + self.success = True + + def action(self): + # 如果 5 次初始化都失败,则退出 + connect_success = False + for i in range(5): + try: + self._connect_init() + 
connect_success = True + break + except Exception as e: + logger.error(f"第 {i+1} 次连接失败,原因:{e}") + time.sleep(int(os.getenv("connect_sleep", 10))) + if not connect_success: + exit(-1) + self.trecv = threading.Thread(target=self._recv) + self.trecv.start() + self._send() + self._close() + return self._gen_result() + + def _connect_init(self): + end_time = time.time() + float(os.getenv("end_time", 2)) + success = False + try: + self.ws = create_connection(self.base_url) + self.ws.send(json.dumps(self._gen_init_data())) + while time.time() < end_time and not success: + data = self.ws.recv() + logger.info(f"data {data}") + if len(data) == 0: + time.sleep(1) + continue + if isinstance(data, str): + try: + data = json.loads(data) + except Exception: + raise Exception("初始化阶段,数据不是 json 字符串格式,终止流程") + if isinstance(data, dict): + success = data.get("success", False) + if not success: + logger.error(f"初始化失败,返回的结果为 {data},终止流程") + else: + break + logger.error("初始化阶段,数据不是 json 字符串格式,终止流程") + exit(-1) + except websocket.WebSocketConnectionClosedException or TimeoutError: + raise Exception("初始化阶段连接中断,终止流程") + # exit(-1) + except ConnectionRefusedError: + raise Exception("初始化阶段,连接失败,等待 10s 后重试,最多重试 5 次") + # logger.error("初始化阶段,连接失败,等待 10s 后重试,最多重试 5 次") + # self.connect_num += 1 + # if self.connect_num <= 4: + # time.sleep(int(os.getenv("connect_sleep", 10))) + # self._connect_init() + # success = True + # else: + # logger.error("初始化阶段连接失败多次") + # exit(-1) + if not success: + # logger.error("初始化阶段 60s 没有返回数据,时间太长,终止流程") + raise Exception("初始化阶段 60s 没有返回数据,时间太长,终止流程") + else: + logger.info("建立连接成功") + self.connect_num = 0 + + def _send(self): + send_ts = float(os.getenv("send_interval", 60)) + if not self.success: + return + + with open(self.context.file_path, "rb") as fp: + wav_data = fp.read() + meta_length = wav_data.index(b"data") + 8 + + try: + with open(self.context.file_path, "rb") as fp: + # 去掉 wav 文件的头信息 + fp.read(meta_length) + # 上一段音频的发送时间 + last_send_time = -1 + # 正文内容 
+ while True: + now_time = time.perf_counter() + if last_send_time == -1: + chunk = fp.read(int(self.context.chunk_size)) + else: + interval_cnt = max( + int((now_time - last_send_time) / self.context.wait_time), + 1, + ) + chunk = fp.read(int(self.context.chunk_size * interval_cnt)) + if not chunk: + break + send_time_start = time.perf_counter() + self.ws.send(chunk, websocket.ABNF.OPCODE_BINARY) + self.send_time.append(send_time_start) + last_send_time = send_time_start + send_time_end = time.perf_counter() + if send_time_end - send_time_start > send_ts: + logger.error(f"发送延迟已经超过 {send_ts}s, 终止当前音频发送") + break + if (sleep_time := self.context.wait_time + now_time - send_time_end) > 0: + time.sleep(sleep_time) + logger.info("当条语音数据发送完成") + self.ws.send(json.dumps({"end": True})) + logger.info("2s 后关闭双向连接.") + except BrokenPipeError: + logger.error("发送数据出错,被测服务出现故障") + except Exception as e: + logger.error(f"Exception: {e}") + logger.error(f"{traceback.print_exc()}") + logger.error("发送数据失败") + self.success = False + # self.close_time = time.perf_counter() + int(os.getenv("api_timeout", 2)) + self.close_time = time.perf_counter() + 20 * 60 + + def _recv(self): + try: + while self.ws.connected and self.success: + recv_data = self.ws.recv() + if isinstance(recv_data, str): + if recv_data := str(recv_data): + self.recv_time.append(time.perf_counter()) + # 识别到最后的合并结果后再关闭 + recognition_results = StreamResultModel(**json.loads(recv_data)).recognition_results + if ( + recognition_results.final_result + and recognition_results.start_time == 0 + and recognition_results.end_time == 0 + and recognition_results.para_seq == 0 + ): + self.success = False + else: + self.predict_data.append(recv_data) + # if recv_data.recognition_results.final_result and (IN_TEST or os.getenv('test')): + # logger.info(f"recv_data {recv_data}") + else: + self.success = False + raise Exception("返回的结果不是字符串形式") + except websocket.WebSocketConnectionClosedException: + 
logger.error("WebSocketConnectionClosedException") + except ValidationError as e: + logger.error("返回的结果不符合格式") + logger.error(f"Exception is {e}") + os._exit(1) + except OSError: + pass + except Exception: + logger.error(f"{traceback.print_exc()}") + logger.error("处理被测服务返回数据时出错") + self.success = False + + def _close(self): + while time.perf_counter() < self.close_time and self.success: + # while not self.success: + time.sleep(1) + try: + self.ws.close() + except Exception as e: + print(e) + pass + + def _gen_result(self) -> dict: + if not self.predict_data: + logger.error("没有任何数据返回") + self.predict_data = [StreamResultModel(**json.loads(data)).recognition_results for data in self.predict_data] + # for item in self.predict_data: + # if item.final_result and (IN_TEST or os.getenv('test')): + # logger.info(f"recv_data {item}") + + return { + "fail": not self.predict_data, + "send_time": self.send_time, + "recv_time": self.recv_time, + "predict_data": self.predict_data, + } + + def _gen_init_data(self) -> dict: + return { + "parameter": { + "lang": self.context.lang, + "sample_rate": self.context.sample_rate, + "channel": self.context.channel, + "format": self.context.audio_format, + "bits": self.context.bits, + "enable_words": self.context.enable_words, + } + } diff --git a/utils/client_async.py b/utils/client_async.py new file mode 100644 index 0000000..7a98db2 --- /dev/null +++ b/utils/client_async.py @@ -0,0 +1,277 @@ +import asyncio +import json +import os +import time +import traceback +from copy import deepcopy +from enum import Enum +from typing import Any, List + +import websockets +from pydantic_core import ValidationError + +from schemas.context import ASRContext +from schemas.stream import StreamResultModel, StreamWordsModel +from utils.logger import logger + +IN_TEST = os.getenv("SUBMIT_CONFIG_FILEPATH", None) is None + + +class STATUS_DATA(str, Enum): + WAITING_FIRST_INIT = "waiting_first_init" + FIRST_FAIL = "fail" + WAITING_SECOND_INIT = 
"waiting_second_init" + SECOND_INIT = "second_fail" + WAITING_THIRD_INIT = "waiting_third_init" + THIRD_INIT = "third_fail" + SUCCESS = "success" + CLOSED = "closed" + + +class ClientAsync: + def __init__(self, sut_url: str, context: ASRContext, idx: int) -> None: + # base_url = "ws://127.0.0.1:5003" + self.base_url = sut_url + "/recognition" + self.context: ASRContext = deepcopy(context) + self.idx = idx + # if not os.getenv("DATASET_FILEPATH", ""): + # self.base_url = "wss://speech.4paradigm.com/aibuds/api/v1/recognition" + # self.base_url = "ws://localhost:5003/recognition" + self.fail_count = 0 + self.close_time = 10**50 + self.send_time: List[float] = [] + self.recv_time: List[float] = [] + self.predict_data: List[Any] = [] + + async def _sender( + self, websocket: websockets.WebSocketClientProtocol, send_queue: asyncio.Queue, recv_queue: asyncio.Queue + ): + # 设置 websocket 缓冲区大小 + websocket.transport.set_write_buffer_limits(1024 * 1024 * 1024) + + # 发送初始化数据 + await websocket.send(json.dumps(self._gen_init_data())) + await send_queue.put(STATUS_DATA.WAITING_FIRST_INIT) + connect_status = await recv_queue.get() + if connect_status == STATUS_DATA.FIRST_FAIL: + return + + # 开始发送音频 + with open(self.context.file_path, "rb") as fp: + wav_data = fp.read() + meta_length = wav_data.index(b"data") + 8 + try: + with open(self.context.file_path, "rb") as fp: + # 去掉 wav 文件的头信息 + fp.read(meta_length) + wav_time = 0.0 + label_id = 0 + char_contains_rate_checktime = [] + char_contains_rate_checktime_id = 0 + while True: + now_time = time.perf_counter() + chunk = fp.read(int(self.context.chunk_size)) + if not chunk: + break + wav_time += self.context.wait_time + try: + self.send_time.append(time.perf_counter()) + await asyncio.wait_for(websocket.send(chunk), timeout=0.08) + except asyncio.exceptions.TimeoutError: + pass + while label_id < len(self.context.labels) and wav_time >= self.context.labels[label_id].start: + char_contains_rate_checktime.append(now_time + 3.0) + 
label_id += 1 + predict_text_len = sum(map(lambda x: len(x.text), self.predict_data)) + while char_contains_rate_checktime_id < len(char_contains_rate_checktime) and \ + char_contains_rate_checktime[char_contains_rate_checktime_id] <= now_time: + label_text_len = sum( + map(lambda x: len(x.answer), + self.context.labels[:char_contains_rate_checktime_id+1])) + if predict_text_len / self.context.char_contains_rate < label_text_len: + self.context.fail_char_contains_rate_num += 1 + char_contains_rate_checktime_id += 1 + await asyncio.sleep(max(0, self.context.wait_time - (time.perf_counter() - now_time))) + await websocket.send(json.dumps({"end": True})) + logger.info(f"第 {self.idx} 条数据,当条语音数据发送完成") + logger.info(f"第 {self.idx} 条数据,3s 后关闭双向连接.") + self.close_time = time.perf_counter() + 3 + except websockets.exceptions.ConnectionClosedError: + logger.error(f"第 {self.idx} 条数据发送过程中,连接断开") + except Exception: + logger.error(f"{traceback.print_exc()}") + logger.error(f"第 {self.idx} 条数据,发送数据失败") + + async def _recv( + self, websocket: websockets.WebSocketClientProtocol, send_queue: asyncio.Queue, recv_queue: asyncio.Queue + ): + await recv_queue.get() + try: + await asyncio.wait_for(websocket.recv(), timeout=2) + except asyncio.exceptions.TimeoutError: + await send_queue.put(STATUS_DATA.FIRST_FAIL) + logger.info(f"第 {self.idx} 条数据,初始化阶段, 2s 没收到 success 返回,超时了") + self.fail_count += 1 + return + except Exception as e: + await send_queue.put(STATUS_DATA.FIRST_FAIL) + logger.error(f"第 {self.idx} 条数据,初始化阶段, 收到异常:{e}") + self.fail_count += 1 + return + else: + await send_queue.put(STATUS_DATA.SUCCESS) + + # 开始接收语音识别结果 + try: + while websocket.open: + # 接收数据 + recv_data = await websocket.recv() + if isinstance(recv_data, str): + self.recv_time.append(time.perf_counter()) + recv_data = str(recv_data) + recv_data = json.loads(recv_data) + result = StreamResultModel(**recv_data) + recognition_results = result.asr_results + if ( + recognition_results.final_result + and not 
recognition_results.language + and recognition_results.start_time == 0 + and recognition_results.end_time == 0 + and recognition_results.para_seq == 0 + ): + pass + else: + self.predict_data.append(recognition_results) + else: + raise Exception("返回的结果不是字符串形式") + except websockets.exceptions.ConnectionClosedOK: + pass + except websockets.exceptions.ConnectionClosedError: + pass + except ValidationError as e: + logger.error(f"第 {self.idx} 条数据,返回的结果不符合格式") + logger.error(f"Exception is {e}") + os._exit(1) + except OSError: + pass + except Exception: + logger.error(f"{traceback.print_exc()}") + logger.error(f"第 {self.idx} 条数据,处理被测服务返回数据时出错") + + async def _action(self): + logger.info(f"第 {self.idx} 条数据开始测试") + + while self.fail_count < 3: + + send_queue = asyncio.Queue() + recv_queue = asyncio.Queue() + + self.send_time: List[float] = [] + self.recv_time: List[float] = [] + self.predict_data: List[Any] = [] + + async with websockets.connect(self.base_url) as websocket: + send_task = asyncio.create_task(self._sender(websocket, send_queue, recv_queue)) + recv_task = asyncio.create_task(self._recv(websocket, recv_queue, send_queue)) + + await asyncio.gather(send_task) + await asyncio.sleep(3) + + await asyncio.gather(recv_task) + + if self.send_time: + break + else: + self.fail_count += 1 + logger.info(f"第 {self.idx} 条数据,初始化阶段, 第 {self.fail_count} 次失败, 1s 后重试") + time.sleep(1) + + def action(self): + asyncio.run(self._action()) + return self._gen_result() + + def _gen_result(self) -> ASRContext: + if not self.predict_data: + logger.error(f"第 {self.idx} 条数据,没有任何数据返回") + self.context.append_preds(self.predict_data, self.send_time, self.recv_time) + self.context.fail = not self.predict_data + + punctuation_words: List[StreamWordsModel] = [] + for pred in self.predict_data: + punctuations = [",", ".", "!", "?"] + if pred.language == "zh": + punctuations = [",", "。", "!", "?"] + elif pred.language == "ja": + punctuations = ["、", "。", "!", "?"] + elif pred.language in ("ar", 
"fa"): + punctuations = ["،", ".", "!", "؟"] + elif pred.language == "el": + punctuations = [",", ".", "!", ";"] + elif pred.language == "ti": + punctuations = ["།"] + + for word in pred.words: + if word.text in punctuations: + punctuation_words.append(word) + start_times = list(map(lambda x: x.start_time, punctuation_words)) + start_times = sorted(start_times) + end_times = list(map(lambda x: x.end_time, punctuation_words)) + end_times = sorted(end_times) + + self.context.punctuation_num = len(self.context.labels) + label_n = len(self.context.labels) + for i, label in enumerate(self.context.labels): + label_left = (label.end - 0.7) + label_right = (label.end + 0.7) + if i < label_n - 1: + label_left = label.end + label_right = self.context.labels[i+1].start + + exist = False + + def upper_bound(x: float, lst: List[float]) -> int: + ans = -1 + left, right = 0, len(lst) - 1 + while left <= right: + mid = (left + right) // 2 + if lst[mid] >= x: + ans = mid + right = mid - 1 + else: + left = mid + 1 + return ans + + def lower_bound(x: float, lst: List[float]) -> int: + ans = -1 + left, right = 0, len(lst) - 1 + while left <= right: + mid = (left + right) // 2 + if lst[mid] <= x: + ans = mid + left = mid + 1 + else: + right = mid - 1 + return ans + + left_in_pred = upper_bound(label_left, start_times) + if left_in_pred != -1 and start_times[left_in_pred] <= label_right: + exist = True + right_in_pred = lower_bound(label_right, end_times) + if right_in_pred != -1 and end_times[right_in_pred] >= label_left: + exist = True + + if exist: + self.context.pred_punctuation_num += 1 + return self.context + + def _gen_init_data(self) -> dict: + return { + "parameter": { + "lang": None, + "sample_rate": self.context.sample_rate, + "channel": self.context.channel, + "format": self.context.audio_format, + "bits": self.context.bits, + "enable_words": self.context.enable_words, + } + } diff --git a/utils/client_callback.py b/utils/client_callback.py new file mode 100644 index 
0000000..407099a --- /dev/null +++ b/utils/client_callback.py @@ -0,0 +1,409 @@ +import logging +import os +import threading +import time +from typing import Dict, List, Optional + +import requests +from flask import Flask, abort, request +from pydantic import BaseModel, Field, ValidationError, field_validator + +from schemas.dataset import QueryData +from schemas.stream import StreamDataModel +from utils.evaluator_plus import evaluate_editops, evaluate_punctuation + +from .logger import log + +MY_POD_IP = os.environ["MY_POD_IP"] + + +class StopException(Exception): ... + + +class EvaluateResult(BaseModel): + lang: str + cer: float + align_start: Dict[int, int] = Field( + description="句首字对齐时间差值(ms) -> 对齐数" + ) + align_end: Dict[int, int] = Field( + description="句尾字对齐时间差值(ms) -> 对齐数" + ) + first_word_distance_sum: float = Field(description="句首字距离总和(s)") + last_word_distance_sum: float = Field(description="句尾字距离总和(s)") + rtf: float = Field(description="翻译速度") + first_receive_delay: float = Field(description="首包接收延迟(s)") + query_count: int = Field(description="音频数") + voice_count: int = Field(description="句子数") + pred_punctuation_num: int = Field(description="预测标点数") + label_punctuation_num: int = Field(description="标注标点数") + pred_sentence_punctuation_num: int = Field(description="预测句子标点数") + label_setence_punctuation_num: int = Field(description="标注句子标点数") + preds: List[StreamDataModel] = Field(description="预测结果") + label: QueryData = Field(description="标注结果") + + +class ResultModel(BaseModel): + taskId: str + status: str + message: str = Field("") + recognition_results: Optional[StreamDataModel] = Field(None) + + @field_validator("recognition_results", mode="after") + def convert_to_seconds(cls, v: Optional[StreamDataModel], values): + # 在这里处理除以1000的逻辑 + if v is None: + return v + v.end_time = v.end_time / 1000 + v.start_time = v.start_time / 1000 + for word in v.words: + word.start_time /= 1000 + word.end_time /= 1000 + return v + + +class ClientCallback: + def 
__init__(self, sut_url: str, port: int): + self.sut_url = sut_url #sut_url:ASR 服务的 URL(如 http://asr-service:8080) + self.port = port #port:当前客户端监听的端口(用于接收回调) + + #创建 Flask 应用并注册路由 + self.app = Flask(__name__) + self.app.add_url_rule( + "/api/asr/batch-callback/", + view_func=self.asr_callback, + methods=["POST"], + ) + self.app.add_url_rule( + "/api/asr-runner/report", + view_func=self.heartbeat, + methods=["POST"], + ) + """ + 路由 1:/api/asr/batch-callback/ + 接收 ASR 服务的识别结果回调(self.asr_callback 处理)。 + taskId 是路径参数,用于标识具体任务。 + 路由 2:/api/asr-runner/report + 接收 ASR 服务的心跳检测请求(self.heartbeat 处理)。 + """ + + logging.getLogger("werkzeug").disabled = True + threading.Thread( + target=self.app.run, args=("0.0.0.0", port), daemon=True + ).start() + self.mutex = threading.Lock() + self.finished = threading.Event() + self.product_avaiable = True + + self.reset() + + def reset(self): + self.begin_time = None + self.end_time = None + self.first_receive_time = None + self.last_heartbeat_time = None + self.app_on = False + self.para_seq = 0 + self.finished.clear() + self.error: Optional[str] = None + self.last_recognition_result: Optional[StreamDataModel] = None + self.recognition_results: List[StreamDataModel] = [] + + def asr_callback(self, taskId: str): + if self.app_on is False: + abort(400) + body = request.get_json(silent=True) # 静默解析JSON,失败时返回None + if body is None: + abort(404) + try: + result = ResultModel.model_validate(body) #将回调的 JSON 数据解析为 ResultModel 对象,确保结构符合预期。 + except ValidationError as e: + log.error("asr_callback: 结果格式错误: %s", e) + abort(404) + + #处理任务完成状态(FINISHED) + if result.status == "FINISHED": + with self.mutex: + self.stop() + return "ok" + #处理非运行状态(非 RUNNING) + if result.status != "RUNNING": + log.error( + "asr_callback: 结果状态错误: %s, message: %s", + result.status, + result.message, + ) + abort(404) + + recognition_result = result.recognition_results + if recognition_result is None: + log.error("asr_callback: 结果中没有recognition_results字段") + abort(404) + + 
with self.mutex: + if not self.app_on: + log.error("asr_callback: 应用已结束") + abort(400) + + if recognition_result.para_seq < self.para_seq: + error = "asr_callback: 结果中para_seq小于上一次的: %d < %d" % ( + recognition_result.para_seq, + self.para_seq, + ) + log.error(error) + if self.error is None: + self.error = error + self.stop() + abort(404) + if recognition_result.para_seq > self.para_seq + 1: + error = ( + "asr_callback: 结果中para_seq大于上一次的+1 \ +说明存在para_seq = %d没有final_result为True确认" + % (self.para_seq + 1,) + ) + log.error(error) + if self.error is None: + self.error = error + self.stop() + abort(404) + if ( + self.last_recognition_result is not None + and recognition_result.start_time + < self.last_recognition_result.end_time + ): + error = "asr_callback: 结果中start_time小于上一次的end_time: %s < %s" % ( + recognition_result.start_time, + self.last_recognition_result.end_time, + ) + log.error(error) + if self.error is None: + self.error = error + self.stop() + abort(404) + + self.recognition_results.append(recognition_result) + if recognition_result.final_result is True: + self.para_seq = recognition_result.para_seq + if self.last_recognition_result is None: + self.first_receive_time = time.time() + self.last_recognition_result = recognition_result + + return "ok" + + """ + def heartbeat(self): + if self.app_on is False: + abort(400) + body = request.get_json(silent=True) + if body is None: + abort(404) + status = body.get("status") + if status != "RUNNING": + message = body.get("message", "") + if message: + message = ", message: " + message + log.error("heartbeat: 状态错误: %s%s", status, message) + return "ok" + + with self.mutex: + self.last_heartbeat_time = time.time() + return "ok" + + """ + + def predict( + self, + language: Optional[str], + audio_file: str, + audio_duration: float, + task_id: str, + ): + #使用互斥锁确保线程安全 + with self.mutex: + if self.app_on: + log.error("上一音频尚未完成处理,流程出现异常") + raise StopException() + self.reset() + self.app_on = True + + #请求URL:self.sut_url + 
"/predict"(如 http://localhost:8080/predict) + resp = requests.post( + self.sut_url + "/predict", + data={ + "language": language, + "taskId": task_id, + "progressCallbackUrl": "http://%s:%d/api/asr/batch-callback/%s" + % (MY_POD_IP, self.port, task_id), + "heartbeatUrl": "http://%s:%d/api/asr-runner/report" % (MY_POD_IP, self.port), + }, + files={"file": (audio_file, open(audio_file, "rb"))}, + timeout=60, + ) + + #响应处理 + if resp.status_code != 200: + log.error("/predict接口返回http code %s", resp.status_code) + raise StopException() + resp.raise_for_status() + + status = resp.json().get("status") + if status != "OK": + log.error("/predict接口返回非OK状态: %s", status) + raise StopException() + #辅助线程 + threading.Thread( + target=self.dead_line_check, args=(audio_duration,), daemon=True + ).start() + threading.Thread(target=self.heartbeat_check, daemon=True).start() + + def dead_line_check(self, audio_duration: float): + begin_time = time.time() + self.begin_time = begin_time + + # 初始化 10s 延迟检测 + self.sleep_to(begin_time + 10) + with self.mutex: + if self.last_recognition_result is None: + error = "首包延迟内未收到返回" + log.error(error) + if self.error is None: + self.error = error + self.stop() + return + + # 第一次30s检测 + next_checktime = begin_time + 30 + ddl = begin_time + max((audio_duration / 6) + 10, 30) + while time.time() < ddl: + self.sleep_to(next_checktime) + with self.mutex: + if self.finished.is_set(): + return + if self.last_recognition_result is None: + error = "检测追赶线过程中获取最后一次识别结果异常" + log.error(error) + if self.error is None: + self.error = error + self.stop() + return + last_end_time = self.last_recognition_result.end_time + expect_end_time = (next_checktime - begin_time - 30) * 5.4 + if last_end_time < expect_end_time: + log.warning( + "识别时间位置 %s 被死亡追赶线 %s 已追上,将置为产品不可用", + last_end_time, + expect_end_time, + ) + self.product_avaiable = False + self.sleep_to(ddl) + break + next_checktime = last_end_time / 5.4 + begin_time + 30 + 1 + next_checktime = min(next_checktime, 
ddl) + with self.mutex: + if self.finished.is_set(): + return + + log.warning("识别速度rtf低于1/6, 将置为产品不可用") + self.product_avaiable = False + self.sleep_to(begin_time + max((audio_duration / 3) + 10, 30)) + with self.mutex: + if self.finished.is_set(): + return + error = "处理时间超过ddl %s " % (ddl - begin_time) + log.error(error) + if self.error is None: + self.error = error + self.stop() + return + + def heartbeat_check(self): + self.last_heartbeat_time = time.time() + while True: + with self.mutex: + if self.finished.is_set(): + return + if time.time() - self.last_heartbeat_time > 30: + error = "asr_runner 心跳超时 %s" % ( + time.time() - self.last_heartbeat_time + ) + log.error(error) + if self.error is None: + self.error = error + self.stop() + return + time.sleep(5) + + def sleep_to(self, to: float): + seconds = to - time.time() + if seconds <= 0: + return + time.sleep(seconds) + + def stop(self): + self.end_time = time.time() + self.finished.set() + self.app_on = False + + def evaluate(self, query_data: QueryData): + log.info("开始评估") + if ( + self.begin_time is None + or self.end_time is None + or self.first_receive_time is None + ): + if self.begin_time is None: + log.error("评估流程异常 无开始时间") + if self.end_time is None: + log.error("评估流程异常 无结束时间") + if self.first_receive_time is None: + log.error("评估流程异常 无首次接收时间") + raise StopException() + rtf = max(self.end_time - self.begin_time - 10, 0) / query_data.duration + first_receive_delay = max(self.first_receive_time - self.begin_time, 0) + query_count = 1 + voice_count = len(query_data.voice) + preds = self.recognition_results + self.recognition_results = list( + filter(lambda x: x.final_result, self.recognition_results) + ) + ( + pred_punctuation_num, + label_punctuation_num, + pred_sentence_punctuation_num, + label_setence_punctuation_num, + ) = evaluate_punctuation(query_data, self.recognition_results) + + ( + cer, + _, + align_start, + align_end, + first_word_distance_sum, + last_word_distance_sum, + ) = 
evaluate_editops(query_data, self.recognition_results) + + if align_start[300] / voice_count < 0.8: + log.warning( + "评估结果首字300ms对齐率 %s < 0.8, 将置为产品不可用", + align_start[300] / voice_count, + ) + self.product_avaiable = False + + return EvaluateResult( + lang=query_data.lang, + cer=cer, + align_start=align_start, + align_end=align_end, + first_word_distance_sum=first_word_distance_sum, + last_word_distance_sum=last_word_distance_sum, + rtf=rtf, + first_receive_delay=first_receive_delay, + query_count=query_count, + voice_count=voice_count, + pred_punctuation_num=pred_punctuation_num, + label_punctuation_num=label_punctuation_num, + pred_sentence_punctuation_num=pred_sentence_punctuation_num, + label_setence_punctuation_num=label_setence_punctuation_num, + preds=preds, + label=query_data, + ) diff --git a/utils/evaluate.py b/utils/evaluate.py new file mode 100644 index 0000000..3ad5bf2 --- /dev/null +++ b/utils/evaluate.py @@ -0,0 +1,445 @@ +import os +import subprocess +from collections import defaultdict +from typing import Dict, List + +from utils import asr_ter +from utils.logger import logger + +log_mid_result = int(os.getenv("log", 0)) == 1 + + +class AsrEvaluator: + def __init__(self) -> None: + self.query_count = 0 # query 数目(语音数目) + self.voice_count = 0 # 有开始和结束时间的语音条数(用于 RTF 计算) + self.cut_punc = [] # 切分标点符号,需要注意切分的时候根据列表中的顺序进行切分,比如 ... 应该放到 . 
之前。 + # cer 属性 + self.one_minus_cer = 0 # 每个 query 的 1 - cer 和 + self.token_count = 0 # 每个 query 的字数/词数和 + # 句子切分率属性 + self.miss_count = 0 # 每个 query miss-count 和 + self.more_count = 0 # 每个 query more-count 和 + self.cut_count = 0 # 每个 query cut-count 和 + self.rate = 0 # 每个 query 的 cut-rate 和 + # detail case + self.result = [] + + def evaluate(self, eval_result): + pass + + def post_evaluate(self): + pass + + def gen_result(self) -> Dict: + output_result = dict() + output_result["query_count"] = self.query_count + output_result["voice_count"] = self.voice_count + output_result["token_cnt"] = self.token_count + output_result["one_minus_cer"] = self.one_minus_cer + output_result["one_minus_cer_metrics"] = self.one_minus_cer / self.query_count + output_result["miss_count"] = self.miss_count + output_result["more_count"] = self.more_count + output_result["cut_count"] = self.cut_count + output_result["cut_rate"] = self.rate + output_result["cut_rate_metrics"] = self.rate / self.query_count + output_result["rtf"] = self.rtf + output_result["rtf_end"] = self.rtf_end + output_result["rtf_metrics"] = self.rtf / self.voice_count + output_result["rtf_end_metrics"] = self.rtf_end / self.voice_count + + detail_case = self.result + return output_result, detail_case + + def _get_predict_final_sentences(self, predict_data: List[Dict]) -> List[str]: + """ + 获取 predict data 数据,然后将其中 final 的句子拿出来,放到列表里。 + """ + return [ + item["recoginition_results"]["text"] + for item in predict_data + if item["recoginition_results"]["final_result"] + ] + + def _sentence_final_index(self, sentences: List[str], tokens: List[str], tokenizer="word") -> List[int]: + """ + 获取 sentence 结束的字对应的 token 索引值。 + """ + token_index_list = [] + token_idx = 0 + for sentence in sentences: + for token in Tokenizer.tokenize(sentence, tokenizer): + if token not in tokens: + continue + while tokens[token_idx] != token: + token_idx += 1 + token_index_list.append(token_idx) + return token_index_list + + def 
_voice_to_cut_sentence(self, voice_sentences: List[str]) -> Dict: + """ + 将数据集的语音片段转换为最小切分单元列表。 + 使用 cut_punc 中的所有 punc 进行依次切分,最后去除掉完全空的内容 + 示例: + ["你好,你好呀", "你好,我在写抽象的代码逻辑"] + -> + cut_sentences: ["你好", "你好呀", "你好", "我在写抽象的代码逻辑"] + cut_sentence_index_list: [1, 3] ("你好呀" 对应 1-idx, "我在写抽象的代码逻辑" 对应 3-idx) + """ + voice_sentences_result = defaultdict(list) + for voice_sentence in voice_sentences: + sentence_list = [voice_sentence] + sentence_tmp_list = [] + for punc in self.cut_punc: + for sentence in sentence_list: + sentence_tmp_list.extend(sentence.split(punc)) + sentence_list, sentence_tmp_list = sentence_tmp_list, [] + sentence_list = [item for item in sentence_list if item] + # 切分后的句子单元 + voice_sentences_result["cut_sentences"].extend(sentence_list) + # 每个语音单元最后一个字对应的句子单元的索引 + voice_sentences_result["cut_sentence_index_list"].append(len(voice_sentences_result["cut_sentences"]) - 1) + return voice_sentences_result + + def _voice_bytes_index(self, timestamp, sample_rate=16000, bit_depth=16, channels=1): + """ + timestamp: 时间, 单位秒 + """ + bytes_per_sample = bit_depth // 8 + return timestamp * sample_rate * bytes_per_sample * channels + + +class AsrZhEvaluator(AsrEvaluator): + """ + 中文的评估方式 + """ + + def __init__(self): + super().__init__() + self.cut_zh_punc = ["······", "......", "。", ",", "?", "!", ";", ":"] + self.cut_en_punc = ["...", ".", ",", "?", "!", ";", ":"] + self.cut_punc = self.cut_zh_punc + self.cut_en_punc + + def evaluate(self, eval_result) -> Dict: + self.query_count += 1 + self.voice_count += len(eval_result["voice"]) + + # 获取,标注结果 & 语音单元(非句子单元) + label_voice_sentences = [item["answer"] for item in eval_result["voice"]] + # print("label_voice_sentences", label_voice_sentences) + # 获取,标注结果 & 语音单元 -> 句子单元的转换情况 + voice_to_cut_info = self._voice_to_cut_sentence(label_voice_sentences) + # print("voice_to_cut_info", voice_to_cut_info) + # 获取,标注结果 & 句子单元 + label_sentences = voice_to_cut_info["cut_sentences"] + # 获取,标注结果 & 语音单元 -> 
句子单元的映射关系,每个语音单元最后一个字对应的句子单元的索引 + cut_sentence_index_list = voice_to_cut_info["cut_sentence_index_list"] + # 标注结果 & 句子单元 & norm 操作 + label_sentences = [self._sentence_norm(sentence) for sentence in label_sentences] + if log_mid_result: + logger.info(f"label_sentences {label_sentences}") + # print("label_sentences", label_sentences) + + # 预测结果 & 句子单元 + predict_sentences_raw = self._get_predict_final_sentences(eval_result["predict_data"]) + # print("predict_sentences_raw", predict_sentences_raw) + # 预测结果 & 句子单元 & norm 操作 + predict_sentences = [self._sentence_norm(sentence) for sentence in predict_sentences_raw] + if log_mid_result: + logger.info(f"predict_sentences {predict_sentences}") + # print("predict_sentences", predict_sentences) + + # 基于最小编辑距离进行 token 匹配,获得匹配后的 token 列表 + label_tokens, predict_tokens = self._sentence_transfer("".join(label_sentences), "".join(predict_sentences)) + + # cer 计算 + cer_info = self.cer(label_sentences, predict_sentences) + if log_mid_result: + logger.info(f"cer_info {cer_info}") + # print("cer_info", cer_info) + self.one_minus_cer += cer_info["one_minus_cer"] + self.token_count += cer_info["token_count"] + + # 句子切分准召率 + cut_info = self.cut_rate(label_sentences, predict_sentences, label_tokens, predict_tokens) + + if log_mid_result: + logger.info(f"{cut_info['miss_count']}, {cut_info['more_count']}, {cut_info['rate']}") + # print("cut_info", cut_info) + # print(cut_info["miss_count"], cut_info["more_count"], cut_info["rate"]) + self.miss_count += cut_info["miss_count"] + self.more_count += cut_info["more_count"] + self.cut_count += cut_info["cut_count"] + self.rate += cut_info["rate"] + + self.result.append( + { + "label_tokens": label_tokens, + "predict_tokens": predict_tokens, + "one_minus_cer": cer_info["one_minus_cer"], + "token_count": cer_info["one_minus_cer"], + "miss_count": cut_info["miss_count"], + "more_count": cut_info["more_count"], + "cut_count": cut_info["cut_count"], + "rate": cut_info["rate"], + } + ) + + def 
cer(self, label_sentences, predict_sentences): + pred_str = ''.join(predict_sentences) if predict_sentences is not None else '' + label_str = ''.join(label_sentences) + r = asr_ter.calc_ter_speechio(pred_str, label_str) + one_minus_cer = max(1.0 - r['ter'], 0) + token_count = r['ref_all_token_cnt'] + return {"one_minus_cer": one_minus_cer, "token_count": token_count} + + def cut_rate(self, label_sentences, predict_sentences, label_tokens, predict_tokens): + label_final_index_list = set(self._sentence_final_index(label_sentences, label_tokens)) + pred_final_index_list = set(self._sentence_final_index(predict_sentences, predict_tokens)) + label_sentence_count = len(label_final_index_list) + miss_count = len(label_final_index_list - pred_final_index_list) + more_count = len(pred_final_index_list - label_final_index_list) + rate = max(1 - (miss_count + more_count * 2) / label_sentence_count, 0) + return { + "miss_count": miss_count, + "more_count": more_count, + "cut_count": label_sentence_count, + "rate": rate, + "label_final_index_list": label_final_index_list, + "pred_final_index_list": pred_final_index_list, + } + + def _sentence_norm(self, sentence, tokenizer="word"): + """ + 对句子进行 norm 操作 + """ + from utils.speechio import textnorm_zh as textnorm + + if tokenizer == "word": + normalizer = textnorm.TextNorm( + to_banjiao=True, + to_upper=True, + to_lower=False, + remove_fillers=True, + remove_erhua=False, # 这里同批量识别不同,改成了 False + check_chars=False, + remove_space=False, + cc_mode="", + ) + return normalizer(sentence) + else: + logger.error("tokenizer error, not support.") + + def _sentence_transfer(self, label_sentence: str, predict_sentence: str, tokenizer="char"): + """ + 基于最小编辑距离,将 label 和 predict 进行字的位置匹配,并生成转换后的结果 + args: + label: "今天的通话质量不错呀昨天的呢" + predict: "今天的通话质量不错昨天呢星期" + tokenizer: 分词方式 + return: + label: ["今", "天", "的", "通", "话", "质", "量", "不", "错", "呀", "昨", "天", "的", "呢", None, None] + predict: ["今", "天", "的", "通", "话", "质", "量", "不", "错", None, "昨", 
"天", None, "呢", "星", "期"] + """ + from utils.speechio import error_rate_zh as error_rate + + if tokenizer == "char": + alignment, score = error_rate.EditDistance( + error_rate.tokenize_text(label_sentence, tokenizer), + error_rate.tokenize_text(predict_sentence, tokenizer), + ) + label_tokens, pred_tokens = [], [] + for align in alignment: + # print(align.__dict__) + label_tokens.append(align.ref) + pred_tokens.append(align.hyp) + return (label_tokens, pred_tokens) + else: + logger.error("tokenizer 出错了,暂时不支持其它的") + + def _pred_data_transfer(self, predict_data, recv_time): + """ + predict_data = [ + {"recoginition_results": {"text": "1", "final_result": False, "para_seq": 0}}, + {"recoginition_results": {"text": "12", "final_result": False, "para_seq": 0}}, + {"recoginition_results": {"text": "123", "final_result": True, "para_seq": 0}}, + {"recoginition_results": {"text": "4", "final_result": False, "para_seq": 0}}, + {"recoginition_results": {"text": "45", "final_result": False, "para_seq": 0}}, + {"recoginition_results": {"text": "456", "final_result": True, "para_seq": 0}}, + ] + recv_time = [1, 3, 5, 6, 7, 8] + + -> + + [ + [{'text': '1', 'time': 1}, {'text': '12', 'time': 3}, {'text': '123', 'time': 5}], + [{'text': '4', 'time': 6}, {'text': '45', 'time': 7}, {'text': '456', 'time': 8}], + ] + """ + pred_sentence_info = [] + pred_sentence_index = 0 + for predict_item, recv_time_item in zip(predict_data, recv_time): + if len(pred_sentence_info) == pred_sentence_index: + pred_sentence_info.append([]) + pred_sentence_info[pred_sentence_index].append( + { + "text": predict_item["recoginition_results"]["text"], + "time": recv_time_item, + } + ) + if predict_item["recoginition_results"]["final_result"]: + pred_sentence_index += 1 + return pred_sentence_info + + +class AsrEnEvaluator(AsrEvaluator): + """ + 英文的评估方式 + """ + + def evaluate(self, eval_result) -> Dict: + self.query_count += 1 + self.voice_count += len(eval_result["voice"]) + + # 获取,标注结果 & 语音单元(非句子单元) + 
label_voice_sentences = [item["answer"] for item in eval_result["voice"]] + # print("label_voice_sentences", label_voice_sentences) + # 获取,标注结果 & 语音单元 -> 句子单元的转换情况 + voice_to_cut_info = self._voice_to_cut_sentence(label_voice_sentences) + # print("voice_to_cut_info", voice_to_cut_info) + # 获取,标注结果 & 句子单元 + label_sentences = voice_to_cut_info["cut_sentences"] + # 获取,标注结果 & 语音单元 -> 句子单元的映射关系,每个语音单元最后一个字对应的句子单元的索引 + cut_sentence_index_list = voice_to_cut_info["cut_sentence_index_list"] + # 标注结果 & 句子单元 & norm 操作 + label_sentences = self._sentence_list_norm(label_sentences) + # [self._sentence_norm(sentence) for sentence in label_sentences] + # print("label_sentences", label_sentences) + if log_mid_result: + logger.info(f"label_sentences {label_sentences}") + + # 预测结果 & 句子单元 + predict_sentences_raw = self._get_predict_final_sentences(eval_result["predict_data"]) + # print("predict_sentences_raw", predict_sentences_raw) + # 预测结果 & 句子单元 & norm 操作 + predict_sentences = self._sentence_list_norm(predict_sentences_raw) + # [self._sentence_norm(sentence) for sentence in predict_sentences_raw] + # print("predict_sentences", predict_sentences) + if log_mid_result: + logger.info(f"predict_sentences {predict_sentences}") + + label_tokens, predict_tokens = self._sentence_transfer(" ".join(label_sentences), " ".join(predict_sentences)) + # print(label_tokens) + # print(predict_tokens) + + # cer 计算 + cer_info = self.cer(label_tokens, predict_tokens) + # print("cer_info", cer_info) + if log_mid_result: + logger.info(f"cer_info {cer_info}") + self.one_minus_cer += cer_info["one_minus_cer"] + self.token_count += cer_info["token_count"] + + # 句子切分准召率 + cut_info = self.cut_rate(label_sentences, predict_sentences, label_tokens, predict_tokens) + # print(cut_info["miss_count"], cut_info["more_count"], cut_info["rate"]) + # print("cut_info", cut_info) + if log_mid_result: + logger.info(f"{cut_info['miss_count']}, {cut_info['more_count']}, {cut_info['rate']}") + self.miss_count += 
cut_info["miss_count"] + self.more_count += cut_info["more_count"] + self.cut_count += cut_info["cut_count"] + self.rate += cut_info["rate"] + + self.result.append( + { + "label_tokens": label_tokens, + "predict_tokens": predict_tokens, + "one_minus_cer": cer_info["one_minus_cer"], + "token_count": cer_info["one_minus_cer"], + "miss_count": cut_info["miss_count"], + "more_count": cut_info["more_count"], + "cut_count": cut_info["cut_count"], + "rate": cut_info["rate"], + } + ) + + def cer(self, label_tokens, predict_tokens): + s, d, i, c = 0, 0, 0, 0 + for label_token, predict_token in zip(label_tokens, predict_tokens): + if label_token == predict_token: + c += 1 + elif predict_token is None: + d += 1 + elif label_token is None: + i += 1 + else: + s += 1 + cer = (s + d + i) / (s + d + c) + one_minus_cer = max(1.0 - cer, 0) + token_count = s + d + c + return {"one_minus_cer": one_minus_cer, "token_count": token_count} + + def cut_rate(self, label_sentences, predict_sentences, label_tokens, predict_tokens): + label_final_index_list = set(self._sentence_final_index(label_sentences, label_tokens, "whitespace")) + pred_final_index_list = set(self._sentence_final_index(predict_sentences, predict_tokens, "whitespace")) + label_sentence_count = len(label_final_index_list) + miss_count = len(label_final_index_list - pred_final_index_list) + more_count = len(pred_final_index_list - label_final_index_list) + rate = max(1 - (miss_count + more_count * 2) / label_sentence_count, 0) + return { + "miss_count": miss_count, + "more_count": more_count, + "cut_count": label_sentence_count, + "rate": rate, + "label_final_index_list": label_final_index_list, + "pred_final_index_list": pred_final_index_list, + } + + def _sentence_list_norm(self, sentence_list, tokenizer="whitespace"): + pwd = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + with open('./predict.txt', 'w', encoding='utf-8') as fp: + for idx, sentence in enumerate(sentence_list): + fp.write('%s\t%s\n' % (idx, 
sentence)) + subprocess.run( + f'PYTHONPATH={pwd}/utils/speechio python {pwd}/utils/speechio/textnorm_en.py --has_key --to_upper ./predict.txt ./predict_norm.txt', + shell=True, + check=True, + ) + sentence_norm = [] + with open('./predict_norm.txt', 'r', encoding='utf-8') as fp: + for line in fp.readlines(): + line_split_result = line.strip().split('\t', 1) + if len(line_split_result) >= 2: + sentence_norm.append(line_split_result[1]) + # 有可能没有 norm 后就没了 + return sentence_norm + + def _sentence_transfer(self, label_sentence: str, predict_sentence: str, tokenizer="whitespace"): + """ + 基于最小编辑距离,将 label 和 predict 进行字的位置匹配,并生成转换后的结果 + args: + label: "HELLO WORLD ARE U OK YEP" + predict: "HELLO WORLD U ARE U OK YEP" + tokenizer: 分词方式 + return: + label: ["HELLO", "WORLD", None, "ARE", "U", "OK", "YEP"] + predict: ["HELLO", "WORLD", "U", "ARE", "U", "OK", "YEP"] + """ + from utils.speechio import error_rate_zh as error_rate + + if tokenizer == "whitespace": + alignment, score = error_rate.EditDistance( + error_rate.tokenize_text(label_sentence, tokenizer), + error_rate.tokenize_text(predict_sentence, tokenizer), + ) + label_tokens, pred_tokens = [], [] + for align in alignment: + label_tokens.append(align.ref) + pred_tokens.append(align.hyp) + return (label_tokens, pred_tokens) + else: + logger.error("tokenizer 出错了,暂时不支持其它的") + + def post_evaluate(self) -> Dict: + pass diff --git a/utils/evaluator.py b/utils/evaluator.py new file mode 100644 index 0000000..c51d765 --- /dev/null +++ b/utils/evaluator.py @@ -0,0 +1,195 @@ +# coding: utf-8 + +import os +from collections import Counter, defaultdict +from itertools import chain +from typing import List + +from schemas.context import ASRContext +from utils.logger import logger +from utils.metrics import cer, cut_rate, cut_sentence, first_delay +from utils.metrics import mean_on_counter, patch_unique_token_count +from utils.metrics import revision_delay, text_align, token_mapping +from utils.metrics import var_on_counter +from 
# Local/test detection: SUBMIT_CONFIG_FILEPATH is only set on real submission
# jobs, so its absence means we are running locally or under tests.
# BUG FIX: the previous expression `os.getenv("SUBMIT_CONFIG_FILEPATH", 1) is
# None` could never be True — getenv returns the default 1 (never None) when
# the variable is unset, so the debug prints below were unconditionally off.
IN_TEST = os.getenv("SUBMIT_CONFIG_FILEPATH") is None
text align count + start_time_align_count, end_time_align_count, start_end_count = text_align(context) + self.start_time_align_count += start_time_align_count + self.end_time_align_count += end_time_align_count + self.start_end_count += start_end_count + + # cer, wer + sentences_gt: List[str] = [item.answer for item in context.labels] + sentences_dt: List[str] = [ + item.recognition_results.text for item in context.preds if item.recognition_results.final_result + ] + if IN_TEST: + print(sentences_gt) + print(sentences_dt) + + sentences_gt: List[str] = cut_sentence(sentences_gt, TOKENIZER_MAPPING.get(context.lang)) + sentences_dt: List[str] = cut_sentence(sentences_dt, TOKENIZER_MAPPING.get(context.lang)) + if IN_TEST: + print(sentences_gt) + print(sentences_dt) + + # norm & tokenize + tokens_gt: List[List[str]] = Tokenizer.norm_and_tokenize(sentences_gt, context.lang) + tokens_dt: List[List[str]] = Tokenizer.norm_and_tokenize(sentences_dt, context.lang) + if IN_TEST: + print(tokens_gt) + print(tokens_dt) + + # cer + tokens_gt_mapping, tokens_dt_mapping = token_mapping(list(chain(*tokens_gt)), list(chain(*tokens_dt))) + one_minue_cer, token_count = cer(tokens_gt_mapping, tokens_dt_mapping) + self.one_minus_cer += one_minue_cer + self.token_count += token_count + self.one_minus_cer_lang[context.lang] += one_minue_cer + + # cut-rate + rate, sentence_cnt, miss_cnt, more_cnt = cut_rate(tokens_gt, tokens_dt, tokens_gt_mapping, tokens_dt_mapping) + self.cut_rate += rate + self.sentence_count += sentence_cnt + self.miss_count += miss_cnt + self.more_count += more_cnt + + # detail-case + self.context = context + + # 时延 + if self.context.send_time_start_end and self.context.recv_time_start_end: + send_interval = self.context.send_time_start_end[1] - self.context.send_time_start_end[0] + recv_interval = self.context.recv_time_start_end[1] - self.context.send_time_start_end[0] + self.send_interval.append(send_interval) + self.last_recv_interval.append(recv_interval) + 
logger.info( + f"""第一次发送时间{self.context.send_time_start_end[0]}, \ + 最后一次发送时间{self.context.send_time_start_end[-1]}, \ + 发送间隔 {send_interval}, + 最后一次接收时间{self.context.recv_time_start_end[-1]}, \ + 接收间隔 {recv_interval} + """ + ) + + def post_evaluate(self): + pass + + def gen_result(self): + result = { + "query_count": self.query_count, + "voice_count": self.voice_count, + "pred_voice_count": self.first_delay_cnt, + "first_delay_mean": self.first_delay_sum / self.first_delay_cnt if self.first_delay_cnt > 0 else 10, + "revision_delay_mean": ( + self.revision_delay_sum / self.revision_delay_cnt if self.revision_delay_cnt > 0 else 10 + ), + "patch_token_mean": mean_on_counter(self.patch_unique_cnt_counter), + "patch_token_var": var_on_counter(self.patch_unique_cnt_counter), + "start_time_align_count": self.start_time_align_count, + "end_time_align_count": self.end_time_align_count, + "start_time_align_rate": self.start_time_align_count / self.sentence_count, + "end_time_align_rate": self.end_time_align_count / self.sentence_count, + "start_end_count": self.start_end_count, + "one_minus_cer": self.one_minus_cer / self.query_count, + "token_count": self.token_count, + "miss_count": self.miss_count, + "more_count": self.more_count, + "sentence_count": self.sentence_count, + "cut_rate": self.cut_rate / self.query_count, + "fail_count": self.fail_count, + "send_interval": self.send_interval, + "last_recv_interval": self.last_recv_interval, + "fail_char_contains_rate_num": self.fail_char_contains_rate_num, + "punctuation_rate": self.pred_punctuation_num / self.punctuation_num, + } + for lang in self.one_minus_cer_lang: + result["one_minus_cer_" + lang] = \ + self.one_minus_cer_lang[lang] / self.query_count_lang[lang] + + if ( + result["first_delay_mean"] + > float(os.getenv("FIRST_DELAY_THRESHOLD", "5")) + or + self.fail_char_contains_rate_num / self.voice_count > 0.1 + # or + # result["punctuation_rate"] < 0.8 + ): + change_product_available() + return result + + def 
gen_detail_case(self): + return self.context diff --git a/utils/evaluator_plus.py b/utils/evaluator_plus.py new file mode 100644 index 0000000..66d7c5d --- /dev/null +++ b/utils/evaluator_plus.py @@ -0,0 +1,293 @@ +from collections import defaultdict +from copy import deepcopy +from itertools import chain +from typing import Dict, List, Tuple + +import Levenshtein + +from schemas.dataset import QueryData +from schemas.stream import StreamDataModel, StreamWordsModel +from utils.metrics import Tokenizer +from utils.metrics_plus import replace_general_punc +from utils.tokenizer import TOKENIZER_MAPPING + + +def evaluate_editops( + query_data: QueryData, recognition_results: List[StreamDataModel] +) -> Tuple[float, int, Dict[int, int], Dict[int, int], float, float]: + """返回cer 句子总数 首字对齐情况 尾字对齐情况 首字时间差值和 尾字时间差值和 + 对齐情况为 时间差值->对齐数""" + recognition_results = deepcopy(recognition_results) + lang = query_data.lang + voices = query_data.voice + sentences_pred = [ + recognition_result.text for recognition_result in recognition_results + ] + sentences_label = [item.answer for item in voices] + + tokenizer_type = TOKENIZER_MAPPING[lang] + sentences_pred = replace_general_punc(sentences_pred, tokenizer_type) + sentences_label = replace_general_punc(sentences_label, tokenizer_type) + + # norm & tokenize + tokens_pred = Tokenizer.norm_and_tokenize(sentences_pred, lang) + tokens_label = Tokenizer.norm_and_tokenize(sentences_label, lang) + + normed_words = [] + for recognition_result in recognition_results: + words = list(map(lambda x: x.text, recognition_result.words)) + normed_words.extend(words) + normed_words = replace_general_punc(normed_words, tokenizer_type) + normed_words = Tokenizer.norm(normed_words, lang) + + # 预测中的结果进行相同的norm和tokenize操作 + normed_word_index = 0 + for recognition_result in recognition_results: + next_index = normed_word_index + len(recognition_result.words) + tokens_words = Tokenizer.tokenize( + normed_words[normed_word_index:next_index], lang + ) + 
normed_word_index = next_index + stream_words: List[StreamWordsModel] = [] + # 将原words进行norm和tokenize操作后赋值为对应原word的时间 + for raw_stream_word, tokens_word in zip( + recognition_result.words, tokens_words + ): + for word in tokens_word: + stream_words.append( + StreamWordsModel( + text=word, + start_time=raw_stream_word.start_time, + end_time=raw_stream_word.end_time, + ) + ) + recognition_result.words = stream_words + + # 将words对应上对分词后的词,从而使得分词后的词有时间 + pred_word_time: List[StreamWordsModel] = [] + for token_pred, recognition_result in zip(tokens_pred, recognition_results): + word_index = 0 + for word in recognition_result.words: + try: + token_index = token_pred.index(word.text, word_index) + for i in range(word_index, token_index + 1): + pred_word_time.append( + StreamWordsModel( + text=token_pred[i], + start_time=word.start_time, + end_time=word.end_time, + ) + ) + word_index = token_index + 1 + except ValueError: + pass + if len(recognition_result.words) > 0: + word = recognition_result.words[-1] + start_time = word.start_time + end_time = word.end_time + else: + start_time = recognition_result.start_time + end_time = recognition_result.end_time + for i in range(word_index, len(token_pred)): + pred_word_time.append( + StreamWordsModel( + text=token_pred[i], + start_time=start_time, + end_time=end_time, + ) + ) + + # 记录label每句话的首字尾字对应分词后的位置 + index = 0 + label_firstword_index: List[int] = [] + label_lastword_index: List[int] = [] + for token_label in tokens_label: + label_firstword_index.append(index) + index += len(token_label) + label_lastword_index.append(index - 1) + + # cer + flat_tokens_pred = list(chain(*tokens_pred)) + flat_tokens_label = list(chain(*tokens_label)) + ops = Levenshtein.editops(flat_tokens_pred, flat_tokens_label) + insert = len(list(filter(lambda x: x[0] == "insert", ops))) + delete = len(list(filter(lambda x: x[0] == "delete", ops))) + replace = len(list(filter(lambda x: x[0] == "replace", ops))) + cer = (insert + delete + replace) / 
len(flat_tokens_label) + + # 计算每个token在编辑后的下标位置 + pred_offset = [0] * (len(flat_tokens_pred) + 1) + label_offset = [0] * (len(flat_tokens_label) + 1) + for op in ops: + if op[0] == "insert": + pred_offset[op[1]] += 1 + elif op[0] == "delete": + label_offset[op[2]] += 1 + pred_indexs = [pred_offset[0]] + for i in range(1, len(flat_tokens_pred)): + pred_indexs.append(pred_indexs[i - 1] + pred_offset[i] + 1) + label_indexs = [label_offset[0]] + for i in range(1, len(flat_tokens_label)): + label_indexs.append(label_indexs[i - 1] + label_offset[i] + 1) + + # 计算每个label中首字和尾字对应的时间 + align_start = {100: 0, 200: 0, 300: 0, 500: 0} + align_end = {100: 0, 200: 0, 300: 0, 500: 0} + first_word_distance_sum = 0.0 + last_word_distance_sum = 0.0 + for firstword_index, lastword_index, voice in zip( + label_firstword_index, label_lastword_index, voices + ): + label_index = label_indexs[firstword_index] + label_in_pred_index = upper_bound(label_index, pred_indexs) + if label_in_pred_index != -1: + distance = abs( + voice.start - pred_word_time[label_in_pred_index].start_time + ) + if label_in_pred_index > 0: + distance = min( + distance, + abs( + voice.start + - pred_word_time[label_in_pred_index - 1].start_time + ), + ) + else: + distance = abs(voice.start - pred_word_time[-1].start_time) + for limit in align_start.keys(): + if distance <= limit / 1000: + align_start[limit] += 1 + first_word_distance_sum += distance + + label_index = label_indexs[lastword_index] + label_in_pred_index = lower_bound(label_index, pred_indexs) + if label_in_pred_index != -1: + distance = abs( + voice.end - pred_word_time[label_in_pred_index].end_time + ) + if label_in_pred_index < len(pred_word_time) - 1: + distance = min( + distance, + abs( + voice.end + - pred_word_time[label_in_pred_index + 1].end_time + ), + ) + else: + distance = abs(voice.end - pred_word_time[0].end_time) + for limit in align_end.keys(): + if distance <= limit / 1000: + align_end[limit] += 1 + last_word_distance_sum += distance + 
return ( + cer, + len(voices), + align_start, + align_end, + first_word_distance_sum, + last_word_distance_sum, + ) + + +def evaluate_punctuation( + query_data: QueryData, recognition_results: List[StreamDataModel] +) -> Tuple[int, int, int, int]: + """评估标点符号指标 返回预测中标点数 label中标点数 预测中句子标点数 label中句子标点数""" + punctuation_mapping = defaultdict(lambda: [",", ".", "!", "?"]) + punctuation_mapping.update( + { + "zh": [",", "。", "!", "?"], + "ja": ["、", "。", "!", "?"], + "ar": ["،", ".", "!", "؟"], + "fa": ["،", ".", "!", "؟"], + "el": [",", ".", "!", ";"], + "ti": ["།"], + "th": [" ", ",", ".", "!", "?"], + } + ) + + punctuation_words: List[StreamWordsModel] = [] + for recognition_result in recognition_results: + punctuations = punctuation_mapping[query_data.lang] + for word in recognition_result.words: + for char in word.text: + if char in punctuations: + punctuation_words.append(word) + break + punctuation_start_times = list( + map(lambda x: x.start_time, punctuation_words) + ) + punctuation_start_times = sorted(punctuation_start_times) + punctuation_end_times = list(map(lambda x: x.end_time, punctuation_words)) + punctuation_end_times = sorted(punctuation_end_times) + + voices = query_data.voice + label_len = len(voices) + pred_punctuation_num = len(punctuation_words) + label_punctuation_num = 0 + for label_voice in voices: + punctuations = punctuation_mapping[query_data.lang] + for char in label_voice.answer: + if char in punctuations: + label_punctuation_num += 1 + + pred_sentence_punctuation_num = 0 + label_setence_punctuation_num = label_len + for i, label_voice in enumerate(voices): + if i < label_len - 1: + label_left = label_voice.end + label_right = voices[i + 1].start + else: + label_left = label_voice.end - 0.7 + label_right = label_voice.end + 0.7 + + left_in_pred = upper_bound(label_left, punctuation_start_times) + exist = False + if ( + left_in_pred != -1 + and punctuation_start_times[left_in_pred] <= label_right + ): + exist = True + right_in_pred = 
def upper_bound(x: float, lst: List[float]) -> int:
    """Index of the first element >= x in the sorted list *lst*; -1 if none.

    Uses the stdlib bisect instead of a hand-rolled binary search.
    """
    from bisect import bisect_left

    idx = bisect_left(lst, x)
    return idx if idx < len(lst) else -1


def lower_bound(x: float, lst: List[float]) -> int:
    """Index of the last element <= x in the sorted list *lst*; -1 if none."""
    from bisect import bisect_right

    # bisect_right returns the insertion point past any equal elements, so
    # one position back is the last element <= x (and -1 when none exists).
    return bisect_right(lst, x) - 1
def tail_file(path: str, tail: int) -> str:
    """Return the last ``tail`` lines of the file at *path* as one string.

    Reads an exponentially growing window from the end of the file until it
    covers more than ``tail`` lines (or the whole file), then decodes and
    joins only the final ``tail`` lines — avoids loading huge files.
    """
    window = 1024
    with open(path, "rb") as fp:
        fp.seek(0, 2)  # jump to EOF to learn the file size
        size = fp.tell()
        while True:
            window = min(window, size)
            fp.seek(size - window)
            raw_lines = fp.readlines()
            # Enough lines found, or the window already spans the whole file.
            if len(raw_lines) > tail or window >= size:
                return "".join(raw.decode() for raw in raw_lines[-tail:])
            window *= 2
def tar_dir(zip_path: str, dirname: str):
    """Compress the contents of *dirname* into the gzipped tar *zip_path*.

    Members are stored under paths relative to *dirname*.
    """
    with tarfile.open(zip_path, "w:gz") as archive:
        for path, _, files in os.walk(dirname):
            for file in files:
                full = os.path.join(path, file)
                # BUG FIX: the previous arcname `path.removeprefix(dirname)`
                # kept a leading separator for nested files, producing
                # absolute member names inside the archive (which extract
                # outside the target directory). Use a real relative path.
                archive.add(full, os.path.relpath(full, dirname))


def untar_dir(zip_path: str, dirname: str):
    """Extract the tar archive *zip_path* into *dirname*.

    NOTE(review): ``extractall`` on untrusted archives allows path
    traversal; pass ``filter="data"`` once the runtime is Python >= 3.12.
    """
    with tarfile.open(zip_path) as archive:
        archive.extractall(dirname)
def apply_env_to_values(values, envs):
    """Merge *envs* (name -> value) into the helm ``values["env"]`` list.

    Existing entries are updated in place; unknown names are appended.
    Returns the mutated *values* dict.
    """
    env_list = values.setdefault("env", [])
    # Map each existing env name to its position: O(1) updates instead of a
    # linear list.index scan per key.
    index_by_name = {item["name"]: pos for pos, item in enumerate(env_list)}
    for name, value in envs.items():
        if name in index_by_name:
            env_list[index_by_name[name]]["value"] = value
        else:
            env_list.append({"name": name, "value": value})
    return values


def merge_values(base_value, incr_value):
    """Recursively merge *incr_value* into *base_value*.

    Dicts merge key-wise (recursing on shared keys), lists concatenate,
    and any other pairing is overwritten by the incremental value.
    Returns the merged result; dict/list bases are mutated in place.
    """
    if isinstance(base_value, dict) and isinstance(incr_value, dict):
        for key in incr_value:
            base_value[key] = merge_values(base_value[key], incr_value[key]) if key in base_value else incr_value[key]
    elif isinstance(base_value, list) and isinstance(incr_value, list):
        base_value.extend(incr_value)
    else:
        base_value = incr_value
    return base_value


def gen_chart_tarball(docker_image):
    """Build the sut helm chart tarball for *docker_image*; exits on failure.

    When GET_IMAGE_HASH_URL is set, the image tag is first resolved to a
    digest via that API; the resulting repository/tag are written into the
    chart's values.yaml before the chart directory is tar'ed up.

    Args:
        docker_image: docker image reference ("repo:tag").

    Returns:
        tuple[BytesIO, dict]: [helm chart .tgz file object, values content]
    """
    # Load the values template.
    with open(os.path.join(sut_chart_root, "values.yaml.tmpl")) as fp:
        yaml = YAML(typ="rt")
        values = yaml.load(fp)
    get_image_hash_url = os.getenv("GET_IMAGE_HASH_URL", None)
    logger.info(f"get_image_hash_url: {get_image_hash_url}")
    if get_image_hash_url is not None:
        # BUG FIX: removed a hard-coded debug override that replaced the
        # caller-supplied docker_image with a fixed test image
        # ("harbor-contest.4pd.io/zhoushasha/image_classification:wo_model_v3"),
        # which made every deployment use that image regardless of input.
        resp = requests.get(get_image_hash_url, headers=headers, params={"image": docker_image}, timeout=600)
        logger.info(f"resp.text: {resp.text}")
        assert resp.status_code == 200, "Convert tag to hash for docker image failed, API retcode %d" % resp.status_code
        resp = resp.json()
        assert resp["success"], "Convert tag to hash for docker image failed, response: %s" % str(resp)
        # Expected form: repo:tag:digest -> repository=repo, tag="tag:digest".
        token = resp["data"]["image"].rsplit(":", 2)
        assert len(token) == 3, "Invalid docker image %s" % resp["data"]["image"]
        values["image"]["repository"] = token[0]
        values["image"]["tag"] = ":".join(token[1:])
    else:
        token = docker_image.rsplit(":", 1)
        if len(token) != 2:
            raise RuntimeError("Invalid docker image %s" % docker_image)
        values["image"]["repository"] = token[0]
        values["image"]["tag"] = token[1]
    # Write the resolved values.yaml next to the chart template.
    with open(os.path.join(sut_chart_root, "values.yaml"), "w") as fp:
        yaml = YAML(typ="rt")
        yaml.dump(values, fp)
    # Tar the chart directory into an in-memory gzip archive.
    tarfp = io.BytesIO()
    with tarfile.open(fileobj=tarfp, mode="w:gz") as tar:
        tar.add(sut_chart_root, arcname=os.path.basename(sut_chart_root), recursive=True)
    tarfp.seek(0)
    logger.debug(f"Generated chart using values: {values}")
    return tarfp, values
+ + Returns: + tuple[str, str]: [用于访问服务的k8s域名, 用于unload_sut的名称] + """ + logger.info(f"Deploying SUT application for JOB {JOB_ID}, name_suffix {name_suffix}, extra_values {extra_values}") + # deploy + payload = { + "job_id": JOB_ID, + "resource_name": name_suffix, + "priorityclassname": os.environ.get("priorityclassname"), + } + extra_values = {} if not extra_values else extra_values + payload["values"] = json.dumps(extra_values, ensure_ascii=False) + if chart_str is not None: + payload["helm_chart"] = chart_str + resp = requests.post(LOAD_SUT_URL, data=payload, headers=headers, timeout=600) + else: + assert chart_fileobj is not None, "Either chart_str or chart_fileobj should be set" + + logger.info(f"LOAD_SUT_URL: {LOAD_SUT_URL}") + logger.info(f"payload: {payload}") + logger.info(f"headers: {headers}") + + resp = requests.post( + LOAD_SUT_URL, + data=payload, + headers=headers, + files=[("helm_chart_file", (name_suffix + ".tgz", chart_fileobj))], + timeout=600, + ) + + + if resp.status_code != 200: + raise RuntimeError("Failed to deploy application status_code %d %s" % (resp.status_code, resp.text)) + resp = resp.json() + if not resp["success"]: + logger.error("Failed to deploy application response %r", resp) + service_name = resp["data"]["service_name"] + sut_name = resp["data"]["sut_name"] + logger.info(f"SUT application deployed with service_name {service_name}") + # waiting for appliation ready + running_at = None + retry_count = 0 + while True: + retry_interval = 10 + if retry_count % 20 == 19: + retry_count += 1 + logger.info(f"Waiting {retry_interval} seconds to check whether SUT application {service_name} is ready...") + logger.info("20 retrys log this message again.") + time.sleep(retry_interval) + check_result, running_at = check_sut_ready_from_resp( + service_name, + running_at, + readiness_timeout, + restart_count_limit, + pullimage_count_limit, + ) + if check_result: + break + + logger.info(f"SUT application for JOB {JOB_ID} name_suffix {name_suffix} 
is ready, service_name {service_name}") + return service_name, sut_name + + +def check_sut_ready_from_resp( + service_name, + running_at, + readiness_timeout, + restart_count_limit, + pullimage_count_limit, +): + try: + resp = requests.get( + f"{GET_JOB_SUT_INFO_URL}/{JOB_ID}", + headers=headers, + params={"with_detail": True}, + timeout=600, + ) + except Exception as e: + logger.warning(f"Exception occured while getting SUT application {service_name} status", e) + return False, running_at + if resp.status_code != 200: + logger.warning(f"Get SUT application {service_name} status failed with status_code {resp.status_code}") + return False, running_at + resp = resp.json() + if not resp["success"]: + logger.warning(f"Get SUT application {service_name} status failed with response {resp}") + return False, running_at + if len(resp["data"]["sut"]) == 0: + logger.warning("Empty SUT application status") + return False, running_at + resp_data_sut = copy.deepcopy(resp["data"]["sut"]) + for status in resp_data_sut: + del status["detail"] + logger.info(f"Got SUT application status: {resp_data_sut}") + for status in resp["data"]["sut"]: + if status["phase"] in ["Succeeded", "Failed"]: + raise RuntimeError(f"Some pods of SUT application {service_name} terminated with status {status}") + elif status["phase"] in ["Pending", "Unknown"]: + return False, running_at + elif status["phase"] != "Running": + raise RuntimeError(f"Unexcepted pod status {status} of SUT application {service_name}") + if running_at is None: + running_at = time.time() + for ct in status["detail"]["status"]["container_statuses"]: + if ct["restart_count"] > 0: + logger.info(f"pod {status['pod_name']} restart count = {ct['restart_count']}") + if ct["restart_count"] > restart_count_limit: + raise RuntimeError(f"pod {status['pod_name']} restart too many times(over {restart_count_limit})") + if ( + ct["state"]["waiting"] is not None + and "reason" in ct["state"]["waiting"] + and ct["state"]["waiting"]["reason"] in 
["ImagePullBackOff", "ErrImagePull"] + ): + pull_num[status["pod_name"]] += 1 + logger.info( + "pod %s has {pull_num[status['pod_name']]} times inspect pulling image info: %s" + % (status["pod_name"], ct["state"]["waiting"]) + ) + if pull_num[status["pod_name"]] > pullimage_count_limit: + raise RuntimeError(f"pod {status['pod_name']} cannot pull image") + if not status["conditions"]["Ready"]: + if running_at is not None and time.time() - running_at > readiness_timeout: + raise RuntimeError(f"SUT Application readiness has exceeded readiness_timeout:{readiness_timeout}s") + return False, running_at + return True, running_at + + +def parse_resource(resource): + if resource == -1: + return -1 + match = re.match(r"([\d\.]+)([mKMGTPENi]*)", resource) + value, unit = match.groups() + value = float(value) + unit_mapping = { + "": 1, + "m": 1e-3, + "K": 1e3, + "M": 1e6, + "G": 1e9, + "T": 1e12, + "P": 1e15, + "E": 1e18, + "Ki": 2**10, + "Mi": 2**20, + "Gi": 2**30, + "Ti": 2**40, + "Pi": 2**50, + "Ei": 2**60, + } + if unit not in unit_mapping: + raise ValueError(f"Unknown resources unit: {unit}") + return value * unit_mapping[unit] + + +def limit_resources(resource): + if "limits" not in resource: + return resource + if "cpu" in resource["limits"]: + cpu_limit = parse_resource(resource["limits"]["cpu"]) + if cpu_limit > 30: + logger.error("CPU limit exceeded. 
Adjusting to 30 cores.") + resource["limits"]["cpu"] = "30" + if "memory" in resource["limits"]: + memory_limit = parse_resource(resource["limits"]["memory"]) + if memory_limit > 100 * 2**30: + logger.error("Memory limit exceeded, adjusting to 100Gi") + resource["limits"]["memory"] = "100Gi" + + +def consistent_resources(resource): + if "limits" not in resource and "requests" not in resource: + return resource + elif "limits" in resource: + resource["requests"] = resource["limits"] + else: + resource["limits"] = resource["requests"] + return resource + + +def resource_check(values: Dict[str, Any]): + resources = values.get("resources", {}).get("limits", {}) + if "nvidia.com/gpu" in resources and int(resources["nvidia.com/gpu"]) > 0: + values["resources"]["limits"]["nvidia.com/gpumem"] = 8192 + values["resources"]["limits"]["nvidia.com/gpucores"] = 10 + values["resources"]["requests"] = values["resources"].get("requests", {}) + if "cpu" not in values["resources"]["requests"] and "cpu" in values["resources"]["limits"]: + values["resources"]["requests"]["cpu"] = values["resources"]["limits"]["cpu"] + if "memory" not in values["resources"]["requests"] and "memory" in values["resources"]["limits"]: + values["resources"]["requests"]["memory"] = values["resources"]["limits"]["memory"] + values["resources"]["requests"]["nvidia.com/gpu"] = values["resources"]["limits"]["nvidia.com/gpu"] + values["resources"]["requests"]["nvidia.com/gpumem"] = 8192 + values["resources"]["requests"]["nvidia.com/gpucores"] = 10 + + values["nodeSelector"] = values.get("nodeSelector", {}) + if "contest.4pd.io/accelerator" not in values["nodeSelector"]: + values["nodeSelector"]["contest.4pd.io/accelerator"] = "A100-SXM4-80GBvgpu" + gpu_type = values["nodeSelector"]["contest.4pd.io/accelerator"] + gpu_num = resources["nvidia.com/gpu"] + if gpu_type != "A100-SXM4-80GBvgpu": + raise RuntimeError("GPU类型只能为A100-SXM4-80GBvgpu") + if gpu_num != 1: + raise RuntimeError("GPU个数只能为1") + 
values["tolerations"] = values.get("tolerations", []) + values["tolerations"].append( + { + "key": "hosttype", + "operator": "Equal", + "value": "vgpu", + "effect": "NoSchedule", + } + ) + return values diff --git a/utils/leaderboard.py b/utils/leaderboard.py new file mode 100644 index 0000000..b4e1223 --- /dev/null +++ b/utils/leaderboard.py @@ -0,0 +1,38 @@ +from utils.request import requests_retry_session +import os +import json +import traceback +from utils.logger import logger + +lb_headers = {"Content-Type":"application/json"} +if os.getenv("LEADERBOARD_API_TOKEN"): + lb_headers['Authorization'] = 'Bearer ' + os.getenv("LEADERBOARD_API_TOKEN") + + +def change_product_unavailable() -> None: + logger.info("更改为产品不可用...") + submit_id = str(os.getenv("SUBMIT_ID", -1)) + try: + requests_retry_session().post( + os.getenv("UPDATE_SUBMIT_URL", "http://contest.4pd.io:8080/submit/update"), + data=json.dumps({submit_id: {"product_avaliable": 0}}), + headers=lb_headers, + ) + except Exception as e: + logger.error(traceback.format_exc()) + logger.error(f"change product avaliable error, {e}") + + +def mark_evaluating(task_id) -> None: + logger.info("上报EVALUATING状态...") + job_id = os.getenv('JOB_ID') or "-1" + url = os.getenv("REGISTER_MARK_TASK_URL", "http://contest.4pd.io:8080/job/register_mark_task") + "/" + job_id + try: + requests_retry_session().post( + url, + data=json.dumps({"task_id": task_id}), + headers=lb_headers, + ) + except Exception as e: + logger.error(traceback.format_exc()) + logger.error(f"mark evaluating error, {e}") \ No newline at end of file diff --git a/utils/logger.py b/utils/logger.py new file mode 100644 index 0000000..4f08242 --- /dev/null +++ b/utils/logger.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +import logging +import os + +logging.basicConfig( + format="%(asctime)s %(name)-12s %(levelname)-4s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO"), +) +logger = logging.getLogger(__file__) + +# another 
logger + +log = logging.getLogger("detailed_logger") + +log.propagate = False + +level = logging.INFO + +log.setLevel(level) + +formatter = logging.Formatter( + "[%(asctime)s] %(levelname)s : %(pathname)s:%(lineno)d - %(message)s", + "%Y-%m-%d %H:%M:%S", +) + +streamHandler = logging.StreamHandler() +streamHandler.setLevel(level) +streamHandler.setFormatter(formatter) +log.addHandler(streamHandler) diff --git a/utils/metrics.py b/utils/metrics.py new file mode 100644 index 0000000..8be1f55 --- /dev/null +++ b/utils/metrics.py @@ -0,0 +1,320 @@ +# coding: utf-8 + +import os +from collections import Counter +from copy import deepcopy +from typing import List, Tuple + +import Levenshtein +import numpy as np +from schemas.context import ASRContext +from utils.logger import logger +from utils.tokenizer import Tokenizer, TokenizerType +from utils.update_submit import change_product_available + +IN_TEST = os.getenv("SUBMIT_CONFIG_FILEPATH", None) is None + + +def text_align(context: ASRContext) -> Tuple: + start_end_count = 0 + + label_start_time_list = [] + label_end_time_list = [] + for label_item in context.labels: + label_start_time_list.append(label_item.start) + label_end_time_list.append(label_item.end) + pred_start_time_list = [] + pred_end_time_list = [] + sentence_start = True + for pred_item in context.preds: + if sentence_start: + pred_start_time_list.append(pred_item.recognition_results.start_time) + if pred_item.recognition_results.final_result: + pred_end_time_list.append(pred_item.recognition_results.end_time) + sentence_start = pred_item.recognition_results.final_result + # check start0 < end0 < start1 < end1 < start2 < end2 - ... 
+ if IN_TEST: + print(pred_start_time_list) + print(pred_end_time_list) + pred_time_list = [] + i, j = 0, 0 + while i < len(pred_start_time_list) and j < len(pred_end_time_list): + pred_time_list.append(pred_start_time_list[i]) + pred_time_list.append(pred_end_time_list[j]) + i += 1 + j += 1 + if i < len(pred_start_time_list): + pred_time_list.append(pred_start_time_list[-1]) + for i in range(1, len(pred_time_list)): + # 这里给个 600ms 的宽限 + if pred_time_list[i] < pred_time_list[i - 1] - 0.6: + logger.error("识别的 start、end 不符合 start0 < end0 < start1 < end1 < start2 < end2 ...") + logger.error( + f"当前识别的每个句子开始和结束时间分别为: \ + 开始时间:{pred_start_time_list}, \ + 结束时间:{pred_end_time_list}" + ) + start_end_count += 1 + # change_product_available() + # 时间前后差值 300ms 范围内 + start_time_align_count = 0 + end_time_align_count = 0 + for label_start_time in label_start_time_list: + for pred_start_time in pred_start_time_list: + if pred_start_time <= label_start_time + 0.3 and pred_start_time >= label_start_time - 0.3: + start_time_align_count += 1 + break + for label_end_time in label_end_time_list: + for pred_end_time in pred_end_time_list: + if pred_end_time <= label_end_time + 0.3 and pred_end_time >= label_end_time - 0.3: + end_time_align_count += 1 + break + logger.info( + f"start-time 对齐个数 {start_time_align_count}, \ + end-time 对齐个数 {end_time_align_count}\ + 数据集中句子总数 {len(label_start_time_list)}" + ) + return start_time_align_count, end_time_align_count, start_end_count + + +def first_delay(context: ASRContext) -> Tuple: + first_send_time = context.preds[0].send_time + first_delay_list = [] + sentence_start = True + for pred_context in context.preds: + if sentence_start: + sentence_begin_time = pred_context.recognition_results.start_time + first_delay_time = pred_context.recv_time - first_send_time - sentence_begin_time + first_delay_list.append(first_delay_time) + sentence_start = pred_context.recognition_results.final_result + if IN_TEST: + print(f"当前音频的首字延迟为{first_delay_list}") + 
logger.info(f"当前音频的首字延迟均值为 {np.mean(first_delay_list)}s") + return np.sum(first_delay_list), len(first_delay_list) + + +def revision_delay(context: ASRContext): + first_send_time = context.preds[0].send_time + revision_delay_list = [] + for pred_context in context.preds: + if pred_context.recognition_results.final_result: + sentence_end_time = pred_context.recognition_results.end_time + revision_delay_time = pred_context.recv_time - first_send_time - sentence_end_time + revision_delay_list.append(revision_delay_time) + + if IN_TEST: + print(revision_delay_list) + logger.info(f"当前音频的修正延迟均值为 {np.mean(revision_delay_list)}s") + return np.sum(revision_delay_list), len(revision_delay_list) + + +def patch_unique_token_count(context: ASRContext): + # print(context.__dict__) + # 对于每一个返回的结果都进行 tokenize + pred_text_list = [pred_context.recognition_results.text for pred_context in context.preds] + pred_text_tokenized_list = Tokenizer.norm_and_tokenize(pred_text_list, lang=context.lang) + # print(pred_text_list) + # print(pred_text_tokenized_list) + + # 判断当前是否修改了超过 3s 内的 token 数目 + ## 当前句子的最开始接受时间 + first_recv_time = None + ## 不可修改的 token 个数 + unmodified_token_cnt = 0 + ## 3s 的 index 位置 + time_token_idx = 0 + ## 当前是句子的开始 + final_sentence = True + + ## 修改了不可修改的范围 + is_unmodified_token = False + + for idx, (now_tokens, pred_context) in enumerate(zip(pred_text_tokenized_list, context.preds)): + ## 当前是句子的第一次返回 + if final_sentence: + first_recv_time = pred_context.recv_time + unmodified_token_cnt = 0 + time_token_idx = idx + final_sentence = pred_context.recognition_results.final_result + continue + final_sentence = pred_context.recognition_results.final_result + ## 当前 pred 的 recv-time + pred_recv_time = pred_context.recv_time + ## 最开始 3s 直接忽略 + if pred_recv_time - first_recv_time < 3: + continue + ## 根据历史返回信息,获得最长不可修改长度 + while time_token_idx < idx: + context_pred_tmp = context.preds[time_token_idx] + context_pred_tmp_recv_time = context_pred_tmp.recv_time + tmp_tokens = 
pred_text_tokenized_list[time_token_idx] + if pred_recv_time - context_pred_tmp_recv_time >= 3: + unmodified_token_cnt = max(unmodified_token_cnt, len(tmp_tokens)) + time_token_idx += 1 + else: + break + ## 和自己的上一条音频比,只能修改 unmodified_token_cnt 个 token + last_tokens = pred_text_tokenized_list[idx - 1] + if context.lang in ['ar', 'he']: + tokens_check_pre, tokens_check_now = last_tokens[::-1], now_tokens[::-1] + continue + else: + tokens_check_pre, tokens_check_now = last_tokens, now_tokens + for token_a, token_b in zip(tokens_check_pre[:unmodified_token_cnt], tokens_check_now[:unmodified_token_cnt]): + if token_a != token_b: + is_unmodified_token = True + break + + if is_unmodified_token and int(os.getenv('test', 0)): + logger.error( + f"{idx}-{unmodified_token_cnt}-{last_tokens[:unmodified_token_cnt]}-{now_tokens[:unmodified_token_cnt]}" + ) + if is_unmodified_token: + break + + if is_unmodified_token: + logger.error("修改了不可修改的文字范围") + # change_product_available() + if int(os.getenv('test', 0)): + final_result = True + result_list = [] + for tokens, pred in zip(pred_text_tokenized_list, context.preds): + if final_result: + result_list.append([]) + result_list[-1].append((tokens, pred.recv_time - context.preds[0].recv_time)) + final_result = pred.recognition_results.final_result + for item in result_list: + logger.info(str(item)) + + # 记录每个 patch 的 token 个数 + patch_unique_cnt_counter = Counter() + patch_unique_cnt_in_one_sentence = set() + for pred_text_tokenized, pred_context in zip(pred_text_tokenized_list, context.preds): + token_cnt = len(pred_text_tokenized) + patch_unique_cnt_in_one_sentence.add(token_cnt) + if pred_context.recognition_results.final_result: + for unique_cnt in patch_unique_cnt_in_one_sentence: + patch_unique_cnt_counter[unique_cnt] += 1 + patch_unique_cnt_in_one_sentence.clear() + if context.preds and not context.preds[-1].recognition_results.final_result: + for unique_cnt in patch_unique_cnt_in_one_sentence: + 
patch_unique_cnt_counter[unique_cnt] += 1 + # print(patch_unique_cnt_counter) + logger.info( + f"当前音频的 patch token 均值为 {mean_on_counter(patch_unique_cnt_counter)}, \ + 当前音频的 patch token 方差为 {var_on_counter(patch_unique_cnt_counter)}" + ) + return patch_unique_cnt_counter + + +def mean_on_counter(counter: Counter): + total_sum = sum(key * count for key, count in counter.items()) + total_count = sum(counter.values()) + return total_sum * 1.0 / total_count + + +def var_on_counter(counter: Counter): + total_sum = sum(key * count for key, count in counter.items()) + total_count = sum(counter.values()) + mean = total_sum * 1.0 / total_count + return sum((key - mean) ** 2 * count for key, count in counter.items()) / total_count + + +def edit_distance(arr1: List, arr2: List): + operations = Levenshtein.editops(arr1, arr2) + i = sum([1 for operation in operations if operation[0] == "insert"]) + s = sum([1 for operation in operations if operation[0] == "replace"]) + d = sum([1 for operation in operations if operation[0] == "delete"]) + c = len(arr1) - s - d + return s, d, i, c + + +def cer(tokens_gt_mapping: List[str], tokens_dt_mapping: List[str]): + """输入的是经过编辑距离映射后的两个 token 序列,返回 1-cer, token-cnt""" + insert = sum(1 for item in tokens_gt_mapping if item is None) + delete = sum(1 for item in tokens_dt_mapping if item is None) + equal = sum(1 for token_gt, token_dt in zip(tokens_gt_mapping, tokens_dt_mapping) if token_gt == token_dt) + replace = len(tokens_gt_mapping) - insert - equal + + token_count = replace + equal + delete + cer_value = (replace + delete + insert) * 1.0 / token_count + logger.info(f"当前音频的 cer/wer 值为 {cer_value}, token 个数为 {token_count}") + return 1 - cer_value, token_count + + +def cut_rate( + tokens_gt: List[List[str]], + tokens_dt: List[List[str]], + tokens_gt_mapping: List[str], + tokens_dt_mapping: List[str], +): + sentence_final_token_index_gt = sentence_final_token_index(tokens_gt, tokens_gt_mapping) + sentence_final_token_index_dt = 
sentence_final_token_index(tokens_dt, tokens_dt_mapping) + sentence_final_token_index_gt = set(sentence_final_token_index_gt) + sentence_final_token_index_dt = set(sentence_final_token_index_dt) + sentence_count_gt = len(sentence_final_token_index_gt) + miss_count = len(sentence_final_token_index_gt - sentence_final_token_index_dt) + more_count = len(sentence_final_token_index_dt - sentence_final_token_index_gt) + rate = max(1 - (miss_count + more_count * 2) / sentence_count_gt, 0) + return rate, sentence_count_gt, miss_count, more_count + + +def token_mapping(tokens_gt: List[str], tokens_dt: List[str]) -> Tuple[List[str], List[str]]: + arr1 = deepcopy(tokens_gt) + arr2 = deepcopy(tokens_dt) + operations = Levenshtein.editops(arr1, arr2) + for op in operations[::-1]: + if op[0] == "insert": + arr1.insert(op[1], None) + elif op[0] == "delete": + arr2.insert(op[2], None) + return arr1, arr2 + + +def sentence_final_token_index(tokens: List[List[str]], tokens_mapping: List[str]) -> List[int]: + """获得原句子中每个句子尾部 token 的 index""" + token_index_list = [] + token_index = 0 + for token_in_one_sentence in tokens: + for _ in range(len(token_in_one_sentence)): + while token_index < len(tokens_mapping) and tokens_mapping[token_index] is None: + token_index += 1 + token_index += 1 + token_index_list.append(token_index - 1) + return token_index_list + + +def cut_sentence(sentences: List[str], tokenizerType: TokenizerType) -> List[str]: + """use self.cut_punc to cut all sentences, merge them and put them into list""" + sentence_cut_list = [] + for sentence in sentences: + sentence_list = [sentence] + sentence_tmp_list = [] + for punc in [ + "······", + "......", + "。", + ",", + "?", + "!", + ";", + ":", + "...", + ".", + ",", + "?", + "!", + ";", + ":", + ]: + for sentence in sentence_list: + sentence_tmp_list.extend(sentence.split(punc)) + sentence_list, sentence_tmp_list = sentence_tmp_list, [] + sentence_list = [item for item in sentence_list if item] + + if tokenizerType == 
TokenizerType.whitespace: + sentence_cut_list.append(" ".join(sentence_list)) + else: + sentence_cut_list.append("".join(sentence_list)) + + return sentence_cut_list diff --git a/utils/metrics_plus.py b/utils/metrics_plus.py new file mode 100644 index 0000000..44701ed --- /dev/null +++ b/utils/metrics_plus.py @@ -0,0 +1,50 @@ +from typing import List + +from utils.tokenizer import TokenizerType + + +def replace_general_punc( + sentences: List[str], tokenizer: TokenizerType +) -> List[str]: + """代替原来的函数 utils.metrics.cut_sentence""" + general_puncs = [ + "······", + "......", + "。", + ",", + "?", + "!", + ";", + ":", + "...", + ".", + ",", + "?", + "!", + ";", + ":", + ] + if tokenizer == TokenizerType.whitespace: + replacer = " " + else: + replacer = "" + trans = str.maketrans(dict.fromkeys("".join(general_puncs), replacer)) + ret_sentences = [""] * len(sentences) + for i, sentence in enumerate(sentences): + sentence = sentence.translate(trans) + sentence = sentence.strip() + sentence = sentence.lower() + ret_sentences[i] = sentence + return ret_sentences + + +def distance_point_line( + point: float, line_start: float, line_end: float +) -> float: + """计算点到直线的距离""" + if line_start <= point <= line_end: + return 0 + if point < line_start: + return abs(point - line_start) + else: + return abs(point - line_end) diff --git a/utils/pynini/Dockerfile b/utils/pynini/Dockerfile new file mode 100644 index 0000000..12618c5 --- /dev/null +++ b/utils/pynini/Dockerfile @@ -0,0 +1,93 @@ +# Dockerfile +# Pierre-André Noël, May 12th 2020 +# Copyright © Element AI Inc. All rights reserved. +# Apache License, Version 2.0 +# +# This builds `manylinux_2_28_x86_64` Python wheels for `pynini`, wrapping +# all its dependencies. +# +# This Dockerfile uses multi-stage builds; for more information, see: +# https://docs.docker.com/develop/develop-images/multistage-build/ +# +# The recommended installation method for Pynini is through Conda-Forge. 
This gives Linux +# x86-64 users another option: installing a precompiled module from PyPI. +# +# +# To build wheels and run Pynini's tests, run: +# +# docker build --target=run-tests -t build-pynini-wheels . +# +# To extract the resulting wheels from the Docker image, run: +# +# docker run --rm -v `pwd`:/io build-pynini-wheels cp -r /wheelhouse /io +# +# Notice that this also generates Cython wheels. +# +# Then, `twine` (https://twine.readthedocs.io/en/latest/) can be used to +# publish the resulting Pynini wheels. + +# ****************************************************** +# *** All the following images are based on this one *** +# ****************************************************** +#from quay.io/pypa/manylinux_2_28_x86_64 AS common + +# *********************************************************************** +# *** Image providing all the requirements for building Pynini wheels *** +# *********************************************************************** +FROM harbor.4pd.io/inf/base-python3.8-ubuntu:1.1.0 + +# The versions we want in the wheels. +ENV FST_VERSION "1.8.3" +ENV PYNINI_VERSION "2.1.6" + +# Location of OpenFst and Pynini. +ENV FST_DOWNLOAD_PREFIX "https://www.openfst.org/twiki/pub/FST/FstDownload" +ENV PYNINI_DOWNLOAD_PREFIX "https://www.opengrm.org/twiki/pub/GRM/PyniniDownload" + +# Note that our certificates are not known to the version of wget available in this image. + +# Gets and unpack OpenFst source. +RUN apt update && apt-get install -y wget gcc-9 g++-9 make && ln -s $(which gcc-9) /usr/bin/gcc && ln -s $(which g++-9) /usr/bin/g++ +RUN cd /tmp \ + && wget -q --no-check-certificate "${FST_DOWNLOAD_PREFIX}/openfst-${FST_VERSION}.tar.gz" \ + && tar -xzf "openfst-${FST_VERSION}.tar.gz" \ + && rm "openfst-${FST_VERSION}.tar.gz" + +# Compiles OpenFst. +RUN cd "/tmp/openfst-${FST_VERSION}" \ + && ./configure --enable-grm \ + && make --jobs 4 install \ + && rm -rd "/tmp/openfst-${FST_VERSION}" + +# Gets and unpacks Pynini source. 
+RUN mkdir -p /src && cd /src \ + && wget -q --no-check-certificate "${PYNINI_DOWNLOAD_PREFIX}/pynini-${PYNINI_VERSION}.tar.gz" \ + && tar -xzf "pynini-${PYNINI_VERSION}.tar.gz" \ + && rm "pynini-${PYNINI_VERSION}.tar.gz" + +# Installs requirements in all our Pythons. +RUN pip install -i https://nexus.4pd.io/repository/pypi-all/simple -r "/src/pynini-${PYNINI_VERSION}/requirements.txt" || exit; + + +# ********************************************************** +# *** Image making pynini wheels (placed in /wheelhouse) *** +# ********************************************************** +#FROM wheel-building-env AS build-wheels + +# Compiles the wheels to a temporary directory. +RUN pip wheel -i https://nexus.4pd.io/repository/pypi-all/simple -v "/src/pynini-${PYNINI_VERSION}" -w /tmp/wheelhouse/ || exit; + +RUN wget ftp://ftp.4pd.io/pub/pico/temp/patchelf-0.18.0-x86_64.tar.gz && tar xzf patchelf-0.18.0-x86_64.tar.gz && rm -f patchelf-0.18.0-x86_64.tar.gz +RUN pip install -i https://nexus.4pd.io/repository/pypi-all/simple auditwheel +# Bundles external shared libraries into the wheels. +# See https://github.com/pypa/manylinux/tree/manylinux2014 +RUN for WHL in /tmp/wheelhouse/pynini*.whl; do \ + PATH=$(pwd)/bin:$PATH auditwheel repair --plat manylinux_2_31_x86_64 "${WHL}" -w /wheelhouse/ || exit; \ +done +#RUN mkdir -p /wheelhouse && for WHL in /tmp/wheelhouse/pynini*.whl; do \ +# cp "${WHL}" /wheelhouse/; \ +#done + +# Removes the non-repaired wheels. +RUN rm -rd /tmp/wheelhouse + diff --git a/utils/pynini/README.md b/utils/pynini/README.md new file mode 100644 index 0000000..423602e --- /dev/null +++ b/utils/pynini/README.md @@ -0,0 +1,17 @@ +# pynini + +## 背景 + +SpeechIO对英文ASR的评估工具依赖第三方库pynini(https://github.com/kylebgorman/pynini),该库强绑定OS和gcc版本,需要在运行环境中编译生成wheel包,本文说明编译pynini生成wheel包的方法 + +## 编译 + +```shell +docker build -t build-pynini-wheels . 
+``` + +## 获取wheel包 + +```shell +docker run --rm -v `pwd`:/io build-pynini-wheels cp -r /wheelhouse /io +``` diff --git a/utils/request.py b/utils/request.py new file mode 100644 index 0000000..c99fe75 --- /dev/null +++ b/utils/request.py @@ -0,0 +1,40 @@ +import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + +DEFAULT_TIMEOUT = 2 * 60 # seconds + + +class TimeoutHTTPAdapter(HTTPAdapter): + def __init__(self, *args, **kwargs): + self.timeout = DEFAULT_TIMEOUT + if "timeout" in kwargs: + self.timeout = kwargs["timeout"] + del kwargs["timeout"] + super().__init__(*args, **kwargs) + + def send(self, request, **kwargs): + timeout = kwargs.get("timeout") + if timeout is None: + kwargs["timeout"] = self.timeout + return super().send(request, **kwargs) + + +def requests_retry_session( + retries=3, + backoff_factor=1, + status_forcelist=[500, 502, 504, 404, 403], + session=None, +): + session = session or requests.Session() + retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + ) + adapter = TimeoutHTTPAdapter(max_retries=retry) + session.mount('http://', adapter) + session.mount('https://', adapter) + return session diff --git a/utils/service.py b/utils/service.py new file mode 100644 index 0000000..bcd8557 --- /dev/null +++ b/utils/service.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +import os +import sys + +from utils.helm import deploy_chart, gen_chart_tarball +from utils.logger import logger + + +def register_sut(st_config, resource_name, **kwargs): + + job_id = "".join([c for c in str(os.getenv("JOB_ID", -1)) if c.isnumeric()]) + + docker_image = "10.255.143.18:5000/speaker_identification:wo_model_v0" + #if "docker_image" in st_config and st_config["docker_image"]: + st_config_values = st_config.get("values", {}) + #docker_image = st_config["docker_image"] + docker_image = 
"10.255.143.18:5000/speaker_identification:wo_model_v0" + chart_tar_fp, chart_values = gen_chart_tarball(docker_image) + sut_service_name, _ = deploy_chart( + resource_name, + int(os.getenv("readiness_timeout", 60 * 3)), + chart_fileobj=chart_tar_fp, + extra_values=st_config_values, + restart_count_limit=int(os.getenv('restart_count', 3)), + ) + chart_tar_fp.close() + if st_config_values is not None and "service" in st_config_values and "port" in st_config_values["service"]: + sut_service_port = str(st_config_values["service"]["port"]) + else: + sut_service_port = str(chart_values["service"]["port"]) + return "ws://{}:{}".format(sut_service_name, sut_service_port) + + + """ + elif "chart_repo" in st_config: + logger.info(f"正在使用 helm-chart 配置,内容为 {st_config}") + chart_repo = st_config.get("chart_repo", None) + chart_name = st_config.get("chart_name", None) + chart_version = st_config.get("chart_version", None) + if chart_repo is None or chart_name is None or chart_version is None: + logger.error("chart_repo, chart_name, chart_version cant be none") + logger.info(f"{chart_repo} {chart_name} {chart_version}") + chart_str = os.path.join(chart_repo, chart_name) + ':' + chart_version + + st_cfg_values = st_config.get('values', {}) + st_config["values"] = st_cfg_values + + sut_service_name, _ = deploy_chart( + resource_name, + 600, + chart_str=chart_str, + extra_values=st_cfg_values, + ) + sut_service_name = f"asr-{job_id}" + if st_cfg_values is not None and 'service' in st_cfg_values and 'port' in st_cfg_values['service']: + sut_service_port = str(st_cfg_values['service']['port']) + else: + sut_service_port = '80' + return 'ws://%s:%s' % (sut_service_name, sut_service_port) + else: + logger.error("配置信息错误,缺少 docker_image 属性") + #sys.exit(-1) + + + """ diff --git a/utils/speechio/__init__.py b/utils/speechio/__init__.py new file mode 100644 index 0000000..dfb48db --- /dev/null +++ b/utils/speechio/__init__.py @@ -0,0 +1,3 @@ +''' +reference: 
https://github.com/SpeechColab/Leaderboard/tree/f287a992dc359d1c021bfc6ce810e5e36608e057/utils +''' diff --git a/utils/speechio/error_rate_en.py b/utils/speechio/error_rate_en.py new file mode 100644 index 0000000..352939f --- /dev/null +++ b/utils/speechio/error_rate_en.py @@ -0,0 +1,551 @@ +#!/usr/bin/env python3 +# coding=utf8 +# Copyright 2022 Zhenxiang MA, Jiayu DU (SpeechColab) + +import argparse +import csv +import json +import logging +import os +import sys +from typing import Iterable + +logging.basicConfig(stream=sys.stderr, level=logging.ERROR, format='[%(levelname)s] %(message)s') + +import pynini +from pynini.lib import pynutil + + +# reference: https://github.com/kylebgorman/pynini/blob/master/pynini/lib/edit_transducer.py +# to import original lib: +# from pynini.lib.edit_transducer import EditTransducer +class EditTransducer: + DELETE = "" + INSERT = "" + SUBSTITUTE = "" + + def __init__( + self, + symbol_table, + vocab: Iterable[str], + insert_cost: float = 1.0, + delete_cost: float = 1.0, + substitute_cost: float = 1.0, + bound: int = 0, + ): + # Left factor; note that we divide the edit costs by two because they also + # will be incurred when traversing the right factor. 
+ sigma = pynini.union( + *[pynini.accep(token, token_type=symbol_table) for token in vocab], + ).optimize() + + insert = pynutil.insert(f"[{self.INSERT}]", weight=insert_cost / 2) + delete = pynini.cross(sigma, pynini.accep(f"[{self.DELETE}]", weight=delete_cost / 2)) + substitute = pynini.cross(sigma, pynini.accep(f"[{self.SUBSTITUTE}]", weight=substitute_cost / 2)) + + edit = pynini.union(insert, delete, substitute).optimize() + + if bound: + sigma_star = pynini.closure(sigma) + self._e_i = sigma_star.copy() + for _ in range(bound): + self._e_i.concat(edit.ques).concat(sigma_star) + else: + self._e_i = edit.union(sigma).closure() + + self._e_i.optimize() + + right_factor_std = EditTransducer._right_factor(self._e_i) + # right_factor_ext allows 0-cost matching between token's raw form & auxiliary form + # e.g.: 'I' -> 'I#', 'AM' -> 'AM#' + right_factor_ext = ( + pynini.union( + *[ + pynini.cross( + pynini.accep(x, token_type=symbol_table), + pynini.accep(x + '#', token_type=symbol_table), + ) + for x in vocab + ] + ) + .optimize() + .closure() + ) + self._e_o = pynini.union(right_factor_std, right_factor_ext).closure().optimize() + + @staticmethod + def _right_factor(ifst: pynini.Fst) -> pynini.Fst: + ofst = pynini.invert(ifst) + syms = pynini.generated_symbols() + insert_label = syms.find(EditTransducer.INSERT) + delete_label = syms.find(EditTransducer.DELETE) + pairs = [(insert_label, delete_label), (delete_label, insert_label)] + right_factor = ofst.relabel_pairs(ipairs=pairs) + return right_factor + + def create_lattice(self, iexpr: pynini.FstLike, oexpr: pynini.FstLike) -> pynini.Fst: + lattice = (iexpr @ self._e_i) @ (self._e_o @ oexpr) + EditTransducer.check_wellformed_lattice(lattice) + return lattice + + @staticmethod + def check_wellformed_lattice(lattice: pynini.Fst) -> None: + if lattice.start() == pynini.NO_STATE_ID: + raise RuntimeError("Edit distance composition lattice is empty.") + + def compute_distance(self, iexpr: pynini.FstLike, oexpr: 
pynini.FstLike) -> float: + lattice = self.create_lattice(iexpr, oexpr) + # The shortest cost from all final states to the start state is + # equivalent to the cost of the shortest path. + start = lattice.start() + return float(pynini.shortestdistance(lattice, reverse=True)[start]) + + def compute_alignment(self, iexpr: pynini.FstLike, oexpr: pynini.FstLike) -> pynini.FstLike: + print(iexpr) + print(oexpr) + lattice = self.create_lattice(iexpr, oexpr) + alignment = pynini.shortestpath(lattice, nshortest=1, unique=True) + return alignment.optimize() + + +class ErrorStats: + def __init__(self): + self.num_ref_utts = 0 + self.num_hyp_utts = 0 + self.num_eval_utts = 0 # in both ref & hyp + self.num_hyp_without_ref = 0 + + self.C = 0 + self.S = 0 + self.I = 0 + self.D = 0 + self.token_error_rate = 0.0 + self.modified_token_error_rate = 0.0 + + self.num_utts_with_error = 0 + self.sentence_error_rate = 0.0 + + def to_json(self): + # return json.dumps(self.__dict__, indent=4) + return json.dumps(self.__dict__) + + def to_kaldi(self): + info = ( + F'%WER {self.token_error_rate:.2f} [ {self.S + self.D + self.I} / {self.C + self.S + self.D}, {self.I} ins, {self.D} del, {self.S} sub ]\n' + F'%SER {self.sentence_error_rate:.2f} [ {self.num_utts_with_error} / {self.num_eval_utts} ]\n' + ) + return info + + def to_summary(self): + summary = ( + '==================== Overall Statistics ====================\n' + F'num_ref_utts: {self.num_ref_utts}\n' + F'num_hyp_utts: {self.num_hyp_utts}\n' + F'num_hyp_without_ref: {self.num_hyp_without_ref}\n' + F'num_eval_utts: {self.num_eval_utts}\n' + F'sentence_error_rate: {self.sentence_error_rate:.2f}%\n' + F'token_error_rate: {self.token_error_rate:.2f}%\n' + F'modified_token_error_rate: {self.modified_token_error_rate:.2f}%\n' + F'token_stats:\n' + F' - tokens:{self.C + self.S + self.D:>7}\n' + F' - edits: {self.S + self.I + self.D:>7}\n' + F' - cor: {self.C:>7}\n' + F' - sub: {self.S:>7}\n' + F' - ins: {self.I:>7}\n' + F' - del: 
{self.D:>7}\n' + '============================================================\n' + ) + return summary + + +class Utterance: + def __init__(self, uid, text): + self.uid = uid + self.text = text + + +def LoadKaldiArc(filepath): + utts = {} + with open(filepath, 'r', encoding='utf8') as f: + for line in f: + line = line.strip() + if line: + cols = line.split(maxsplit=1) + assert len(cols) == 2 or len(cols) == 1 + uid = cols[0] + text = cols[1] if len(cols) == 2 else '' + if utts.get(uid) != None: + raise RuntimeError(F'Found duplicated utterence id {uid}') + utts[uid] = Utterance(uid, text) + return utts + + +def BreakHyphen(token: str): + # 'T-SHIRT' should also introduce new words into vocabulary, e.g.: + # 1. 'T' & 'SHIRT' + # 2. 'TSHIRT' + assert '-' in token + v = token.split('-') + v.append(token.replace('-', '')) + return v + + +def LoadGLM(rel_path): + ''' + glm.csv: + I'VE,I HAVE + GOING TO,GONNA + ... + T-SHIRT,T SHIRT,TSHIRT + + glm: + { + '': ["I'VE", 'I HAVE'], + '': ['GOING TO', 'GONNA'], + ... 
+ '': ['T-SHIRT', 'T SHIRT', 'TSHIRT'], + } + ''' + logging.info(f'Loading GLM from {rel_path} ...') + + abs_path = os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path + reader = list(csv.reader(open(abs_path, encoding="utf-8"), delimiter=',')) + + glm = {} + for k, rule in enumerate(reader): + rule_name = f'' + glm[rule_name] = [phrase.strip() for phrase in rule] + logging.info(f' #rule: {len(glm)}') + + return glm + + +def SymbolEQ(symbol_table, i1, i2): + return symbol_table.find(i1).strip('#') == symbol_table.find(i2).strip('#') + + +def PrintSymbolTable(symbol_table: pynini.SymbolTable): + print('SYMBOL_TABLE:') + for k in range(symbol_table.num_symbols()): + sym = symbol_table.find(k) + assert symbol_table.find(sym) == k # symbol table's find can be used for bi-directional lookup (id <-> sym) + print(k, sym) + print() + + +def BuildSymbolTable(vocab) -> pynini.SymbolTable: + logging.info('Building symbol table ...') + symbol_table = pynini.SymbolTable() + symbol_table.add_symbol('') + + for w in vocab: + symbol_table.add_symbol(w) + logging.info(f' #symbols: {symbol_table.num_symbols()}') + + # PrintSymbolTable(symbol_table) + # symbol_table.write_text('symbol_table.txt') + return symbol_table + + +def BuildGLMTagger(glm, symbol_table) -> pynini.Fst: + logging.info('Building GLM tagger ...') + rule_taggers = [] + for rule_tag, rule in glm.items(): + for phrase in rule: + rule_taggers.append( + ( + pynutil.insert(pynini.accep(rule_tag, token_type=symbol_table)) + + pynini.accep(phrase, token_type=symbol_table) + + pynutil.insert(pynini.accep(rule_tag, token_type=symbol_table)) + ) + ) + + alphabet = pynini.union( + *[pynini.accep(sym, token_type=symbol_table) for k, sym in symbol_table if k != 0] # non-epsilon + ).optimize() + + tagger = pynini.cdrewrite( + pynini.union(*rule_taggers).optimize(), '', '', alphabet.closure() + ).optimize() # could be slow with large vocabulary + return tagger + + +def TokenWidth(token: str): + def CharWidth(c): + return 
2 if (c >= '\u4e00') and (c <= '\u9fa5') else 1 + + return sum([CharWidth(c) for c in token]) + + +def PrintPrettyAlignment(raw_hyp, edit_ali, ref_ali, hyp_ali, stream=sys.stderr): + assert len(edit_ali) == len(ref_ali) and len(ref_ali) == len(hyp_ali) + + H = ' HYP# : ' + R = ' REF : ' + E = ' EDIT : ' + for i, e in enumerate(edit_ali): + h, r = hyp_ali[i], ref_ali[i] + e = '' if e == 'C' else e # don't bother printing correct edit-tag + + nr, nh, ne = TokenWidth(r), TokenWidth(h), TokenWidth(e) + n = max(nr, nh, ne) + 1 + + H += h + ' ' * (n - nh) + R += r + ' ' * (n - nr) + E += e + ' ' * (n - ne) + + print(F' HYP : {raw_hyp}', file=stream) + print(H, file=stream) + print(R, file=stream) + print(E, file=stream) + + +def ComputeTokenErrorRate(c, s, i, d): + assert (s + d + c) != 0 + num_edits = s + d + i + ref_len = c + s + d + hyp_len = c + s + i + return 100.0 * num_edits / ref_len, 100.0 * num_edits / max(ref_len, hyp_len) + + +def ComputeSentenceErrorRate(num_err_utts, num_utts): + assert num_utts != 0 + return 100.0 * num_err_utts / num_utts + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--logk', type=int, default=500, help='logging interval') + parser.add_argument( + '--tokenizer', choices=['whitespace', 'char'], default='whitespace', help='whitespace for WER, char for CER' + ) + parser.add_argument('--glm', type=str, default='glm_en.csv', help='glm') + parser.add_argument('--ref', type=str, required=True, help='reference kaldi arc file') + parser.add_argument('--hyp', type=str, required=True, help='hypothesis kaldi arc file') + parser.add_argument('result_file', type=str) + args = parser.parse_args() + logging.info(args) + + stats = ErrorStats() + + logging.info('Generating tokenizer ...') + if args.tokenizer == 'whitespace': + + def word_tokenizer(text): + return text.strip().split() + + tokenizer = word_tokenizer + elif args.tokenizer == 'char': + + def char_tokenizer(text): + return [c for c in 
text.strip().replace(' ', '')] + + tokenizer = char_tokenizer + else: + tokenizer = None + assert tokenizer + + logging.info('Loading REF & HYP ...') + ref_utts = LoadKaldiArc(args.ref) + hyp_utts = LoadKaldiArc(args.hyp) + + # check valid utterances in hyp that have matched non-empty reference + uids = [] + for uid in sorted(hyp_utts.keys()): + if uid in ref_utts.keys(): + if ref_utts[uid].text.strip(): # non-empty reference + uids.append(uid) + else: + logging.warning(F'Found {uid} with empty reference, skipping...') + else: + logging.warning(F'Found {uid} without reference, skipping...') + stats.num_hyp_without_ref += 1 + + stats.num_hyp_utts = len(hyp_utts) + stats.num_ref_utts = len(ref_utts) + stats.num_eval_utts = len(uids) + logging.info(f' #hyp:{stats.num_hyp_utts}, #ref:{stats.num_ref_utts}, #utts_to_evaluate:{stats.num_eval_utts}') + print(f' #hyp:{stats.num_hyp_utts}, #ref:{stats.num_ref_utts}, #utts_to_evaluate:{stats.num_eval_utts}') + + tokens = [] + for uid in uids: + ref_tokens = tokenizer(ref_utts[uid].text) + hyp_tokens = tokenizer(hyp_utts[uid].text) + for t in ref_tokens + hyp_tokens: + tokens.append(t) + if '-' in t: + tokens.extend(BreakHyphen(t)) + vocab_from_utts = list(set(tokens)) + logging.info(f' HYP&REF vocab size: {len(vocab_from_utts)}') + print(f' HYP&REF vocab size: {len(vocab_from_utts)}') + + assert args.glm + glm = LoadGLM(args.glm) + + tokens = [] + for rule in glm.values(): + for phrase in rule: + for t in tokenizer(phrase): + tokens.append(t) + if '-' in t: + tokens.extend(BreakHyphen(t)) + vocab_from_glm = list(set(tokens)) + logging.info(f' GLM vocab size: {len(vocab_from_glm)}') + print(f' GLM vocab size: {len(vocab_from_glm)}') + + vocab = list(set(vocab_from_utts + vocab_from_glm)) + logging.info(f'Global vocab size: {len(vocab)}') + print(f'Global vocab size: {len(vocab)}') + + symtab = BuildSymbolTable( + # Normal evaluation vocab + auxiliary form for alternative paths + GLM tags + vocab + + [x + '#' for x in vocab] + 
+ [x for x in glm.keys()] + ) + glm_tagger = BuildGLMTagger(glm, symtab) + edit_transducer = EditTransducer(symbol_table=symtab, vocab=vocab) + print(edit_transducer) + + logging.info('Evaluating error rate ...') + print('Evaluating error rate ...') + fo = open(args.result_file, 'w+', encoding='utf8') + ndone = 0 + for uid in uids: + ref = ref_utts[uid].text + raw_hyp = hyp_utts[uid].text + + ref_fst = pynini.accep(' '.join(tokenizer(ref)), token_type=symtab) + print(ref_fst) + + # print(ref_fst.string(token_type = symtab)) + + raw_hyp_fst = pynini.accep(' '.join(tokenizer(raw_hyp)), token_type=symtab) + # print(raw_hyp_fst.string(token_type = symtab)) + + # Say, we have: + # RULE_001: "I'M" <-> "I AM" + # REF: HEY I AM HERE + # HYP: HEY I'M HERE + # + # We want to expand HYP with GLM rules(marked with auxiliary #) + # HYP#: HEY {I'M | I# AM#} HERE + # REF is honored to keep its original form. + # + # This could be considered as a flexible on-the-fly TN towards HYP. + + # 1. GLM rule tagging: + # HEY I'M HERE + # -> + # HEY I'M HERE + lattice = (raw_hyp_fst @ glm_tagger).optimize() + tagged_ir = pynini.shortestpath(lattice, nshortest=1, unique=True).string(token_type=symtab) + # print(hyp_tagged) + + # 2. 
GLM rule expansion: + # HEY I'M HERE + # -> + # sausage-like fst: HEY {I'M | I# AM#} HERE + tokens = tagged_ir.split() + sausage = pynini.accep('', token_type=symtab) + i = 0 + while i < len(tokens): # invariant: tokens[0, i) has been built into fst + forms = [] + if tokens[i].startswith(''): # rule segment + rule_name = tokens[i] + rule = glm[rule_name] + # pre-condition: i -> ltag + raw_form = '' + for j in range(i + 1, len(tokens)): + if tokens[j] == rule_name: + raw_form = ' '.join(tokens[i + 1 : j]) + break + assert raw_form + # post-condition: i -> ltag, j -> rtag + + forms.append(raw_form) + for phrase in rule: + if phrase != raw_form: + forms.append(' '.join([x + '#' for x in phrase.split()])) + i = j + 1 + else: # normal token segment + token = tokens[i] + forms.append(token) + if "-" in token: # token with hyphen yields extra forms + forms.append(' '.join([x + '#' for x in token.split('-')])) # 'T-SHIRT' -> 'T# SHIRT#' + forms.append(token.replace('-', '') + '#') # 'T-SHIRT' -> 'TSHIRT#' + i += 1 + + sausage_segment = pynini.union(*[pynini.accep(x, token_type=symtab) for x in forms]).optimize() + sausage += sausage_segment + hyp_fst = sausage.optimize() + print(hyp_fst) + + # Utterance-Level error rate evaluation + alignment = edit_transducer.compute_alignment(ref_fst, hyp_fst) + print("alignment", alignment) + + distance = 0.0 + C, S, I, D = 0, 0, 0, 0 # Cor, Sub, Ins, Del + edit_ali, ref_ali, hyp_ali = [], [], [] + for state in alignment.states(): + for arc in alignment.arcs(state): + i, o = arc.ilabel, arc.olabel + if i != 0 and o != 0 and SymbolEQ(symtab, i, o): + e = 'C' + r, h = symtab.find(i), symtab.find(o) + + C += 1 + distance += 0.0 + elif i != 0 and o != 0 and not SymbolEQ(symtab, i, o): + e = 'S' + r, h = symtab.find(i), symtab.find(o) + + S += 1 + distance += 1.0 + elif i == 0 and o != 0: + e = 'I' + r, h = '*', symtab.find(o) + + I += 1 + distance += 1.0 + elif i != 0 and o == 0: + e = 'D' + r, h = symtab.find(i), '*' + + D += 1 + distance 
+= 1.0 + else: + raise RuntimeError + + edit_ali.append(e) + ref_ali.append(r) + hyp_ali.append(h) + # assert(distance == edit_transducer.compute_distance(ref_fst, sausage)) + + utt_ter, utt_mter = ComputeTokenErrorRate(C, S, I, D) + # print(F'{{"uid":{uid}, "score":{-distance}, "TER":{utt_ter:.2f}, "mTER":{utt_mter:.2f}, "cor":{C}, "sub":{S}, "ins":{I}, "del":{D}}}', file=fo) + # PrintPrettyAlignment(raw_hyp, edit_ali, ref_ali, hyp_ali, fo) + + if utt_ter > 0: + stats.num_utts_with_error += 1 + + stats.C += C + stats.S += S + stats.I += I + stats.D += D + + ndone += 1 + if ndone % args.logk == 0: + logging.info(f'{ndone} utts evaluated.') + logging.info(f'{ndone} utts evaluated in total.') + + # Corpus-Level evaluation + stats.token_error_rate, stats.modified_token_error_rate = ComputeTokenErrorRate(stats.C, stats.S, stats.I, stats.D) + stats.sentence_error_rate = ComputeSentenceErrorRate(stats.num_utts_with_error, stats.num_eval_utts) + + print(stats.to_json(), file=fo) + # print(stats.to_kaldi()) + # print(stats.to_summary(), file=fo) + + fo.close() diff --git a/utils/speechio/error_rate_zh.py b/utils/speechio/error_rate_zh.py new file mode 100644 index 0000000..6871a07 --- /dev/null +++ b/utils/speechio/error_rate_zh.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python3 +# coding=utf8 + +# Copyright 2021 Jiayu DU + +import sys +import argparse +import json +import logging +logging.basicConfig(stream=sys.stderr, level=logging.INFO, format='[%(levelname)s] %(message)s') + +DEBUG = None + +def GetEditType(ref_token, hyp_token): + if ref_token == None and hyp_token != None: + return 'I' + elif ref_token != None and hyp_token == None: + return 'D' + elif ref_token == hyp_token: + return 'C' + elif ref_token != hyp_token: + return 'S' + else: + raise RuntimeError + +class AlignmentArc: + def __init__(self, src, dst, ref, hyp): + self.src = src + self.dst = dst + self.ref = ref + self.hyp = hyp + self.edit_type = GetEditType(ref, hyp) + +def 
similarity_score_function(ref_token, hyp_token): + return 0 if (ref_token == hyp_token) else -1.0 + +def insertion_score_function(token): + return -1.0 + +def deletion_score_function(token): + return -1.0 + +def EditDistance( + ref, + hyp, + similarity_score_function = similarity_score_function, + insertion_score_function = insertion_score_function, + deletion_score_function = deletion_score_function): + assert(len(ref) != 0) + class DPState: + def __init__(self): + self.score = -float('inf') + # backpointer + self.prev_r = None + self.prev_h = None + + def print_search_grid(S, R, H, fstream): + print(file=fstream) + for r in range(R): + for h in range(H): + print(F'[{r},{h}]:{S[r][h].score:4.3f}:({S[r][h].prev_r},{S[r][h].prev_h}) ', end='', file=fstream) + print(file=fstream) + + R = len(ref) + 1 + H = len(hyp) + 1 + + # Construct DP search space, a (R x H) grid + S = [ [] for r in range(R) ] + for r in range(R): + S[r] = [ DPState() for x in range(H) ] + + # initialize DP search grid origin, S(r = 0, h = 0) + S[0][0].score = 0.0 + S[0][0].prev_r = None + S[0][0].prev_h = None + + # initialize REF axis + for r in range(1, R): + S[r][0].score = S[r-1][0].score + deletion_score_function(ref[r-1]) + S[r][0].prev_r = r-1 + S[r][0].prev_h = 0 + + # initialize HYP axis + for h in range(1, H): + S[0][h].score = S[0][h-1].score + insertion_score_function(hyp[h-1]) + S[0][h].prev_r = 0 + S[0][h].prev_h = h-1 + + best_score = S[0][0].score + best_state = (0, 0) + + for r in range(1, R): + for h in range(1, H): + sub_or_cor_score = similarity_score_function(ref[r-1], hyp[h-1]) + new_score = S[r-1][h-1].score + sub_or_cor_score + if new_score >= S[r][h].score: + S[r][h].score = new_score + S[r][h].prev_r = r-1 + S[r][h].prev_h = h-1 + + del_score = deletion_score_function(ref[r-1]) + new_score = S[r-1][h].score + del_score + if new_score >= S[r][h].score: + S[r][h].score = new_score + S[r][h].prev_r = r - 1 + S[r][h].prev_h = h + + ins_score = 
insertion_score_function(hyp[h-1]) + new_score = S[r][h-1].score + ins_score + if new_score >= S[r][h].score: + S[r][h].score = new_score + S[r][h].prev_r = r + S[r][h].prev_h = h-1 + + best_score = S[R-1][H-1].score + best_state = (R-1, H-1) + + if DEBUG: + print_search_grid(S, R, H, sys.stderr) + + # Backtracing best alignment path, i.e. a list of arcs + # arc = (src, dst, ref, hyp, edit_type) + # src/dst = (r, h), where r/h refers to search grid state-id along Ref/Hyp axis + best_path = [] + r, h = best_state[0], best_state[1] + prev_r, prev_h = S[r][h].prev_r, S[r][h].prev_h + score = S[r][h].score + # loop invariant: + # 1. (prev_r, prev_h) -> (r, h) is a "forward arc" on best alignment path + # 2. score is the value of point(r, h) on DP search grid + while prev_r != None or prev_h != None: + src = (prev_r, prev_h) + dst = (r, h) + if (r == prev_r + 1 and h == prev_h + 1): # Substitution or correct + arc = AlignmentArc(src, dst, ref[prev_r], hyp[prev_h]) + elif (r == prev_r + 1 and h == prev_h): # Deletion + arc = AlignmentArc(src, dst, ref[prev_r], None) + elif (r == prev_r and h == prev_h + 1): # Insertion + arc = AlignmentArc(src, dst, None, hyp[prev_h]) + else: + raise RuntimeError + best_path.append(arc) + r, h = prev_r, prev_h + prev_r, prev_h = S[r][h].prev_r, S[r][h].prev_h + score = S[r][h].score + + best_path.reverse() + return (best_path, best_score) + +def PrettyPrintAlignment(alignment, stream = sys.stderr): + def get_token_str(token): + if token == None: + return "*" + return token + + def is_double_width_char(ch): + if (ch >= '\u4e00') and (ch <= '\u9fa5'): # codepoint ranges for Chinese chars + return True + # TODO: support other double-width-char language such as Japanese, Korean + else: + return False + + def display_width(token_str): + m = 0 + for c in token_str: + if is_double_width_char(c): + m += 2 + else: + m += 1 + return m + + R = ' REF : ' + H = ' HYP : ' + E = ' EDIT : ' + for arc in alignment: + r = get_token_str(arc.ref) + h = 
get_token_str(arc.hyp) + e = arc.edit_type if arc.edit_type != 'C' else '' + + nr, nh, ne = display_width(r), display_width(h), display_width(e) + n = max(nr, nh, ne) + 1 + + R += r + ' ' * (n-nr) + H += h + ' ' * (n-nh) + E += e + ' ' * (n-ne) + + print(R, file=stream) + print(H, file=stream) + print(E, file=stream) + +def CountEdits(alignment): + c, s, i, d = 0, 0, 0, 0 + for arc in alignment: + if arc.edit_type == 'C': + c += 1 + elif arc.edit_type == 'S': + s += 1 + elif arc.edit_type == 'I': + i += 1 + elif arc.edit_type == 'D': + d += 1 + else: + raise RuntimeError + return (c, s, i, d) + +def ComputeTokenErrorRate(c, s, i, d): + return 100.0 * (s + d + i) / (s + d + c) + +def ComputeSentenceErrorRate(num_err_utts, num_utts): + assert(num_utts != 0) + return 100.0 * num_err_utts / num_utts + + +class EvaluationResult: + def __init__(self): + self.num_ref_utts = 0 + self.num_hyp_utts = 0 + self.num_eval_utts = 0 # seen in both ref & hyp + self.num_hyp_without_ref = 0 + + self.C = 0 + self.S = 0 + self.I = 0 + self.D = 0 + self.token_error_rate = 0.0 + + self.num_utts_with_error = 0 + self.sentence_error_rate = 0.0 + + def to_json(self): + return json.dumps(self.__dict__) + + def to_kaldi(self): + info = ( + F'%WER {self.token_error_rate:.2f} [ {self.S + self.D + self.I} / {self.C + self.S + self.D}, {self.I} ins, {self.D} del, {self.S} sub ]\n' + F'%SER {self.sentence_error_rate:.2f} [ {self.num_utts_with_error} / {self.num_eval_utts} ]\n' + ) + return info + + def to_sclite(self): + return "TODO" + + def to_espnet(self): + return "TODO" + + def to_summary(self): + #return json.dumps(self.__dict__, indent=4) + summary = ( + '==================== Overall Statistics ====================\n' + F'num_ref_utts: {self.num_ref_utts}\n' + F'num_hyp_utts: {self.num_hyp_utts}\n' + F'num_hyp_without_ref: {self.num_hyp_without_ref}\n' + F'num_eval_utts: {self.num_eval_utts}\n' + F'sentence_error_rate: {self.sentence_error_rate:.2f}%\n' + F'token_error_rate: 
{self.token_error_rate:.2f}%\n' + F'token_stats:\n' + F' - tokens:{self.C + self.S + self.D:>7}\n' + F' - edits: {self.S + self.I + self.D:>7}\n' + F' - cor: {self.C:>7}\n' + F' - sub: {self.S:>7}\n' + F' - ins: {self.I:>7}\n' + F' - del: {self.D:>7}\n' + '============================================================\n' + ) + return summary + + +class Utterance: + def __init__(self, uid, text): + self.uid = uid + self.text = text + + +def LoadUtterances(filepath, format): + utts = {} + if format == 'text': # utt_id word1 word2 ... + with open(filepath, 'r', encoding='utf8') as f: + for line in f: + line = line.strip() + if line: + cols = line.split(maxsplit=1) + assert(len(cols) == 2 or len(cols) == 1) + uid = cols[0] + text = cols[1] if len(cols) == 2 else '' + if utts.get(uid) != None: + raise RuntimeError(F'Found duplicated utterence id {uid}') + utts[uid] = Utterance(uid, text) + else: + raise RuntimeError(F'Unsupported text format {format}') + return utts + + +def tokenize_text(text, tokenizer): + if tokenizer == 'whitespace': + return text.split() + elif tokenizer == 'char': + return [ ch for ch in ''.join(text.split()) ] + else: + raise RuntimeError(F'ERROR: Unsupported tokenizer {tokenizer}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + # optional + parser.add_argument('--tokenizer', choices=['whitespace', 'char'], default='whitespace', help='whitespace for WER, char for CER') + parser.add_argument('--ref-format', choices=['text'], default='text', help='reference format, first col is utt_id, the rest is text') + parser.add_argument('--hyp-format', choices=['text'], default='text', help='hypothesis format, first col is utt_id, the rest is text') + # required + parser.add_argument('--ref', type=str, required=True, help='input reference file') + parser.add_argument('--hyp', type=str, required=True, help='input hypothesis file') + + parser.add_argument('result_file', type=str) + args = parser.parse_args() + logging.info(args) + + 
ref_utts = LoadUtterances(args.ref, args.ref_format) + hyp_utts = LoadUtterances(args.hyp, args.hyp_format) + + r = EvaluationResult() + + # check valid utterances in hyp that have matched non-empty reference + eval_utts = [] + r.num_hyp_without_ref = 0 + for uid in sorted(hyp_utts.keys()): + if uid in ref_utts.keys(): # TODO: efficiency + if ref_utts[uid].text.strip(): # non-empty reference + eval_utts.append(uid) + else: + logging.warn(F'Found {uid} with empty reference, skipping...') + else: + logging.warn(F'Found {uid} without reference, skipping...') + r.num_hyp_without_ref += 1 + + r.num_hyp_utts = len(hyp_utts) + r.num_ref_utts = len(ref_utts) + r.num_eval_utts = len(eval_utts) + + with open(args.result_file, 'w+', encoding='utf8') as fo: + for uid in eval_utts: + ref = ref_utts[uid] + hyp = hyp_utts[uid] + + alignment, score = EditDistance( + tokenize_text(ref.text, args.tokenizer), + tokenize_text(hyp.text, args.tokenizer) + ) + + c, s, i, d = CountEdits(alignment) + utt_ter = ComputeTokenErrorRate(c, s, i, d) + + # utt-level evaluation result + print(F'{{"uid":{uid}, "score":{score}, "ter":{utt_ter:.2f}, "cor":{c}, "sub":{s}, "ins":{i}, "del":{d}}}', file=fo) + PrettyPrintAlignment(alignment, fo) + + r.C += c + r.S += s + r.I += i + r.D += d + + if utt_ter > 0: + r.num_utts_with_error += 1 + + # corpus level evaluation result + r.sentence_error_rate = ComputeSentenceErrorRate(r.num_utts_with_error, r.num_eval_utts) + r.token_error_rate = ComputeTokenErrorRate(r.C, r.S, r.I, r.D) + + print(r.to_summary(), file=fo) + + print(r.to_json()) + print(r.to_kaldi()) diff --git a/utils/speechio/glm_en.csv b/utils/speechio/glm_en.csv new file mode 100644 index 0000000..2bc14f7 --- /dev/null +++ b/utils/speechio/glm_en.csv @@ -0,0 +1,744 @@ +I'M,I AM +I'LL,I WILL +I'D,I HAD +I'VE,I HAVE +I WOULD'VE,I'D HAVE +YOU'RE,YOU ARE +YOU'LL,YOU WILL +YOU'D,YOU WOULD +YOU'VE,YOU HAVE +HE'S,HE IS,HE WAS +HE'LL,HE WILL +HE'D,HE HAD +SHE'S,SHE IS,SHE WAS +SHE'LL,SHE WILL 
+SHE'D,SHE HAD +IT'S,IT IS,IT WAS +IT'LL,IT WILL +WE'RE,WE ARE,WE WERE +WE'LL,WE WILL +WE'D,WE WOULD +WE'VE,WE HAVE +WHO'LL,WHO WILL +THEY'RE,THEY ARE +THEY'LL,THEY WILL +THAT'S,THAT IS,THAT WAS +THAT'LL,THAT WILL +HERE'S,HERE IS,HERE WAS +THERE'S,THERE IS,THERE WAS +WHERE'S,WHERE IS,WHERE WAS +WHAT'S,WHAT IS,WHAT WAS +LET'S,LET US +WHO'S,WHO IS +ONE'S,ONE IS +THERE'LL,THERE WILL +SOMEBODY'S,SOMEBODY IS +EVERYBODY'S,EVERYBODY IS +WOULD'VE,WOULD HAVE +CAN'T,CANNOT,CAN NOT +HADN'T,HAD NOT +HASN'T,HAS NOT +HAVEN'T,HAVE NOT +ISN'T,IS NOT +AREN'T,ARE NOT +WON'T,WILL NOT +WOULDN'T,WOULD NOT +SHOULDN'T,SHOULD NOT +DON'T,DO NOT +DIDN'T,DID NOT +GOTTA,GOT TO +GONNA,GOING TO +WANNA,WANT TO +LEMME,LET ME +GIMME,GIVE ME +DUNNO,DON'T KNOW +GOTCHA,GOT YOU +KINDA,KIND OF +MYSELF,MY SELF +YOURSELF,YOUR SELF +HIMSELF,HIM SELF +HERSELF,HER SELF +ITSELF,IT SELF +OURSELVES,OUR SELVES +OKAY,OK,O K +Y'ALL,YALL,YOU ALL +'CAUSE,'COS,CUZ,BECAUSE +FUCKIN',FUCKING +KILLING,KILLIN' +EVERYDAY,EVERY DAY +DOCTOR,DR,DR. 
+MRS,MISSES,MISSUS +MR,MR.,MISTER +SR,SR.,SENIOR +JR,JR.,JUNIOR +ST,ST.,SAINT +VOL,VOL.,VOLUME +CM,CENTIMETER,CENTIMETRE +MM,MILLIMETER,MILLIMETRE +KM,KILOMETER,KILOMETRE +KB,KILOBYTES,KILO BYTES,K B +MB,MEGABYTES,MEGA BYTES +GB,GIGABYTES,GIGA BYTES,G B +THOUSAND,THOUSAND AND +HUNDRED,HUNDRED AND +A HUNDRED,ONE HUNDRED +TWO THOUSAND AND,TWENTY,TWO THOUSAND +STORYTELLER,STORY TELLER +TSHIRT,T SHIRT +TSHIRTS,T SHIRTS +LEUKAEMIA,LEUKEMIA +OESTROGEN,ESTROGEN +ACKNOWLEDGMENT,ACKNOWLEDGEMENT +JUDGMENT,JUDGEMENT +MAMMA,MAMA +DINING,DINNING +FLACK,FLAK +LEARNT,LEARNED +BLONDE,BLOND +JUMPSTART,JUMP START +RIGHTNOW,RIGHT NOW +EVERYONE,EVERY ONE +NAME'S,NAME IS +FAMILY'S,FAMILY IS +COMPANY'S,COMPANY HAS +GRANDKID,GRAND KID +GRANDKIDS,GRAND KIDS +MEALTIMES,MEAL TIMES +ALRIGHT,ALL RIGHT +GROWNUP,GROWN UP +GROWNUPS,GROWN UPS +SCHOOLDAYS,SCHOOL DAYS +SCHOOLCHILDREN,SCHOOL CHILDREN +CASEBOOK,CASE BOOK +HUNGOVER,HUNG OVER +HANDCLAPS,HAND CLAPS +HANDCLAP,HAND CLAP +HEATWAVE,HEAT WAVE +ADDON,ADD ON +ONTO,ON TO +INTO,IN TO +GOTO,GO TO +GUNSHOT,GUN SHOT +MOTHERFUCKER,MOTHER FUCKER +OFTENTIMES,OFTEN TIMES +SARTRE'S,SARTRE IS +NONSTARTER,NON STARTER +NONSTARTERS,NON STARTERS +LONGTIME,LONG TIME +POLICYMAKERS,POLICY MAKERS +ANYMORE,ANY MORE +CANADA'S,CANADA IS +CELLPHONE,CELL PHONE +WORKPLACE,WORK PLACE +UNDERESTIMATING,UNDER ESTIMATING +CYBERSECURITY,CYBER SECURITY +NORTHEAST,NORTH EAST +ANYTIME,ANY TIME +LIVESTREAM,LIVE STREAM +LIVESTREAMS,LIVE STREAMS +WEBCAM,WEB CAM +EMAIL,E MAIL +ECAM,E CAM +VMIX,V MIX +SETUP,SET UP +SMARTPHONE,SMART PHONE +MULTICASTING,MULTI CASTING +CHITCHAT,CHIT CHAT +SEMIFINAL,SEMI FINAL +SEMIFINALS,SEMI FINALS +BBQ,BARBECUE +STORYLINE,STORY LINE +STORYLINES,STORY LINES +BRO,BROTHER +BROS,BROTHERS +OVERPROTECTIIVE,OVER PROTECTIVE +TIMEOUT,TIME OUT +ADVISOR,ADVISER +TIMBERWOLVES,TIMBER WOLVES +WEBPAGE,WEB PAGE +NEWCOMER,NEW COMER +DELMAR,DEL MAR +NETPLAY,NET PLAY +STREETSIDE,STREET SIDE +COLOURED,COLORED +COLOURFUL,COLORFUL +O,ZERO +ETCETERA,ET CETERA 
+FUNDRAISING,FUND RAISING +RAINFOREST,RAIN FOREST +BREATHTAKING,BREATH TAKING +WIKIPAGE,WIKI PAGE +OVERTIME,OVER TIME +TRAIN'S,TRAIN IS +ANYONE,ANY ONE +PHYSIOTHERAPY,PHYSIO THERAPY +ANYBODY,ANY BODY +BOTTLECAPS,BOTTLE CAPS +BOTTLECAP,BOTTLE CAP +STEPFATHER'S,STEP FATHER'S +STEPFATHER,STEP FATHER +WARTIME,WAR TIME +SCREENSHOT,SCREEN SHOT +TIMELINE,TIME LINE +CITY'S,CITY IS +NONPROFIT,NON PROFIT +KPOP,K POP +HOMEBASE,HOME BASE +LIFELONG,LIFE LONG +LAWSUITS,LAW SUITS +MULTIBILLION,MULTI BILLION +ROADMAP,ROAD MAP +GUY'S,GUY IS +CHECKOUT,CHECK OUT +SQUARESPACE,SQUARE SPACE +REDLINING,RED LINING +BASE'S,BASE IS +TAKEAWAY,TAKE AWAY +CANDYLAND,CANDY LAND +ANTISOCIAL,ANTI SOCIAL +CASEWORK,CASE WORK +RIGOR,RIGOUR +ORGANIZATIONS,ORGANISATIONS +ORGANIZATION,ORGANISATION +SIGNPOST,SIGN POST +WWII,WORLD WAR TWO +WINDOWPANE,WINDOW PANE +SUREFIRE,SURE FIRE +MOUNTAINTOP,MOUNTAIN TOP +SALESPERSON,SALES PERSON +NETWORK,NET WORK +MINISERIES,MINI SERIES +EDWARDS'S,EDWARDS IS +INTERSUBJECTIVITY,INTER SUBJECTIVITY +LIBERALISM'S,LIBERALISM IS +TAGLINE,TAG LINE +SHINETHEORY,SHINE THEORY +CALLYOURGIRLFRIEND,CALL YOUR GIRLFRIEND +STARTUP,START UP +BREAKUP,BREAK UP +RADIOTOPIA,RADIO TOPIA +HEARTBREAKING,HEART BREAKING +AUTOIMMUNE,AUTO IMMUNE +SINISE'S,SINISE IS +KICKBACK,KICK BACK +FOGHORN,FOG HORN +BADASS,BAD ASS +POWERAMERICAFORWARD,POWER AMERICA FORWARD +GOOGLE'S,GOOGLE IS +ROLEPLAY,ROLE PLAY +PRICE'S,PRICE IS +STANDOFF,STAND OFF +FOREVER,FOR EVER +GENERAL'S,GENERAL IS +DOG'S,DOG IS +AUDIOBOOK,AUDIO BOOK +ANYWAY,ANY WAY +PIGEONHOLE,PIGEON HOLE +EGGSHELLS,EGG SHELLS +VACCINE'S,VACCINE IS +WORKOUT,WORK OUT +ADMINISTRATOR'S,ADMINISTRATOR IS +FUCKUP,FUCK UP +RUNOFFS,RUN OFFS +COLORWAY,COLOR WAY +WAITLIST,WAIT LIST +HEALTHCARE,HEALTH CARE +TEXTBOOK,TEXT BOOK +CALLBACK,CALL BACK +PARTYGOERS,PARTY GOERS +SOMEDAY,SOME DAY +NIGHTGOWN,NIGHT GOWN +STANDALONG,STAND ALONG +BUSSINESSWOMAN,BUSSINESS WOMAN +STORYTELLING,STORY TELLING +MARKETPLACE,MARKET PLACE +CRATEJOY,CRATE JOY +OUTPERFORMED,OUT
PERFORMED +TRUEBOTANICALS,TRUE BOTANICALS +NONFICTION,NON FICTION +SPINOFF,SPIN OFF +MOTHERFUCKING,MOTHER FUCKING +TRACKLIST,TRACK LIST +GODDAMN,GOD DAMN +PORNHUB,PORN HUB +UNDERAGE,UNDER AGE +GOODBYE,GOOD BYE +HARDCORE,HARD CORE +TRUCK'S,TRUCK IS +COUNTERSTEERING,COUNTER STEERING +BUZZWORD,BUZZ WORD +SUBCOMPONENTS,SUB COMPONENTS +MOREOVER,MORE OVER +PICKUP,PICK UP +NEWSLETTER,NEWS LETTER +KEYWORD,KEY WORD +LOGIN,LOG IN +TOOLBOX,TOOL BOX +LINK'S,LINK IS +PRIMIALVIDEO,PRIMAL VIDEO +DOTNET,DOT NET +AIRSTRIKE,AIR STRIKE +HAIRSTYLE,HAIR STYLE +TOWNSFOLK,TOWNS FOLK +GOLDFISH,GOLD FISH +TOM'S,TOM IS +HOMETOWN,HOME TOWN +CORONAVIRUS,CORONA VIRUS +PLAYSTATION,PLAY STATION +TOMORROW,TO MORROW +TIMECONSUMING,TIME CONSUMING +POSTWAR,POST WAR +HANDSON,HANDS ON +SHAKEUP,SHAKE UP +ECOMERS,E COMERS +COFOUNDER,CO FOUNDER +HIGHEND,HIGH END +INPERSON,IN PERSON +GROWNUP,GROWN UP +SELFREGULATION,SELF REGULATION +INDEPTH,IN DEPTH +ALLTIME,ALL TIME +LONGTERM,LONG TERM +SOCALLED,SO CALLED +SELFCONFIDENCE,SELF CONFIDENCE +STANDUP,STAND UP +MINDBOGGLING,MIND BOGGLING +BEINGFOROTHERS,BEING FOR OTHERS +COWROTE,CO WROTE +COSTARRED,CO STARRED +EDITORINCHIEF,EDITOR IN CHIEF +HIGHSPEED,HIGH SPEED +DECISIONMAKING,DECISION MAKING +WELLBEING,WELL BEING +NONTRIVIAL,NON TRIVIAL +PREEXISTING,PRE EXISTING +STATEOWNED,STATE OWNED +PLUGIN,PLUG IN +PROVERSION,PRO VERSION +OPTIN,OPT IN +FOLLOWUP,FOLLOW UP +FOLLOWUPS,FOLLOW UPS +WIFI,WI FI +THIRDPARTY,THIRD PARTY +PROFESSIONALLOOKING,PROFESSIONAL LOOKING +FULLSCREEN,FULL SCREEN +BUILTIN,BUILT IN +MULTISTREAM,MULTI STREAM +LOWCOST,LOW COST +RESTREAM,RE STREAM +GAMECHANGER,GAME CHANGER +WELLDEVELOPED,WELL DEVELOPED +QUARTERINCH,QUARTER INCH +FASTFASHION,FAST FASHION +ECOMMERCE,E COMMERCE +PRIZEWINNING,PRIZE WINNING +NEVERENDING,NEVER ENDING +MINDBLOWING,MIND BLOWING +REALLIFE,REAL LIFE +REOPEN,RE OPEN +ONDEMAND,ON DEMAND +PROBLEMSOLVING,PROBLEM SOLVING +HEAVYHANDED,HEAVY HANDED +OPENENDED,OPEN ENDED +SELFCONTROL,SELF CONTROL +WELLMEANING,WELL MEANING 
+COHOST,CO HOST +RIGHTSBASED,RIGHTS BASED +HALFBROTHER,HALF BROTHER +FATHERINLAW,FATHER IN LAW +COAUTHOR,CO AUTHOR +REELECTION,RE ELECTION +SELFHELP,SELF HELP +PROLIFE,PRO LIFE +ANTIDUKE,ANTI DUKE +POSTSTRUCTURALIST,POST STRUCTURALIST +COFOUNDED,CO FOUNDED +XRAY,X RAY +ALLAROUND,ALL AROUND +HIGHTECH,HIGH TECH +TMOBILE,T MOBILE +INHOUSE,IN HOUSE +POSTMORTEM,POST MORTEM +LITTLEKNOWN,LITTLE KNOWN +FALSEPOSITIVE,FALSE POSITIVE +ANTIVAXXER,ANTI VAXXER +EMAILS,E MAILS +DRIVETHROUGH,DRIVE THROUGH +DAYTODAY,DAY TO DAY +COSTAR,CO STAR +EBAY,E BAY +KOOLAID,KOOL AID +ANTIDEMOCRATIC,ANTI DEMOCRATIC +MIDDLEAGED,MIDDLE AGED +SHORTLIVED,SHORT LIVED +BESTSELLING,BEST SELLING +TICTACS,TIC TACS +UHHUH,UH HUH +MULTITANK,MULTI TANK +JAWDROPPING,JAW DROPPING +LIVESTREAMING,LIVE STREAMING +HARDWORKING,HARD WORKING +BOTTOMDWELLING,BOTTOM DWELLING +PRESHOW,PRE SHOW +HANDSFREE,HANDS FREE +TRICKORTREATING,TRICK OR TREATING +PRERECORDED,PRE RECORDED +DOGOODERS,DO GOODERS +WIDERANGING,WIDE RANGING +LIFESAVING,LIFE SAVING +SKIREPORT,SKI REPORT +SNOWBASE,SNOW BASE +JAYZ,JAY Z +SPIDERMAN,SPIDER MAN +FREEKICK,FREE KICK +EDWARDSHELAIRE,EDWARDS HELAIRE +SHORTTERM,SHORT TERM +HAVENOTS,HAVE NOTS +SELFINTEREST,SELF INTEREST +SELFINTERESTED,SELF INTERESTED +SELFCOMPASSION,SELF COMPASSION +MACHINELEARNING,MACHINE LEARNING +COAUTHORED,CO AUTHORED +NONGOVERNMENT,NON GOVERNMENT +SUBSAHARAN,SUB SAHARAN +COCHAIR,CO CHAIR +LARGESCALE,LARGE SCALE +VIDEOONDEMAND,VIDEO ON DEMAND +FIRSTCLASS,FIRST CLASS +COFOUNDERS,CO FOUNDERS +COOP,CO OP +PREORDERS,PRE ORDERS +DOUBLEENTRY,DOUBLE ENTRY +SELFCONFIDENT,SELF CONFIDENT +SELFPORTRAIT,SELF PORTRAIT +NONWHITE,NON WHITE +ONBOARD,ON BOARD +HALFLIFE,HALF LIFE +ONCOURT,ON COURT +SCIFI,SCI FI +XMEN,X MEN +DAYLEWIS,DAY LEWIS +LALALAND,LA LA LAND +AWARDWINNING,AWARD WINNING +BOXOFFICE,BOX OFFICE +TRIDACTYLS,TRI DACTYLS +TRIDACTYL,TRI DACTYL +MEDIUMSIZED,MEDIUM SIZED +POSTSECONDARY,POST SECONDARY +FULLTIME,FULL TIME +GOKART,GO KART +OPENAIR,OPEN AIR +WELLKNOWN,WELL KNOWN 
+ICECREAM,ICE CREAM +EARTHMOON,EARTH MOON +STATEOFTHEART,STATE OF THE ART +BSIDE,B SIDE +EASTWEST,EAST WEST +ALLSTAR,ALL STAR +RUNNERUP,RUNNER UP +HORSEDRAWN,HORSE DRAWN +OPENSOURCE,OPEN SOURCE +PURPOSEBUILT,PURPOSE BUILT +SQUAREFREE,SQUARE FREE +PRESENTDAY,PRESENT DAY +CANADAUNITED,CANADA UNITED +HOTCHPOTCH,HOTCH POTCH +LOWLYING,LOW LYING +RIGHTHANDED,RIGHT HANDED +PEARSHAPED,PEAR SHAPED +BESTKNOWN,BEST KNOWN +FULLLENGTH,FULL LENGTH +YEARROUND,YEAR ROUND +PREELECTION,PRE ELECTION +RERECORD,RE RECORD +MINIALBUM,MINI ALBUM +LONGESTRUNNING,LONGEST RUNNING +ALLIRELAND,ALL IRELAND +NORTHWESTERN,NORTH WESTERN +PARTTIME,PART TIME +NONGOVERNMENTAL,NON GOVERNMENTAL +ONLINE,ON LINE +ONAIR,ON AIR +NORTHSOUTH,NORTH SOUTH +RERELEASED,RE RELEASED +LEFTHANDED,LEFT HANDED +BSIDES,B SIDES +ANGLOSAXON,ANGLO SAXON +SOUTHSOUTHEAST,SOUTH SOUTHEAST +CROSSCOUNTRY,CROSS COUNTRY +REBUILT,RE BUILT +FREEFORM,FREE FORM +SCOOBYDOO,SCOOBY DOO +ATLARGE,AT LARGE +COUNCILMANAGER,COUNCIL MANAGER +LONGRUNNING,LONG RUNNING +PREWAR,PRE WAR +REELECTED,RE ELECTED +HIGHSCHOOL,HIGH SCHOOL +RUNNERSUP,RUNNERS UP +NORTHWEST,NORTH WEST +WEBBASED,WEB BASED +HIGHQUALITY,HIGH QUALITY +RIGHTWING,RIGHT WING +LANEFOX,LANE FOX +PAYPERVIEW,PAY PER VIEW +COPRODUCTION,CO PRODUCTION +NONPARTISAN,NON PARTISAN +FIRSTPERSON,FIRST PERSON +WORLDRENOWNED,WORLD RENOWNED +VICEPRESIDENT,VICE PRESIDENT +PROROMAN,PRO ROMAN +COPRODUCED,CO PRODUCED +LOWPOWER,LOW POWER +SELFESTEEM,SELF ESTEEM +SEMITRANSPARENT,SEMI TRANSPARENT +SECONDINCOMMAND,SECOND IN COMMAND +HIGHRISE,HIGH RISE +COHOSTED,CO HOSTED +AFRICANAMERICAN,AFRICAN AMERICAN +SOUTHWEST,SOUTH WEST +WELLPRESERVED,WELL PRESERVED +FEATURELENGTH,FEATURE LENGTH +HIPHOP,HIP HOP +ALLBIG,ALL BIG +SOUTHEAST,SOUTH EAST +COUNTERATTACK,COUNTER ATTACK +QUARTERFINALS,QUARTER FINALS +STABLEDOOR,STABLE DOOR +DARKEYED,DARK EYED +ALLAMERICAN,ALL AMERICAN +THIRDPERSON,THIRD PERSON +LOWLEVEL,LOW LEVEL +NTERMINAL,N TERMINAL +DRIEDUP,DRIED UP +AFRICANAMERICANS,AFRICAN AMERICANS +ANTIAPARTHEID,ANTI 
APARTHEID +STOKEONTRENT,STOKE ON TRENT +NORTHNORTHEAST,NORTH NORTHEAST +BRANDNEW,BRAND NEW +RIGHTANGLED,RIGHT ANGLED +GOVERNMENTOWNED,GOVERNMENT OWNED +SONINLAW,SON IN LAW +SUBJECTOBJECTVERB,SUBJECT OBJECT VERB +LEFTARM,LEFT ARM +LONGLIVED,LONG LIVED +REDEYE,RED EYE +TPOSE,T POSE +NIGHTVISION,NIGHT VISION +SOUTHEASTERN,SOUTH EASTERN +WELLRECEIVED,WELL RECEIVED +ALFAYOUM,AL FAYOUM +TIMEBASED,TIME BASED +KETTLEDRUMS,KETTLE DRUMS +BRIGHTEYED,BRIGHT EYED +REDBROWN,RED BROWN +SAMESEX,SAME SEX +PORTDEPAIX,PORT DE PAIX +CLEANUP,CLEAN UP +PERCENT,PERCENT SIGN +TAKEOUT,TAKE OUT +KNOWHOW,KNOW HOW +FISHBONE,FISH BONE +FISHSTICKS,FISH STICKS +PAPERWORK,PAPER WORK +NICKNACKS,NICK NACKS +STREETTALKING,STREET TALKING +NONACADEMIC,NON ACADEMIC +SHELLY,SHELLEY +SHELLY'S,SHELLEY'S +JIMMY,JIMMIE +JIMMY'S,JIMMIE'S +DRUGSTORE,DRUG STORE +THRU,THROUGH +PLAYDATE,PLAY DATE +MICROLIFE,MICRO LIFE +SKILLSET,SKILL SET +SKILLSETS,SKILL SETS +TRADEOFF,TRADE OFF +TRADEOFFS,TRADE OFFS +ONSCREEN,ON SCREEN +PLAYBACK,PLAY BACK +ARTWORK,ART WORK +COWORKER,CO WORDER +COWORKERS,CO WORDERS +SOMETIME,SOME TIME +SOMETIMES,SOME TIMES +CROWDFUNDING,CROWD FUNDING +AM,A.M.,A M +PM,P.M.,P M +TV,T V +MBA,M B A +USA,U S A +US,U S +UK,U K +CEO,C E O +CFO,C F O +COO,C O O +CIO,C I O +FM,F M +GMC,G M C +FSC,F S C +NPD,N P D +APM,A P M +NGO,N G O +TD,T D +LOL,L O L +IPO,I P O +CNBC,C N B C +IPOS,I P OS +CNBC's,C N B C'S +JT,J T +NPR,N P R +NPR'S,N P R'S +MP,M P +IOI,I O I +DW,D W +CNN,C N N +WSM,W S M +ET,E T +IT,I T +RJ,R J +DVD,D V D +DVD'S,D V D'S +HBO,H B O +LA,L A +XC,X C +SUV,S U V +NBA,N B A +NBA'S,N B A'S +ESPN,E S P N +ESPN'S,E S P N'S +ADT,A D T +HD,H D +VIP,V I P +TMZ,T M Z +CBC,C B C +NPO,N P O +BBC,B B C +LA'S,L A'S +TMZ'S,T M Z'S +HIV,H I V +FTC,F T C +EU,E U +PHD,P H D +AI,A I +FHI,F H I +ICML,I C M L +ICLR,I C L R +BMW,B M W +EV,E V +CR,C R +API,A P I +ICO,I C O +LTE,L T E +OBS,O B S +PC,P C +IO,I O +CRM,C R M +RTMP,R T M P +ASMR,A S M R +GG,G G +WWW,W W W +PEI,P E I +JJ,J J +PT,P T +DJ,D J +SD,S D 
+POW,P.O.W.,P O W +FYI,F Y I +DC,D C,D.C +ABC,A B C +TJ,T J +WMDT,W M D T +WDTN,W D T N +TY,T Y +EJ,E J +CJ,C J +ACL,A C L +UK'S,U K'S +GTV,G T V +MDMA,M D M A +DFW,D F W +WTF,W T F +AJ,A J +MD,M D +PH,P H +ID,I D +SEO,S E O +UTM'S,U T M'S +EC,E C +UFC,U F C +RV,R V +UTM,U T M +CSV,C S V +SMS,S M S +GRB,G R B +GT,G T +LEM,L E M +XR,X R +EDU,E D U +NBC,N B C +EMS,E M S +CDC,C D C +MLK,M L K +IE,I E +OC,O C +HR,H R +MA,M A +DEE,D E E +AP,A P +UFO,U F O +DE,D E +LGBTQ,L G B T Q +PTA,P T A +NHS,N H S +CMA,C M A +MGM,M G M +AKA,A K A +HW,H W +GOP,G O P +GOP'S,G O P'S +FBI,F B I +PRX,P R X +CTO,C T O +URL,U R L +EIN,E I N +MLS,M L S +CSI,C S I +AOC,A O C +CND,C N D +CP,C P +PP,P P +CLI,C L I +PB,P B +FDA,F D A +MRNA,M R N A +PR,P R +VP,V P +DNC,D N C +MSNBC,M S N B C +GQ,G Q +UT,U T +XXI,X X I +HRV,H R V +WHO,W H O +CRO,C R O +DPA,D P A +PPE,P P E +EVA,E V A +BP,B P +GPS,G P S +AR,A R +PJ,P J +MLM,M L M +OLED,O L E D +BO,B O +VE,V E +UN,U N +SLS,S L S +DM,D M +DM'S,D M'S +ASAP,A S A P +ETA,E T A +DOB,D O B +BMW,B M W diff --git a/utils/speechio/interjections_en.csv b/utils/speechio/interjections_en.csv new file mode 100644 index 0000000..1afbd3b --- /dev/null +++ b/utils/speechio/interjections_en.csv @@ -0,0 +1,20 @@ +ach +ah +eee +eh +er +ew +ha +hee +hm +hmm +hmmm +huh +mm +mmm +oof +uh +uhh +um +oh +hum \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/README.md b/utils/speechio/nemo_text_processing/README.md new file mode 100644 index 0000000..63ea610 --- /dev/null +++ b/utils/speechio/nemo_text_processing/README.md @@ -0,0 +1 @@ +nemo_version from commit:eae1684f7f33c2a18de9ecfa42ec7db93d39e631 diff --git a/utils/speechio/nemo_text_processing/__init__.py b/utils/speechio/nemo_text_processing/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/README.md b/utils/speechio/nemo_text_processing/text_normalization/README.md new file mode 100644 index 0000000..d14e4d1 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/README.md @@ -0,0 +1,10 @@ +# Text Normalization + +Text Normalization is part of NeMo's `nemo_text_processing` - a Python package that is installed with the `nemo_toolkit`. +It converts text from written form into its verbalized form, e.g. "123" -> "one hundred twenty three". + +See [NeMo documentation](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/nlp/text_normalization/wfst/wfst_text_normalization.html) for details. 
+ +Tutorial with overview of the package capabilities: [Text_(Inverse)_Normalization.ipynb](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb) + +Tutorial on how to customize the underlying gramamrs: [WFST_Tutorial.ipynb](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/text_processing/WFST_Tutorial.ipynb) diff --git a/utils/speechio/nemo_text_processing/text_normalization/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/data_loader_utils.py b/utils/speechio/nemo_text_processing/text_normalization/data_loader_utils.py new file mode 100644 index 0000000..d6713c9 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/data_loader_utils.py @@ -0,0 +1,350 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import json +import re +import string +from collections import defaultdict, namedtuple +from typing import Dict, List, Optional, Set, Tuple +from unicodedata import category + + + +EOS_TYPE = "EOS" +PUNCT_TYPE = "PUNCT" +PLAIN_TYPE = "PLAIN" +Instance = namedtuple('Instance', 'token_type un_normalized normalized') +known_types = [ + "PLAIN", + "DATE", + "CARDINAL", + "LETTERS", + "VERBATIM", + "MEASURE", + "DECIMAL", + "ORDINAL", + "DIGIT", + "MONEY", + "TELEPHONE", + "ELECTRONIC", + "FRACTION", + "TIME", + "ADDRESS", +] + + +def _load_kaggle_text_norm_file(file_path: str) -> List[Instance]: + """ + https://www.kaggle.com/richardwilliamsproat/text-normalization-for-english-russian-and-polish + Loads text file in the Kaggle Google text normalization file format: \t\t<`self` if trivial class or normalized text> + E.g. + PLAIN Brillantaisia + PLAIN is + PLAIN a + PLAIN genus + PLAIN of + PLAIN plant + PLAIN in + PLAIN family + PLAIN Acanthaceae + PUNCT . 
sil + + + Args: + file_path: file path to text file + + Returns: flat list of instances + """ + res = [] + with open(file_path, 'r') as fp: + for line in fp: + parts = line.strip().split("\t") + if parts[0] == "": + res.append(Instance(token_type=EOS_TYPE, un_normalized="", normalized="")) + else: + l_type, l_token, l_normalized = parts + l_token = l_token.lower() + l_normalized = l_normalized.lower() + + if l_type == PLAIN_TYPE: + res.append(Instance(token_type=l_type, un_normalized=l_token, normalized=l_token)) + elif l_type != PUNCT_TYPE: + res.append(Instance(token_type=l_type, un_normalized=l_token, normalized=l_normalized)) + return res + + +def load_files(file_paths: List[str], load_func=_load_kaggle_text_norm_file) -> List[Instance]: + """ + Load given list of text files using the `load_func` function. + + Args: + file_paths: list of file paths + load_func: loading function + + Returns: flat list of instances + """ + res = [] + for file_path in file_paths: + res.extend(load_func(file_path=file_path)) + return res + + +def clean_generic(text: str) -> str: + """ + Cleans text without affecting semiotic classes. + + Args: + text: string + + Returns: cleaned string + """ + text = text.strip() + text = text.lower() + return text + + +def evaluate(preds: List[str], labels: List[str], input: Optional[List[str]] = None, verbose: bool = True) -> float: + """ + Evaluates accuracy given predictions and labels. 
+ + Args: + preds: predictions + labels: labels + input: optional, only needed for verbosity + verbose: if true prints [input], golden labels and predictions + + Returns accuracy + """ + acc = 0 + nums = len(preds) + for i in range(nums): + pred_norm = clean_generic(preds[i]) + label_norm = clean_generic(labels[i]) + if pred_norm == label_norm: + acc = acc + 1 + else: + if input: + print(f"inpu: {json.dumps(input[i])}") + print(f"gold: {json.dumps(label_norm)}") + print(f"pred: {json.dumps(pred_norm)}") + return acc / nums + + +def training_data_to_tokens( + data: List[Instance], category: Optional[str] = None +) -> Dict[str, Tuple[List[str], List[str]]]: + """ + Filters the instance list by category if provided and converts it into a map from token type to list of un_normalized and normalized strings + + Args: + data: list of instances + category: optional semiotic class category name + + Returns Dict: token type -> (list of un_normalized strings, list of normalized strings) + """ + result = defaultdict(lambda: ([], [])) + for instance in data: + if instance.token_type != EOS_TYPE: + if category is None or instance.token_type == category: + result[instance.token_type][0].append(instance.un_normalized) + result[instance.token_type][1].append(instance.normalized) + return result + + +def training_data_to_sentences(data: List[Instance]) -> Tuple[List[str], List[str], List[Set[str]]]: + """ + Takes instance list, creates list of sentences split by EOS_Token + Args: + data: list of instances + Returns (list of unnormalized sentences, list of normalized sentences, list of sets of categories in a sentence) + """ + # split data at EOS boundaries + sentences = [] + sentence = [] + categories = [] + sentence_categories = set() + + for instance in data: + if instance.token_type == EOS_TYPE: + sentences.append(sentence) + sentence = [] + categories.append(sentence_categories) + sentence_categories = set() + else: + sentence.append(instance) + 
sentence_categories.update([instance.token_type]) + un_normalized = [" ".join([instance.un_normalized for instance in sentence]) for sentence in sentences] + normalized = [" ".join([instance.normalized for instance in sentence]) for sentence in sentences] + return un_normalized, normalized, categories + + +def post_process_punctuation(text: str) -> str: + """ + Normalized quotes and spaces + + Args: + text: text + + Returns: text with normalized spaces and quotes + """ + text = ( + text.replace('( ', '(') + .replace(' )', ')') + .replace('{ ', '{') + .replace(' }', '}') + .replace('[ ', '[') + .replace(' ]', ']') + .replace(' ', ' ') + .replace('”', '"') + .replace("’", "'") + .replace("»", '"') + .replace("«", '"') + .replace("\\", "") + .replace("„", '"') + .replace("´", "'") + .replace("’", "'") + .replace('“', '"') + .replace("‘", "'") + .replace('`', "'") + .replace('- -', "--") + ) + + for punct in "!,.:;?": + text = text.replace(f' {punct}', punct) + return text.strip() + + +def pre_process(text: str) -> str: + """ + Optional text preprocessing before normalization (part of TTS TN pipeline) + + Args: + text: string that may include semiotic classes + + Returns: text with spaces around punctuation marks + """ + space_both = '[]' + for punct in space_both: + text = text.replace(punct, ' ' + punct + ' ') + + # remove extra space + text = re.sub(r' +', ' ', text) + return text + + +def load_file(file_path: str) -> List[str]: + """ + Loads given text file with separate lines into list of string. + + Args: + file_path: file path + + Returns: flat list of string + """ + res = [] + with open(file_path, 'r') as fp: + for line in fp: + res.append(line) + return res + + +def write_file(file_path: str, data: List[str]): + """ + Writes out list of string to file. 
+ + Args: + file_path: file path + data: list of string + + """ + with open(file_path, 'w') as fp: + for line in data: + fp.write(line + '\n') + + +def post_process_punct(input: str, normalized_text: str, add_unicode_punct: bool = False): + """ + Post-processing of the normalized output to match input in terms of spaces around punctuation marks. + After NN normalization, Moses detokenization puts a space after + punctuation marks, and attaches an opening quote "'" to the word to the right. + E.g., input to the TN NN model is "12 test' example", + after normalization and detokenization -> "twelve test 'example" (the quote is considered to be an opening quote, + but it doesn't match the input and can cause issues during TTS voice generation.) + The current function will match the punctuation and spaces of the normalized text with the input sequence. + "12 test' example" -> "twelve test 'example" -> "twelve test' example" (the quote was shifted to match the input). + + Args: + input: input text (original input to the NN, before normalization or tokenization) + normalized_text: output text (output of the TN NN model) + add_unicode_punct: set to True to handle unicode punctuation marks as well as default string.punctuation (increases post processing time) + """ + # in the post-processing WFST graph "``" are repalced with '"" quotes (otherwise single quotes "`" won't be handled correctly) + # this function fixes spaces around them based on input sequence, so here we're making the same double quote replacement + # to make sure these new double quotes work with this function + if "``" in input and "``" not in normalized_text: + input = input.replace("``", '"') + input = [x for x in input] + normalized_text = [x for x in normalized_text] + punct_marks = [x for x in string.punctuation if x in input] + + if add_unicode_punct: + punct_unicode = [ + chr(i) + for i in range(sys.maxunicode) + if category(chr(i)).startswith("P") and chr(i) not in punct_default and chr(i) in input 
+ ] + punct_marks = punct_marks.extend(punct_unicode) + + for punct in punct_marks: + try: + equal = True + if input.count(punct) != normalized_text.count(punct): + equal = False + idx_in, idx_out = 0, 0 + while punct in input[idx_in:]: + idx_out = normalized_text.index(punct, idx_out) + idx_in = input.index(punct, idx_in) + + def _is_valid(idx_out, idx_in, normalized_text, input): + """Check if previous or next word match (for cases when punctuation marks are part of + semiotic token, i.e. some punctuation can be missing in the normalized text)""" + return (idx_out > 0 and idx_in > 0 and normalized_text[idx_out - 1] == input[idx_in - 1]) or ( + idx_out < len(normalized_text) - 1 + and idx_in < len(input) - 1 + and normalized_text[idx_out + 1] == input[idx_in + 1] + ) + + if not equal and not _is_valid(idx_out, idx_in, normalized_text, input): + idx_in += 1 + continue + if idx_in > 0 and idx_out > 0: + if normalized_text[idx_out - 1] == " " and input[idx_in - 1] != " ": + normalized_text[idx_out - 1] = "" + + elif normalized_text[idx_out - 1] != " " and input[idx_in - 1] == " ": + normalized_text[idx_out - 1] += " " + + if idx_in < len(input) - 1 and idx_out < len(normalized_text) - 1: + if normalized_text[idx_out + 1] == " " and input[idx_in + 1] != " ": + normalized_text[idx_out + 1] = "" + elif normalized_text[idx_out + 1] != " " and input[idx_in + 1] == " ": + normalized_text[idx_out] = normalized_text[idx_out] + " " + idx_out += 1 + idx_in += 1 + except: + pass + + normalized_text = "".join(normalized_text) + return re.sub(r' +', ' ', normalized_text) diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/__init__.py new file mode 100644 index 0000000..a9d7d97 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst +from nemo_text_processing.text_normalization.en.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/clean_eval_data.py b/utils/speechio/nemo_text_processing/text_normalization/en/clean_eval_data.py new file mode 100644 index 0000000..8c33c4f --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/clean_eval_data.py @@ -0,0 +1,342 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from argparse import ArgumentParser +from typing import List + +import regex as re +from nemo_text_processing.text_normalization.data_loader_utils import ( + EOS_TYPE, + Instance, + load_files, + training_data_to_sentences, +) + + +""" +This file is for evaluation purposes. +filter_loaded_data() cleans data (list of instances) for text normalization. Filters and cleaners can be specified for each semiotic class individually. +For example, normalized text should only include characters and whitespace characters but no punctuation. + Cardinal unnormalized instances should contain at least one integer and all other characters are removed. +""" + + +class Filter: + """ + Filter class + + Args: + class_type: semiotic class used in dataset + process_func: function to transform text + filter_func: function to filter text + + """ + + def __init__(self, class_type: str, process_func: object, filter_func: object): + self.class_type = class_type + self.process_func = process_func + self.filter_func = filter_func + + def filter(self, instance: Instance) -> bool: + """ + filter function + + Args: + filters given instance with filter function + + Returns: True if given instance fulfills criteria or does not belong to class type + """ + if instance.token_type != self.class_type: + return True + return self.filter_func(instance) + + def process(self, instance: Instance) -> Instance: + """ + process function + + Args: + processes given instance with process function + + Returns: processed instance if instance belongs to expected class type or original instance + """ + if instance.token_type != self.class_type: + return instance + return self.process_func(instance) + + +def filter_cardinal_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_cardinal_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r"[^0-9]", "", un_normalized) + 
normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_ordinal_1(instance: Instance) -> bool: + ok = re.search(r"(st|nd|rd|th)\s*$", instance.un_normalized) + return ok + + +def process_ordinal_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r"[,\s]", "", un_normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_decimal_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_decimal_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + un_normalized = re.sub(r",", "", un_normalized) + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_measure_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_measure_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r",", "", un_normalized) + un_normalized = re.sub(r"m2", "m²", un_normalized) + un_normalized = re.sub(r"(\d)([^\d.\s])", r"\1 \2", un_normalized) + normalized = re.sub(r"[^a-z\s]", "", normalized) + normalized = re.sub(r"per ([a-z\s]*)s$", r"per \1", normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_money_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_money_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized 
= re.sub(r",", "", un_normalized) + un_normalized = re.sub(r"a\$", r"$", un_normalized) + un_normalized = re.sub(r"us\$", r"$", un_normalized) + un_normalized = re.sub(r"(\d)m\s*$", r"\1 million", un_normalized) + un_normalized = re.sub(r"(\d)bn?\s*$", r"\1 billion", un_normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_time_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_time_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + un_normalized = re.sub(r": ", ":", un_normalized) + un_normalized = re.sub(r"(\d)\s?a\s?m\s?", r"\1 a.m.", un_normalized) + un_normalized = re.sub(r"(\d)\s?p\s?m\s?", r"\1 p.m.", un_normalized) + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_plain_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_plain_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_punct_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_punct_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_date_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_date_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + un_normalized = re.sub(r",", "", un_normalized) + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return 
Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_letters_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_letters_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_verbatim_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_verbatim_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_digit_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_digit_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_telephone_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_telephone_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_electronic_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_electronic_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, 
normalized=normalized) + + +def filter_fraction_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_fraction_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_address_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_address_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +filters = [] +filters.append(Filter(class_type="CARDINAL", process_func=process_cardinal_1, filter_func=filter_cardinal_1)) +filters.append(Filter(class_type="ORDINAL", process_func=process_ordinal_1, filter_func=filter_ordinal_1)) +filters.append(Filter(class_type="DECIMAL", process_func=process_decimal_1, filter_func=filter_decimal_1)) +filters.append(Filter(class_type="MEASURE", process_func=process_measure_1, filter_func=filter_measure_1)) +filters.append(Filter(class_type="MONEY", process_func=process_money_1, filter_func=filter_money_1)) +filters.append(Filter(class_type="TIME", process_func=process_time_1, filter_func=filter_time_1)) + +filters.append(Filter(class_type="DATE", process_func=process_date_1, filter_func=filter_date_1)) +filters.append(Filter(class_type="PLAIN", process_func=process_plain_1, filter_func=filter_plain_1)) +filters.append(Filter(class_type="PUNCT", process_func=process_punct_1, filter_func=filter_punct_1)) +filters.append(Filter(class_type="LETTERS", process_func=process_letters_1, filter_func=filter_letters_1)) +filters.append(Filter(class_type="VERBATIM", process_func=process_verbatim_1, filter_func=filter_verbatim_1)) 
+filters.append(Filter(class_type="DIGIT", process_func=process_digit_1, filter_func=filter_digit_1)) +filters.append(Filter(class_type="TELEPHONE", process_func=process_telephone_1, filter_func=filter_telephone_1)) +filters.append(Filter(class_type="ELECTRONIC", process_func=process_electronic_1, filter_func=filter_electronic_1)) +filters.append(Filter(class_type="FRACTION", process_func=process_fraction_1, filter_func=filter_fraction_1)) +filters.append(Filter(class_type="ADDRESS", process_func=process_address_1, filter_func=filter_address_1)) +filters.append(Filter(class_type=EOS_TYPE, process_func=lambda x: x, filter_func=lambda x: True)) + + +def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Instance]: + """ + Filters list of instances + + Args: + data: list of instances + + Returns: filtered and transformed list of instances + """ + updates_instances = [] + for instance in data: + updated_instance = False + for fil in filters: + if fil.class_type == instance.token_type and fil.filter(instance): + instance = fil.process(instance) + updated_instance = True + if updated_instance: + if verbose: + print(instance) + updates_instances.append(instance) + return updates_instances + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument("--input", help="input file path", type=str, default='./en_with_types/output-00001-of-00100') + parser.add_argument("--verbose", help="print filtered instances", action='store_true') + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + file_path = args.input + + print("Loading training data: " + file_path) + instance_list = load_files([file_path]) # List of instances + filtered_instance_list = filter_loaded_data(instance_list, args.verbose) + training_data_to_sentences(filtered_instance_list) diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/__init__.py new file 
mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/address/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/address/__init__.py new file mode 100644 index 0000000..7d200df --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/address/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/address/address_word.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/address/address_word.tsv new file mode 100644 index 0000000..2e9e716 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/address/address_word.tsv @@ -0,0 +1,14 @@ +st Street +street Street +expy Expressway +fwy Freeway +hwy Highway +dr Drive +ct Court +ave Avenue +av Avenue +cir Circle +blvd Boulevard +alley Alley +way Way +jct Junction \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/address/state.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/address/state.tsv new file mode 100644 index 0000000..921704f --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/address/state.tsv @@ -0,0 +1,52 @@ +Alabama AL +Alaska AK +Arizona AZ +Arkansas AR +California CA +Colorado CO +Connecticut CT +Delaware DE +Florida FL +Georgia GA +Hawaii HI +Idaho ID +Illinois IL +Indiana IN +Indiana IND +Iowa IA +Kansas KS +Kentucky KY +Louisiana LA +Maine ME +Maryland MD +Massachusetts MA +Michigan MI +Minnesota MN +Mississippi MS +Missouri MO +Montana MT +Nebraska NE +Nevada NV +New Hampshire NH +New Jersey NJ +New Mexico NM +New York NY +North Carolina NC +North Dakota ND +Ohio OH +Oklahoma OK +Oregon OR +Pennsylvania PA +Rhode Island RI +South Carolina SC +South Dakota SD +Tennessee TN +Tennessee TENN +Texas TX +Utah UT +Vermont VT +Virginia VA +Washington WA +West Virginia WV +Wisconsin WI +Wyoming WY \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/date/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA 
CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/date/day.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/day.tsv new file mode 100644 index 0000000..ef8dd7a --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/day.tsv @@ -0,0 +1,31 @@ +one +two +three +four +five +six +seven +eight +nine +ten +eleven +twelve +thirteen +fourteen +fifteen +sixteen +seventeen +eighteen +nineteen +twenty +twenty one +twenty two +twenty three +twenty four +twenty five +twenty six +twenty seven +twenty eight +twenty nine +thirty +thirty one \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/date/month_abbr.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/month_abbr.tsv new file mode 100644 index 0000000..e70e410 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/month_abbr.tsv @@ -0,0 +1,12 @@ +jan january +feb february +mar march +apr april +jun june +jul july +aug august +sep september +sept september +oct october +nov november +dec december diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/date/month_name.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/month_name.tsv new file mode 100644 index 0000000..1c05ac0 --- /dev/null +++ 
b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/month_name.tsv @@ -0,0 +1,12 @@ +january +february +march +april +may +june +july +august +september +october +november +december diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/date/month_number.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/month_number.tsv new file mode 100644 index 0000000..f0cacd5 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/month_number.tsv @@ -0,0 +1,24 @@ +1 january +2 february +3 march +4 april +5 may +6 june +7 july +8 august +9 september +10 october +11 november +12 december +01 january +02 february +03 march +04 april +05 may +06 june +07 july +08 august +09 september +10 october +11 november +12 december \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/date/year_suffix.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/year_suffix.tsv new file mode 100644 index 0000000..8c799dd --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/date/year_suffix.tsv @@ -0,0 +1,16 @@ +A. D AD +A.D AD +a. d AD +a.d AD +a. d. AD +a.d. AD +B. C BC +B.C BC +b. c BC +b.c BC +A. D. AD +A.D. AD +B. C. BC +B.C. BC +b. c. BC +b.c. BC diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/electronic/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/electronic/__init__.py new file mode 100644 index 0000000..a1cf281 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/electronic/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/electronic/domain.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/electronic/domain.tsv new file mode 100644 index 0000000..0e7042c --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/electronic/domain.tsv @@ -0,0 +1,12 @@ +.com dot com +.org dot org +.gov dot gov +.uk dot UK +.fr dot FR +.net dot net +.br dot BR +.in dot IN +.ru dot RU +.de dot DE +.it dot IT +.jpg dot jpeg \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/electronic/symbol.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/electronic/symbol.tsv new file mode 100644 index 0000000..f633c77 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/electronic/symbol.tsv @@ -0,0 +1,21 @@ +. dot +- dash +_ underscore +! exclamation mark +# number sign +$ dollar sign +% percent sign +& ampersand +' quote +* asterisk ++ plus +/ slash += equal sign +? question mark +^ circumflex +` right single quote +{ left brace +| vertical bar +} right brace +~ tilde +, comma \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/math_operation.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/math_operation.tsv new file mode 100644 index 0000000..21e72fd --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/math_operation.tsv @@ -0,0 +1,8 @@ ++ plus +- minus +/ divided +÷ divided +: divided +× times +* times +· times \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/unit.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/unit.tsv new file mode 100644 index 0000000..c033ab8 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/unit.tsv @@ -0,0 +1,127 @@ +amu atomic mass unit +bar bar +° degree +º degree +°c degree Celsius +°C degree Celsius +ºc degree Celsius +ºC degree Celsius +℃ degree Celsius +cm2 square centimeter +cm² square centimeter +cm3 cubic centimeter +cm³ cubic centimeter +cm centimeter +cwt hundredweight +db decibel +dm3 cubic decimeter +dm³ cubic decimeter +dm decimeter +ds decisecond +°f degree Fahrenheit +°F degree Fahrenheit +℉ degree Fahrenheit +ft foot +ghz gigahertz +gw gigawatt +gwh gigawatt hour +hz hertz +" inch +kbps kilobit per second +kcal kilo calory +kgf kilogram force +kg kilogram +khz kilohertz +km2 square kilometer +km² square 
kilometer +km3 cubic kilometer +km³ cubic kilometer +km kilometer +kpa kilopascal +kwh kilowatt hour +kw kilowatt +kW kilowatt +lb pound +lbs pound +m2 square meter +m² square meter +m3 cubic meter +m³ cubic meter +mbps megabit per second +mg milligram +mhz megahertz +mi2 square mile +mi² square mile +mi3 cubic mile +mi³ cubic mile +cu mi cubic mile +mi mile +min minute +ml milliliter +mm2 square millimeter +mm² square millimeter +mol mole +mpa megapascal +mph mile per hour +ng nanogram +nm nanometer +ns nanosecond +oz ounce +pa pascal +% percent +rad radian +rpm revolution per minute +sq ft square foot +sq mi square mile +sv sievert +tb terabyte +tj terajoule +tl teraliter +v volt +yd yard +μg microgram +μm micrometer +μs microsecond +ω ohm +atm ATM +au AU +bq BQ +cc CC +cd CD +da DA +eb EB +ev EV +f F +gb GB +g G +gl GL +gpa GPA +gy GY +ha HA +h H +hl HL +hp GP +hs HS +kb KB +kl KL +kn KN +kt KT +kv KV +lm LM +ma MA +mA MA +mb MB +mc MC +mf MF +m M +mm MM +ms MS +mv MV +mw MW +pb PB +pg PG +ps PS +s S +tb TB +tb YB +zb ZB \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/unit_alternatives.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/unit_alternatives.tsv new file mode 100644 index 0000000..77e4057 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/measure/unit_alternatives.tsv @@ -0,0 +1,43 @@ +atm atmosphere +bq becquerel +cd candela +da dalton +eb exabyte +f degree Fahrenheit +gb gigabyte +g gram +gl gigaliter +ha hectare +h hour +hl hectoliter +hp horsepower +hp horsepower +kb kilobit +kb kilobyte +ma megaampere +mA megaampere +ma milliampere +mA milliampere +mb megabyte +mc megacoulomb +mf megafarad +m meter +m minute +mm millimeter +mm millimeter +mm millimeter +ms megasecond +ms mega siemens +ms millisecond +mv millivolt +mV millivolt +mw megawatt +mW megawatt +pb petabyte +pg petagram +ps petasecond +s second +tb terabyte +tb 
terabyte +yb yottabyte +zb zettabyte diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/money/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/money/currency_major.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/currency_major.tsv new file mode 100644 index 0000000..5201efd --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/currency_major.tsv @@ -0,0 +1,39 @@ +$ dollar +$ us dollar +US$ us dollar +฿ Thai Baht +£ pound +€ euro +₩ won +nzd new zealand dollar +rs rupee +chf swiss franc +dkk danish kroner +fim finnish markka +aed arab emirates dirham +¥ yen +czk czech koruna +mro mauritanian ouguiya +pkr pakistani rupee +crc costa rican colon +hk$ hong kong dollar +npr nepalese rupee +awg aruban florin +nok norwegian kroner +tzs tanzanian shilling +sek swedish kronor +cyp cypriot pound +r real +sar saudi riyal +cve cape verde escudo +rsd serbian dinar +dm german mark +shp saint helena pounds +php philippine peso +cad canadian dollar +ssp south sudanese pound +scr seychelles rupee +mvr maldivian 
rufiyaa +DH dirham +Dh dirham +Dhs. dirham diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/money/currency_minor_plural.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/currency_minor_plural.tsv new file mode 100644 index 0000000..a2f67b3 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/currency_minor_plural.tsv @@ -0,0 +1,4 @@ +$ cents +US$ cents +€ cents +£ pence \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/money/currency_minor_singular.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/currency_minor_singular.tsv new file mode 100644 index 0000000..fe629d8 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/currency_minor_singular.tsv @@ -0,0 +1,3 @@ +$ cent +€ cent +£ penny \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/money/per_unit.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/per_unit.tsv new file mode 100644 index 0000000..6548066 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/money/per_unit.tsv @@ -0,0 +1,2 @@ +/ea each +/dozen \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/cardinal_number_name.far b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/cardinal_number_name.far new file mode 100644 index 0000000..2ec6825 Binary files /dev/null and b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/cardinal_number_name.far differ diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/digit.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/digit.tsv new file mode 100644 index 0000000..fa32979 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/digit.tsv @@ -0,0 +1,9 @@ +one 1 +two 2 +three 3 +four 4 +five 5 +six 6 +seven 7 +eight 8 +nine 9 \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/fraction.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/fraction.tsv new file mode 100644 index 0000000..0085ad5 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/fraction.tsv @@ -0,0 +1,18 @@ +¼ 1/4 +½ 1/2 +¾ 3/4 +⅐ 1/7 +⅑ 1/9 +⅒ 1/10 +⅓ 1/3 +⅔ 2/3 +⅕ 1/5 +⅖ 2/5 +⅗ 3/5 +⅘ 4/5 +⅙ 1/6 +⅚ 5/6 +⅛ 1/8 +⅜ 3/8 +⅝ 5/8 +⅞ 7/8 diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/hundred.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/hundred.tsv new file mode 100644 index 0000000..10e2b96 --- /dev/null +++ 
b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/hundred.tsv @@ -0,0 +1 @@ +hundred \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/quantity_abbr.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/quantity_abbr.tsv new file mode 100644 index 0000000..7dba4f0 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/quantity_abbr.tsv @@ -0,0 +1,10 @@ +M million +MLN million +m million +mln million +B billion +b billion +BN billion +bn billion +K thousand +k thousand \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/teen.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/teen.tsv new file mode 100644 index 0000000..8e60fa1 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/teen.tsv @@ -0,0 +1,10 @@ +ten 10 +eleven 11 +twelve 12 +thirteen 13 +fourteen 14 +fifteen 15 +sixteen 16 +seventeen 17 +eighteen 18 +nineteen 19 \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/thousand.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/thousand.tsv new file mode 100644 index 0000000..bf30542 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/thousand.tsv @@ -0,0 +1,22 @@ +thousand +million +billion +trillion +quadrillion +quintillion +sextillion +septillion +octillion +nonillion +decillion +undecillion +duodecillion +tredecillion +quattuordecillion +quindecillion +sexdecillion +septendecillion +octodecillion +novemdecillion +vigintillion +centillion \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/ty.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/ty.tsv new file mode 100644 index 0000000..65f9839 --- 
/dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/ty.tsv @@ -0,0 +1,8 @@ +twenty 2 +thirty 3 +forty 4 +fifty 5 +sixty 6 +seventy 7 +eighty 8 +ninety 9 \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/number/zero.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/zero.tsv new file mode 100644 index 0000000..a1b116c --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/number/zero.tsv @@ -0,0 +1 @@ +zero 0 diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/ordinal/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/ordinal/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/ordinal/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/ordinal/digit.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/ordinal/digit.tsv new file mode 100644 index 0000000..4b7fc24 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/ordinal/digit.tsv @@ -0,0 +1,9 @@ +first one +second two +third three +fourth four +fifth five +sixth sixth +seventh seven +eighth eight +ninth nine \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/ordinal/teen.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/ordinal/teen.tsv new file mode 100644 index 0000000..496fefc --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/ordinal/teen.tsv @@ -0,0 +1 @@ +twelfth twelve \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/README.md b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/README.md new file mode 100644 index 0000000..398e197 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/README.md @@ -0,0 +1,20 @@ +`female.tsv` - List of common female names. Copyright (c) January 1991 by Mark Kantrowitz, 4987 names, Version 1.3 (29-MAR-94) +Source: [https://www.cs.cmu.edu/Groups/AI/areas/nlp/corpora/names/female.txt](https://www.cs.cmu.edu/Groups/AI/areas/nlp/corpora/names/female.txt) + +`male.tsv` - List of common male names. Copyright (c) January 1991 by Mark Kantrowitz, 2940 names, Version 1.3 (29-MAR-94) +Source: [https://www.cs.cmu.edu/Groups/AI/areas/nlp/corpora/names/male.txt](https://www.cs.cmu.edu/Groups/AI/areas/nlp/corpora/names/male.txt) + +[Corpora Readme.txt](https://www.cs.cmu.edu/Groups/AI/areas/nlp/corpora/names/readme.txt): + +You may use the lists of names for any purpose, so long as credit is given +in any published work. 
You may also redistribute the list if you +provide the recipients with a copy of this README file. The lists are +not in the public domain (I retain the copyright on the lists) but are +freely redistributable. + +If you have any additions to the lists of names, I would appreciate +receiving them. + +My email address is mkant+@cs.cmu.edu. + +Mark Kantrowitz \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/female.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/female.tsv new file mode 100644 index 0000000..4e248ce --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/female.tsv @@ -0,0 +1,4998 @@ +Abagael +Abagail +Abbe +Abbey +Abbi +Abbie +Abby +Abigael +Abigail +Abigale +Abra +Acacia +Ada +Adah +Adaline +Adara +Addie +Addis +Adel +Adela +Adelaide +Adele +Adelice +Adelina +Adelind +Adeline +Adella +Adelle +Adena +Adey +Adi +Adiana +Adina +Adora +Adore +Adoree +Adorne +Adrea +Adria +Adriaens +Adrian +Adriana +Adriane +Adrianna +Adrianne +Adrien +Adriena +Adrienne +Aeriel +Aeriela +Aeriell +Ag +Agace +Agata +Agatha +Agathe +Aggi +Aggie +Aggy +Agna +Agnella +Agnes +Agnese +Agnesse +Agneta +Agnola +Agretha +Aida +Aidan +Aigneis +Aila +Aile +Ailee +Aileen +Ailene +Ailey +Aili +Ailina +Ailyn +Aime +Aimee +Aimil +Aina +Aindrea +Ainslee +Ainsley +Ainslie +Ajay +Alaine +Alameda +Alana +Alanah +Alane +Alanna +Alayne +Alberta +Albertina +Albertine +Albina +Alecia +Aleda +Aleece +Aleecia +Aleen +Alejandra +Alejandrina +Alena +Alene +Alessandra +Aleta +Alethea +Alex +Alexa +Alexandra +Alexandrina +Alexi +Alexia +Alexina +Alexine +Alexis +Alfie +Alfreda +Ali +Alia +Alica +Alice +Alicea +Alicia +Alida +Alidia +Alina +Aline +Alis +Alisa +Alisha +Alison +Alissa +Alisun +Alix +Aliza +Alla +Alleen +Allegra +Allene +Alli +Allianora +Allie +Allina +Allis +Allison +Allissa +Allsun +Ally +Allyce +Allyn +Allys +Allyson +Alma +Almeda +Almeria +Almeta +Almira +Almire +Aloise +Aloisia +Aloysia +Alpa +Alta +Althea +Alvera +Alvina +Alvinia +Alvira +Alyce +Alyda +Alys +Alysa +Alyse +Alysia +Alyson +Alyss +Alyssa +Amabel +Amabelle +Amalea +Amalee +Amaleta +Amalia +Amalie +Amalita +Amalle +Amanda +Amandi +Amandie +Amandy +Amara +Amargo +Amata +Amber +Amberly +Ambrosia +Ambur +Ame +Amelia +Amelie +Amelina +Ameline +Amelita +Ami +Amie +Amity +Ammamaria +Amy 
+Ana +Anabel +Anabella +Anabelle +Anais +Analiese +Analise +Anallese +Anallise +Anastasia +Anastasie +Anastassia +Anatola +Andee +Andi +Andie +Andra +Andrea +Andreana +Andree +Andrei +Andria +Andriana +Andriette +Andromache +Andromeda +Andy +Anestassia +Anet +Anett +Anetta +Anette +Ange +Angel +Angela +Angele +Angelia +Angelica +Angelika +Angelina +Angeline +Angelique +Angelita +Angelle +Angie +Angil +Angy +Ania +Anica +Anissa +Anita +Anitra +Anja +Anjanette +Anjela +Ann +Ann-Mari +Ann-Marie +Anna +Anna-Diana +Anna-Diane +Anna-Maria +Annabal +Annabel +Annabela +Annabell +Annabella +Annabelle +Annadiana +Annadiane +Annalee +Annalena +Annaliese +Annalisa +Annalise +Annalyse +Annamari +Annamaria +Annamarie +Anne +Anne-Corinne +Anne-Mar +Anne-Marie +Annecorinne +Anneliese +Annelise +Annemarie +Annetta +Annette +Anni +Annice +Annie +Annissa +Annmaria +Annmarie +Annnora +Annora +Anny +Anselma +Ansley +Anstice +Anthe +Anthea +Anthia +Antoinette +Antonella +Antonetta +Antonia +Antonie +Antonietta +Antonina +Anya +Aphrodite +Appolonia +Aprilette +Ara +Arabel +Arabela +Arabele +Arabella +Arabelle +Arda +Ardath +Ardeen +Ardelia +Ardelis +Ardella +Ardelle +Arden +Ardene +Ardenia +Ardine +Ardis +Ardith +Ardra +Ardyce +Ardys +Ardyth +Aretha +Ariadne +Ariana +Arianne +Aridatha +Ariel +Ariela +Ariella +Arielle +Arlana +Arlee +Arleen +Arlen +Arlena +Arlene +Arleta +Arlette +Arleyne +Arlie +Arliene +Arlina +Arlinda +Arline +Arly +Arlyn +Arlyne +Aryn +Ashely +Ashlee +Ashleigh +Ashlen +Ashley +Ashli +Ashlie +Ashly +Asia +Astra +Astrid +Astrix +Atalanta +Athena +Athene +Atlanta +Atlante +Auberta +Aubine +Aubree +Aubrette +Aubrey +Aubrie +Aubry +Audi +Audie +Audra +Audre +Audrey +Audrie +Audry +Audrye +Audy +Augusta +Auguste +Augustina +Augustine +Aura +Aurea +Aurel +Aurelea +Aurelia +Aurelie +Auria +Aurie +Aurilia +Aurlie +Auroora +Aurora +Aurore +Austin +Austina +Austine +Ava +Aveline +Averil +Averyl +Avie +Avis +Aviva +Avivah +Avril +Avrit +Ayn +Bab +Babara +Babette +Babita +Babs 
+Bambi +Bambie +Bamby +Barb +Barbabra +Barbara +Barbara-Anne +Barbaraanne +Barbe +Barbee +Barbette +Barbey +Barbi +Barbie +Barbra +Barby +Bari +Barrie +Barry +Basia +Bathsheba +Batsheva +Bea +Beatrice +Beatrisa +Beatrix +Beatriz +Beau +Bebe +Becca +Becka +Becki +Beckie +Becky +Bee +Beilul +Beitris +Bekki +Bel +Belia +Belicia +Belinda +Belita +Bell +Bella +Bellamy +Bellanca +Belle +Bellina +Belva +Belvia +Bendite +Benedetta +Benedicta +Benedikta +Benetta +Benita +Benni +Bennie +Benny +Benoite +Berenice +Beret +Berget +Berna +Bernadene +Bernadette +Bernadina +Bernadine +Bernardina +Bernardine +Bernelle +Bernete +Bernetta +Bernette +Berni +Bernice +Bernie +Bernita +Berny +Berri +Berrie +Berry +Bert +Berta +Berte +Bertha +Berthe +Berti +Bertie +Bertina +Bertine +Berty +Beryl +Beryle +Bess +Bessie +Bessy +Beth +Bethanne +Bethany +Bethena +Bethina +Betsey +Betsy +Betta +Bette +Bette-Ann +Betteann +Betteanne +Betti +Bettie +Bettina +Bettine +Betty +Bettye +Beulah +Bev +Beverie +Beverlee +Beverlie +Beverly +Bevvy +Bianca +Bianka +Biddy +Bidget +Bill +Billi +Billie +Billy +Binni +Binnie +Binny +Bird +Birdie +Birgit +Birgitta +Blair +Blaire +Blake +Blakelee +Blakeley +Blanca +Blanch +Blancha +Blanche +Blinni +Blinnie +Blinny +Bliss +Blisse +Blithe +Blondell +Blondelle +Blondie +Blondy +Blythe +Bo +Bobbette +Bobbi +Bobbie +Bobby +Bobette +Bobina +Bobine +Bobinette +Bonita +Bonnee +Bonni +Bonnie +Bonny +Brana +Brandais +Brande +Brandea +Brandi +Brandice +Brandie +Brandise +Brandy +Brea +Breanne +Brear +Bree +Breena +Bren +Brena +Brenda +Brenn +Brenna +Brett +Bria +Briana +Brianna +Brianne +Bride +Bridget +Bridgett +Bridgette +Bridie +Brier +Brietta +Brigid +Brigida +Brigit +Brigitta +Brigitte +Brina +Briney +Briny +Brit +Brita +Britaney +Britani +Briteny +Britney +Britni +Britt +Britta +Brittan +Brittany +Britte +Brittney +Brook +Brooke +Brooks +Brunella +Brunhilda +Brunhilde +Bryana +Bryn +Bryna +Brynn +Brynna +Brynne +Buffy +Bunni +Bunnie +Bunny +Burta +Cabrina +Cacilia 
+Cacilie +Caitlin +Caitrin +Cal +Calida +Calla +Calley +Calli +Callida +Callie +Cally +Calypso +Cam +Camala +Camel +Camella +Camellia +Cameo +Cami +Camila +Camile +Camilla +Camille +Cammi +Cammie +Cammy +Canada +Candace +Candi +Candice +Candida +Candide +Candie +Candis +Candra +Candy +Cappella +Caprice +Cara +Caralie +Caren +Carena +Caresa +Caressa +Caresse +Carey +Cari +Caria +Carie +Caril +Carilyn +Carin +Carina +Carine +Cariotta +Carissa +Carita +Caritta +Carla +Carlee +Carleen +Carlen +Carlena +Carlene +Carley +Carli +Carlie +Carlin +Carlina +Carline +Carlisle +Carlita +Carlota +Carlotta +Carly +Carlye +Carlyn +Carlynn +Carlynne +Carma +Carmel +Carmela +Carmelia +Carmelina +Carmelita +Carmella +Carmelle +Carmen +Carmina +Carmine +Carmita +Carmon +Caro +Carol +Carol-Jean +Carola +Carolan +Carolann +Carole +Carolee +Caroleen +Carolie +Carolin +Carolina +Caroline +Caroljean +Carolyn +Carolyne +Carolynn +Caron +Carree +Carri +Carrie +Carrissa +Carrol +Carroll +Carry +Cary +Caryl +Caryn +Casandra +Casey +Casi +Casia +Casie +Cass +Cassandra +Cassandre +Cassandry +Cassaundra +Cassey +Cassi +Cassie +Cassondra +Cassy +Cat +Catarina +Cate +Caterina +Catha +Catharina +Catharine +Cathe +Cathee +Catherin +Catherina +Catherine +Cathi +Cathie +Cathleen +Cathlene +Cathrin +Cathrine +Cathryn +Cathy +Cathyleen +Cati +Catie +Catina +Catlaina +Catlee +Catlin +Catrina +Catriona +Caty +Cayla +Cecelia +Cecil +Cecile +Ceciley +Cecilia +Cecilla +Cecily +Ceil +Cele +Celene +Celesta +Celeste +Celestia +Celestina +Celestine +Celestyn +Celestyna +Celia +Celie +Celina +Celinda +Celine +Celinka +Celisse +Celle +Cesya +Chad +Chanda +Chandal +Chandra +Channa +Chantal +Chantalle +Charil +Charin +Charis +Charissa +Charisse +Charita +Charity +Charla +Charlean +Charleen +Charlena +Charlene +Charline +Charlot +Charlott +Charlotta +Charlotte +Charmain +Charmaine +Charmane +Charmian +Charmine +Charmion +Charo +Charyl +Chastity +Chelsae +Chelsea +Chelsey +Chelsie +Chelsy +Cher +Chere +Cherey +Cheri 
+Cherianne +Cherice +Cherida +Cherie +Cherilyn +Cherilynn +Cherin +Cherise +Cherish +Cherlyn +Cherri +Cherrita +Cherry +Chery +Cherye +Cheryl +Cheslie +Chiarra +Chickie +Chicky +Chiquita +Chloe +Chloette +Chloris +Chris +Chriss +Chrissa +Chrissie +Chrissy +Christa +Christabel +Christabella +Christabelle +Christal +Christalle +Christan +Christean +Christel +Christen +Christi +Christian +Christiana +Christiane +Christie +Christin +Christina +Christine +Christy +Christyna +Chrysa +Chrysler +Chrystal +Chryste +Chrystel +Ciara +Cicely +Cicily +Ciel +Cilka +Cinda +Cindee +Cindelyn +Cinderella +Cindi +Cindie +Cindra +Cindy +Cinnamon +Cissie +Cissy +Clair +Claire +Clara +Clarabelle +Clare +Claresta +Clareta +Claretta +Clarette +Clarey +Clari +Claribel +Clarice +Clarie +Clarinda +Clarine +Clarisa +Clarissa +Clarisse +Clarita +Clary +Claude +Claudelle +Claudetta +Claudette +Claudia +Claudie +Claudina +Claudine +Clea +Clem +Clemence +Clementia +Clementina +Clementine +Clemmie +Clemmy +Cleo +Cleopatra +Clerissa +Cleva +Clio +Clo +Cloe +Cloris +Clotilda +Clovis +Codee +Codi +Codie +Cody +Coleen +Colene +Coletta +Colette +Colleen +Collete +Collette +Collie +Colline +Colly +Con +Concettina +Conchita +Concordia +Conney +Conni +Connie +Conny +Consolata +Constance +Constancia +Constancy +Constanta +Constantia +Constantina +Constantine +Consuela +Consuelo +Cookie +Cora +Corabel +Corabella +Corabelle +Coral +Coralie +Coraline +Coralyn +Cordelia +Cordelie +Cordey +Cordie +Cordula +Cordy +Coreen +Corella +Corena +Corenda +Corene +Coretta +Corette +Corey +Cori +Corie +Corilla +Corina +Corine +Corinna +Corinne +Coriss +Corissa +Corliss +Corly +Cornela +Cornelia +Cornelle +Cornie +Corny +Correna +Correy +Corri +Corrianne +Corrie +Corrina +Corrine +Corrinne +Corry +Cortney +Cory +Cosetta +Cosette +Courtenay +Courtney +Cresa +Cris +Crissie +Crissy +Crista +Cristabel +Cristal +Cristen +Cristi +Cristie +Cristin +Cristina +Cristine +Cristionna +Cristy +Crysta +Crystal +Crystie +Cyb +Cybal 
+Cybel +Cybelle +Cybil +Cybill +Cyndi +Cyndy +Cynthea +Cynthia +Cynthie +Cynthy +Dacey +Dacia +Dacie +Dacy +Dael +Daffi +Daffie +Daffy +Dafna +Dagmar +Dahlia +Daile +Daisey +Daisi +Daisie +Daisy +Dale +Dalenna +Dalia +Dalila +Dallas +Daloris +Damara +Damaris +Damita +Dana +Danell +Danella +Danelle +Danette +Dani +Dania +Danica +Danice +Daniel +Daniela +Daniele +Daniella +Danielle +Danika +Danila +Danit +Danita +Danna +Danni +Dannie +Danny +Dannye +Danya +Danyelle +Danyette +Daphene +Daphna +Daphne +Dara +Darb +Darbie +Darby +Darcee +Darcey +Darci +Darcie +Darcy +Darda +Dareen +Darell +Darelle +Dari +Daria +Darice +Darla +Darleen +Darlene +Darline +Darryl +Darsey +Darsie +Darya +Daryl +Daryn +Dasha +Dasi +Dasie +Dasya +Datha +Daune +Daveen +Daveta +Davida +Davina +Davine +Davita +Dawn +Dawna +Dayle +Dayna +Dea +Deana +Deane +Deanna +Deanne +Deb +Debbi +Debbie +Debbra +Debby +Debee +Debera +Debi +Debor +Debora +Deborah +Debra +Dede +Dedie +Dedra +Dee +Dee Dee +Deeann +Deeanne +Deedee +Deena +Deerdre +Dehlia +Deidre +Deina +Deirdre +Del +Dela +Delaney +Delcina +Delcine +Delia +Delila +Delilah +Delinda +Dell +Della +Delly +Delora +Delores +Deloria +Deloris +Delphina +Delphine +Delphinia +Demeter +Demetra +Demetria +Demetris +Dena +Deni +Denice +Denise +Denna +Denni +Dennie +Denny +Deny +Denys +Denyse +Deonne +Desaree +Desdemona +Desirae +Desiree +Desiri +Deva +Devan +Devi +Devin +Devina +Devinne +Devon +Devondra +Devonna +Devonne +Devora +Dew +Di +Diahann +Diamond +Dian +Diana +Diandra +Diane +Diane-Marie +Dianemarie +Diann +Dianna +Dianne +Diannne +Didi +Dido +Diena +Dierdre +Dina +Dinah +Dinnie +Dinny +Dion +Dione +Dionis +Dionne +Dita +Dix +Dixie +Dode +Dodi +Dodie +Dody +Doe +Doll +Dolley +Dolli +Dollie +Dolly +Dolora +Dolores +Dolorita +Doloritas +Dominica +Dominique +Dona +Donella +Donelle +Donetta +Donia +Donica +Donielle +Donna +Donnajean +Donnamarie +Donni +Donnie +Donny +Dora +Doralia +Doralin +Doralyn +Doralynn +Doralynne +Dorcas +Dore +Doreen +Dorelia 
+Dorella +Dorelle +Dorena +Dorene +Doretta +Dorette +Dorey +Dori +Doria +Dorian +Dorice +Dorie +Dorine +Doris +Dorisa +Dorise +Dorit +Dorita +Doro +Dorolice +Dorolisa +Dorotea +Doroteya +Dorothea +Dorothee +Dorothy +Dorree +Dorri +Dorrie +Dorris +Dorry +Dorthea +Dorthy +Dory +Dosi +Dot +Doti +Dotti +Dottie +Dotty +Dove +Drea +Drew +Dulce +Dulcea +Dulci +Dulcia +Dulciana +Dulcie +Dulcine +Dulcinea +Dulcy +Dulsea +Dusty +Dyan +Dyana +Dyane +Dyann +Dyanna +Dyanne +Dyna +Dynah +E'Lane +Eada +Eadie +Eadith +Ealasaid +Eartha +Easter +Eba +Ebba +Ebonee +Ebony +Eda +Eddi +Eddie +Eddy +Ede +Edee +Edeline +Eden +Edi +Edie +Edin +Edita +Edith +Editha +Edithe +Ediva +Edna +Edwina +Edy +Edyth +Edythe +Effie +Eileen +Eilis +Eimile +Eirena +Ekaterina +Elaina +Elaine +Elana +Elane +Elayne +Elberta +Elbertina +Elbertine +Eleanor +Eleanora +Eleanore +Electra +Elena +Elene +Eleni +Elenore +Eleonora +Eleonore +Elfie +Elfreda +Elfrida +Elfrieda +Elga +Elianora +Elianore +Elicia +Elie +Elinor +Elinore +Elisa +Elisabet +Elisabeth +Elisabetta +Elise +Elisha +Elissa +Elita +Eliza +Elizabet +Elizabeth +Elka +Elke +Ella +Elladine +Elle +Ellen +Ellene +Ellette +Elli +Ellie +Ellissa +Elly +Ellyn +Ellynn +Elmira +Elna +Elnora +Elnore +Eloisa +Eloise +Elonore +Elora +Elsa +Elsbeth +Else +Elsey +Elsi +Elsie +Elsinore +Elspeth +Elsy +Elva +Elvera +Elvina +Elvira +Elwina +Elwira +Elyn +Elyse +Elysee +Elysha +Elysia +Elyssa +Em +Ema +Emalee +Emalia +Emanuela +Emelda +Emelia +Emelina +Emeline +Emelita +Emelyne +Emera +Emilee +Emili +Emilia +Emilie +Emiline +Emily +Emlyn +Emlynn +Emlynne +Emma +Emmalee +Emmaline +Emmalyn +Emmalynn +Emmalynne +Emmeline +Emmey +Emmi +Emmie +Emmy +Emmye +Emogene +Emyle +Emylee +Endora +Engracia +Enid +Enrica +Enrichetta +Enrika +Enriqueta +Enya +Eolanda +Eolande +Eran +Erda +Erena +Erica +Ericha +Ericka +Erika +Erin +Erina +Erinn +Erinna +Erma +Ermengarde +Ermentrude +Ermina +Erminia +Erminie +Erna +Ernaline +Ernesta +Ernestine +Ertha +Eryn +Esma +Esmaria +Esme 
+Esmeralda +Esmerelda +Essa +Essie +Essy +Esta +Estel +Estele +Estell +Estella +Estelle +Ester +Esther +Estrella +Estrellita +Ethel +Ethelda +Ethelin +Ethelind +Etheline +Ethelyn +Ethyl +Etta +Etti +Ettie +Etty +Eudora +Eugenia +Eugenie +Eugine +Eula +Eulalie +Eunice +Euphemia +Eustacia +Eva +Evaleen +Evangelia +Evangelin +Evangelina +Evangeline +Evania +Evanne +Eve +Eveleen +Evelina +Eveline +Evelyn +Evette +Evey +Evie +Evita +Evonne +Evvie +Evvy +Evy +Eyde +Eydie +Fabrianne +Fabrice +Fae +Faina +Faith +Fallon +Fan +Fanchette +Fanchon +Fancie +Fancy +Fanechka +Fania +Fanni +Fannie +Fanny +Fanya +Fara +Farah +Farand +Farica +Farra +Farrah +Farrand +Fatima +Faun +Faunie +Faustina +Faustine +Fawn +Fawna +Fawne +Fawnia +Fay +Faydra +Faye +Fayette +Fayina +Fayre +Fayth +Faythe +Federica +Fedora +Felecia +Felicdad +Felice +Felicia +Felicity +Felicle +Felipa +Felisha +Felita +Feliza +Fenelia +Feodora +Ferdinanda +Ferdinande +Fern +Fernanda +Fernande +Fernandina +Ferne +Fey +Fiann +Fianna +Fidela +Fidelia +Fidelity +Fifi +Fifine +Filia +Filide +Filippa +Fina +Fiona +Fionna +Fionnula +Fiorenze +Fleur +Fleurette +Flo +Flor +Flora +Florance +Flore +Florella +Florence +Florencia +Florentia +Florenza +Florette +Flori +Floria +Florice +Florida +Florie +Florina +Florinda +Floris +Florri +Florrie +Florry +Flory +Flossi +Flossie +Flossy +Flower +Fortuna +Fortune +Fran +France +Francene +Frances +Francesca +Francesmary +Francine +Francis +Francisca +Franciska +Francoise +Francyne +Frank +Frankie +Franky +Franni +Frannie +Franny +Frayda +Fred +Freda +Freddi +Freddie +Freddy +Fredelia +Frederica +Fredericka +Fredi +Fredia +Fredra +Fredrika +Freida +Frieda +Friederike +Fulvia +Gabbey +Gabbi +Gabbie +Gabey +Gabi +Gabie +Gabriel +Gabriela +Gabriell +Gabriella +Gabrielle +Gabriellia +Gabrila +Gaby +Gae +Gael +Gail +Gale +Gale +Galina +Garland +Garnet +Garnette +Gates +Gavra +Gavrielle +Gay +Gayla +Gayle +Gayleen +Gaylene +Gaynor +Gelya +Gen +Gena +Gene +Geneva +Genevieve +Genevra +Genia 
+Genna +Genni +Gennie +Gennifer +Genny +Genovera +Genvieve +George +Georgeanna +Georgeanne +Georgena +Georgeta +Georgetta +Georgette +Georgia +Georgiamay +Georgiana +Georgianna +Georgianne +Georgie +Georgina +Georgine +Gera +Geralda +Geraldina +Geraldine +Gerda +Gerhardine +Geri +Gerianna +Gerianne +Gerladina +Germain +Germaine +Germana +Gerri +Gerrie +Gerrilee +Gerry +Gert +Gerta +Gerti +Gertie +Gertrud +Gertruda +Gertrude +Gertrudis +Gerty +Giacinta +Giana +Gianina +Gianna +Gigi +Gilberta +Gilberte +Gilbertina +Gilbertine +Gilda +Gill +Gillan +Gilli +Gillian +Gillie +Gilligan +Gilly +Gina +Ginelle +Ginevra +Ginger +Ginni +Ginnie +Ginnifer +Ginny +Giorgia +Giovanna +Gipsy +Giralda +Gisela +Gisele +Gisella +Giselle +Gizela +Glad +Gladi +Gladis +Gladys +Gleda +Glen +Glenda +Glenine +Glenn +Glenna +Glennie +Glennis +Glori +Gloria +Gloriana +Gloriane +Glorianna +Glory +Glyn +Glynda +Glynis +Glynnis +Godiva +Golda +Goldarina +Goldi +Goldia +Goldie +Goldina +Goldy +Grace +Gracia +Gracie +Grata +Gratia +Gratiana +Gray +Grayce +Grazia +Gredel +Greer +Greta +Gretal +Gretchen +Grete +Gretel +Grethel +Gretna +Gretta +Grier +Griselda +Grissel +Guendolen +Guenevere +Guenna +Guglielma +Gui +Guillema +Guillemette +Guinevere +Guinna +Gunilla +Gunvor +Gus +Gusella +Gussi +Gussie +Gussy +Gusta +Gusti +Gustie +Gusty +Gwen +Gwendolen +Gwendolin +Gwendolyn +Gweneth +Gwenette +Gwenn +Gwenneth +Gwenni +Gwennie +Gwenny +Gwenora +Gwenore +Gwyn +Gwyneth +Gwynne +Gypsy +Hadria +Hailee +Haily +Haleigh +Halette +Haley +Hali +Halie +Halimeda +Halley +Halli +Hallie +Hally +Hana +Hanna +Hannah +Hanni +Hannibal +Hannie +Hannis +Hanny +Happy +Harlene +Harley +Harli +Harlie +Harmonia +Harmonie +Harmony +Harri +Harrie +Harriet +Harriett +Harrietta +Harriette +Harriot +Harriott +Hatti +Hattie +Hatty +Havivah +Hayley +Hazel +Heath +Heather +Heda +Hedda +Heddi +Heddie +Hedi +Hedvig +Hedwig +Hedy +Heida +Heide +Heidi +Heidie +Helaina +Helaine +Helen +Helen-Elizabeth +Helena +Helene +Helga +Helge +Helise 
+Hellene +Helli +Heloise +Helsa +Helyn +Hendrika +Henka +Henrie +Henrieta +Henrietta +Henriette +Henryetta +Hephzibah +Hermia +Hermina +Hermine +Herminia +Hermione +Herta +Hertha +Hester +Hesther +Hestia +Hetti +Hettie +Hetty +Hilarie +Hilary +Hilda +Hildagard +Hildagarde +Hilde +Hildegaard +Hildegarde +Hildy +Hillary +Hilliary +Hinda +Holley +Holli +Hollie +Holly +Holly-Anne +Hollyanne +Honey +Honor +Honoria +Hope +Horatia +Hortense +Hortensia +Hulda +Hyacinth +Hyacintha +Hyacinthe +Hyacinthia +Hyacinthie +Hynda +Ianthe +Ibbie +Ibby +Ida +Idalia +Idalina +Idaline +Idell +Idelle +Idette +Ike +Ikey +Ilana +Ileana +Ileane +Ilene +Ilise +Ilka +Illa +Ilona +Ilsa +Ilse +Ilysa +Ilyse +Ilyssa +Imelda +Imogen +Imogene +Imojean +Ina +Inci +Indira +Ines +Inesita +Inessa +Inez +Inga +Ingaberg +Ingaborg +Inge +Ingeberg +Ingeborg +Inger +Ingrid +Ingunna +Inna +Ioana +Iolande +Iolanthe +Iona +Iormina +Ira +Irena +Irene +Irina +Iris +Irita +Irma +Isa +Isabeau +Isabel +Isabelita +Isabella +Isabelle +Isador +Isadora +Isadore +Isahella +Iseabal +Isidora +Isis +Isobel +Issi +Issie +Issy +Ivett +Ivette +Ivie +Ivonne +Ivory +Ivy +Izabel +Izzi +Jacenta +Jacinda +Jacinta +Jacintha +Jacinthe +Jackelyn +Jacki +Jackie +Jacklin +Jacklyn +Jackquelin +Jackqueline +Jacky +Jaclin +Jaclyn +Jacquelin +Jacqueline +Jacquelyn +Jacquelynn +Jacquenetta +Jacquenette +Jacquetta +Jacquette +Jacqui +Jacquie +Jacynth +Jada +Jade +Jaime +Jaimie +Jaine +Jaleh +Jami +Jamie +Jamima +Jammie +Jan +Jana +Janaya +Janaye +Jandy +Jane +Janean +Janeczka +Janeen +Janel +Janela +Janella +Janelle +Janene +Janenna +Janessa +Janet +Janeta +Janetta +Janette +Janeva +Janey +Jania +Janice +Janie +Janifer +Janina +Janine +Janis +Janith +Janka +Janna +Jannel +Jannelle +Janot +Jany +Jaquelin +Jaquelyn +Jaquenetta +Jaquenette +Jaquith +Jasmin +Jasmina +Jasmine +Jayme +Jaymee +Jayne +Jaynell +Jazmin +Jean +Jeana +Jeane +Jeanelle +Jeanette +Jeanie +Jeanine +Jeanna +Jeanne +Jeannette +Jeannie +Jeannine +Jehanna +Jelene +Jemie 
+Jemima +Jemimah +Jemmie +Jemmy +Jen +Jena +Jenda +Jenelle +Jenette +Jeni +Jenica +Jeniece +Jenifer +Jeniffer +Jenilee +Jenine +Jenn +Jenna +Jennee +Jennette +Jenni +Jennica +Jennie +Jennifer +Jennilee +Jennine +Jenny +Jeraldine +Jeralee +Jere +Jeri +Jermaine +Jerrie +Jerrilee +Jerrilyn +Jerrine +Jerry +Jerrylee +Jess +Jessa +Jessalin +Jessalyn +Jessamine +Jessamyn +Jesse +Jesselyn +Jessi +Jessica +Jessie +Jessika +Jessy +Jewel +Jewell +Jewelle +Jill +Jillana +Jillane +Jillayne +Jilleen +Jillene +Jilli +Jillian +Jillie +Jilly +Jinny +Jo +Jo Ann +Jo-Ann +Jo-Anne +JoAnn +JoAnne +Joan +Joana +Joane +Joanie +Joann +Joanna +Joanne +Joannes +Jobey +Jobi +Jobie +Jobina +Joby +Jobye +Jobyna +Jocelin +Joceline +Jocelyn +Jocelyne +Jodee +Jodi +Jodie +Jody +Joela +Joelie +Joell +Joella +Joelle +Joellen +Joelly +Joellyn +Joelynn +Joete +Joey +Johanna +Johannah +Johnette +Johnna +Joice +Jojo +Jolee +Joleen +Jolene +Joletta +Joli +Jolie +Joline +Joly +Jolyn +Jolynn +Jonell +Joni +Jonie +Jonis +Jordain +Jordan +Jordana +Jordanna +Jorey +Jori +Jorie +Jorrie +Jorry +Joscelin +Josee +Josefa +Josefina +Joselyn +Josepha +Josephina +Josephine +Josey +Josi +Josie +Joslyn +Josselyn +Josy +Jourdan +Joy +Joya +Joyan +Joyann +Joyce +Joycelin +Joye +Joyous +Juana +Juanita +Jude +Judi +Judie +Judith +Juditha +Judy +Judye +Julee +Juli +Julia +Juliana +Juliane +Juliann +Julianna +Julianne +Julie +Julienne +Juliet +Julieta +Julietta +Juliette +Julina +Juline +Julissa +Julita +Junette +Junia +Junie +Junina +Justin +Justina +Justine +Jyoti +Kacey +Kacie +Kacy +Kai +Kaia +Kaila +Kaile +Kailey +Kaitlin +Kaitlyn +Kaitlynn +Kaja +Kakalina +Kala +Kaleena +Kali +Kalie +Kalila +Kalina +Kalinda +Kalindi +Kalli +Kally +Kameko +Kamila +Kamilah +Kamillah +Kandace +Kandy +Kania +Kanya +Kara +Kara-Lynn +Karalee +Karalynn +Kare +Karee +Karel +Karen +Karena +Kari +Karia +Karie +Karil +Karilynn +Karin +Karina +Karine +Kariotta +Karisa +Karissa +Karita +Karla +Karlee +Karleen +Karlen +Karlene +Karlie +Karlotta 
+Karlotte +Karly +Karlyn +Karmen +Karna +Karol +Karola +Karole +Karolina +Karoline +Karoly +Karon +Karrah +Karrie +Karry +Kary +Karyl +Karylin +Karyn +Kasey +Kass +Kassandra +Kassey +Kassi +Kassia +Kassie +Kaster +Kat +Kata +Katalin +Kate +Katee +Katerina +Katerine +Katey +Kath +Katha +Katharina +Katharine +Katharyn +Kathe +Katheleen +Katherina +Katherine +Katheryn +Kathi +Kathie +Kathleen +Kathlene +Kathlin +Kathrine +Kathryn +Kathryne +Kathy +Kathye +Kati +Katie +Katina +Katine +Katinka +Katleen +Katlin +Katrina +Katrine +Katrinka +Katti +Kattie +Katuscha +Katusha +Katy +Katya +Kay +Kaycee +Kaye +Kayla +Kayle +Kaylee +Kayley +Kaylil +Kaylyn +Kee +Keeley +Keelia +Keely +Kelcey +Kelci +Kelcie +Kelcy +Kelila +Kellen +Kelley +Kelli +Kellia +Kellie +Kellina +Kellsie +Kelly +Kellyann +Kelsey +Kelsi +Kelsy +Kendra +Kendre +Kenna +Keren +Keri +Keriann +Kerianne +Kerri +Kerrie +Kerrill +Kerrin +Kerry +Kerstin +Kesley +Keslie +Kessia +Kessiah +Ketti +Kettie +Ketty +Kevina +Kevyn +Ki +Kia +Kiah +Kial +Kiele +Kiersten +Kikelia +Kiley +Kim +Kimberlee +Kimberley +Kimberli +Kimberly +Kimberlyn +Kimbra +Kimmi +Kimmie +Kimmy +Kinna +Kip +Kipp +Kippie +Kippy +Kira +Kirbee +Kirbie +Kirby +Kiri +Kirsten +Kirsteni +Kirsti +Kirstie +Kirstin +Kirstyn +Kissee +Kissiah +Kissie +Kit +Kitti +Kittie +Kitty +Kizzee +Kizzie +Klara +Klarika +Klarrisa +Konstance +Konstanze +Koo +Kora +Koral +Koralle +Kordula +Kore +Korella +Koren +Koressa +Kori +Korie +Korney +Korrie +Korry +Kourtney +Kris +Krissie +Krissy +Krista +Kristal +Kristan +Kriste +Kristel +Kristen +Kristi +Kristien +Kristin +Kristina +Kristine +Kristy +Kristyn +Krysta +Krystal +Krystalle +Krystle +Krystyna +Kyla +Kyle +Kylen +Kylie +Kylila +Kylynn +Kym +Kynthia +Kyrstin +La +Lacee +Lacey +Lacie +Lacy +Ladonna +Laetitia +Laila +Laina +Lainey +Lamb +Lana +Lane +Lanette +Laney +Lani +Lanie +Lanita +Lanna +Lanni +Lanny +Lara +Laraine +Lari +Larina +Larine +Larisa +Larissa +Lark +Laryssa +Latashia +Latia +Latisha +Latrena +Latrina +Laura 
+Lauraine +Laural +Lauralee +Laure +Lauree +Laureen +Laurel +Laurella +Lauren +Laurena +Laurene +Lauretta +Laurette +Lauri +Laurianne +Laurice +Laurie +Lauryn +Lavena +Laverna +Laverne +Lavina +Lavinia +Lavinie +Layla +Layne +Layney +Lea +Leah +Leandra +Leann +Leanna +Leanne +Leanor +Leanora +Lebbie +Leda +Lee +LeeAnn +Leeann +Leeanne +Leela +Leelah +Leena +Leesa +Leese +Legra +Leia +Leiah +Leigh +Leigha +Leila +Leilah +Leisha +Lela +Lelah +Leland +Lelia +Lena +Lenee +Lenette +Lenka +Lenna +Lenora +Lenore +Leodora +Leoine +Leola +Leoline +Leona +Leonanie +Leone +Leonelle +Leonie +Leonora +Leonore +Leontine +Leontyne +Leora +Leorah +Leshia +Lesley +Lesli +Leslie +Lesly +Lesya +Leta +Lethia +Leticia +Letisha +Letitia +Letta +Letti +Lettie +Letty +Leyla +Lezlie +Lia +Lian +Liana +Liane +Lianna +Lianne +Lib +Libbey +Libbi +Libbie +Libby +Licha +Lida +Lidia +Lil +Lila +Lilah +Lilas +Lilia +Lilian +Liliane +Lilias +Lilith +Lilla +Lilli +Lillian +Lillis +Lilllie +Lilly +Lily +Lilyan +Lin +Lina +Lind +Linda +Lindi +Lindie +Lindsay +Lindsey +Lindsy +Lindy +Linea +Linell +Linet +Linette +Linn +Linnea +Linnell +Linnet +Linnie +Linzy +Liora +Liorah +Lira +Lisa +Lisabeth +Lisandra +Lisbeth +Lise +Lisetta +Lisette +Lisha +Lishe +Lissa +Lissi +Lissie +Lissy +Lita +Liuka +Livia +Liz +Liza +Lizabeth +Lizbeth +Lizette +Lizzie +Lizzy +Loella +Lois +Loise +Lola +Lolande +Loleta +Lolita +Lolly +Lona +Lonee +Loni +Lonna +Lonni +Lonnie +Lora +Lorain +Loraine +Loralee +Loralie +Loralyn +Loree +Loreen +Lorelei +Lorelle +Loren +Lorena +Lorene +Lorenza +Loretta +Lorettalorna +Lorette +Lori +Loria +Lorianna +Lorianne +Lorie +Lorilee +Lorilyn +Lorinda +Lorine +Lorita +Lorna +Lorne +Lorraine +Lorrayne +Lorri +Lorrie +Lorrin +Lorry +Lory +Lotta +Lotte +Lotti +Lottie +Lotty +Lou +Louella +Louisa +Louise +Louisette +Love +Luana +Luanna +Luce +Luci +Lucia +Luciana +Lucie +Lucienne +Lucila +Lucilia +Lucille +Lucina +Lucinda +Lucine +Lucita +Lucky +Lucretia +Lucy +Luella +Luelle +Luisa +Luise +Lula 
+Lulita +Lulu +Luna +Lura +Lurette +Lurleen +Lurlene +Lurline +Lusa +Lust +Lyda +Lydia +Lydie +Lyn +Lynda +Lynde +Lyndel +Lyndell +Lyndsay +Lyndsey +Lyndsie +Lyndy +Lynea +Lynelle +Lynett +Lynette +Lynn +Lynna +Lynne +Lynnea +Lynnell +Lynnelle +Lynnet +Lynnett +Lynnette +Lynsey +Lysandra +Lyssa +Mab +Mabel +Mabelle +Mable +Mada +Madalena +Madalyn +Maddalena +Maddi +Maddie +Maddy +Madel +Madelaine +Madeleine +Madelena +Madelene +Madelin +Madelina +Madeline +Madella +Madelle +Madelon +Madelyn +Madge +Madlen +Madlin +Madona +Madonna +Mady +Mae +Maegan +Mag +Magda +Magdaia +Magdalen +Magdalena +Magdalene +Maggee +Maggi +Maggie +Maggy +Magna +Mahala +Mahalia +Maia +Maible +Maiga +Mair +Maire +Mairead +Maisey +Maisie +Mala +Malanie +Malcah +Malena +Malia +Malina +Malinda +Malinde +Malissa +Malissia +Malka +Malkah +Mallissa +Mallorie +Mallory +Malorie +Malory +Malva +Malvina +Malynda +Mame +Mamie +Manda +Mandi +Mandie +Mandy +Manon +Manya +Mara +Marabel +Marcela +Marcelia +Marcella +Marcelle +Marcellina +Marcelline +Marchelle +Marci +Marcia +Marcie +Marcile +Marcille +Marcy +Mareah +Maren +Marena +Maressa +Marga +Margalit +Margalo +Margaret +Margareta +Margarete +Margaretha +Margarethe +Margaretta +Margarette +Margarita +Margaux +Marge +Margeaux +Margery +Marget +Margette +Margi +Margie +Margit +Marglerite +Margo +Margot +Margret +Marguerite +Margurite +Margy +Mari +Maria +Mariam +Marian +Mariana +Mariann +Marianna +Marianne +Maribel +Maribelle +Maribeth +Marice +Maridel +Marie +Marie-Ann +Marie-Jeanne +Marieann +Mariejeanne +Mariel +Mariele +Marielle +Mariellen +Marietta +Mariette +Marigold +Marijo +Marika +Marilee +Marilin +Marillin +Marilyn +Marin +Marina +Marinna +Marion +Mariquilla +Maris +Marisa +Mariska +Marissa +Marit +Marita +Maritsa +Mariya +Marj +Marja +Marje +Marji +Marjie +Marjorie +Marjory +Marjy +Marketa +Marla +Marlane +Marleah +Marlee +Marleen +Marlena +Marlene +Marley +Marlie +Marline +Marlo +Marlyn +Marna +Marne +Marney +Marni +Marnia +Marnie +Marquita 
+Marrilee +Marris +Marrissa +Marry +Marsha +Marsiella +Marta +Martelle +Martguerita +Martha +Marthe +Marthena +Marti +Martica +Martie +Martina +Martita +Marty +Martynne +Mary +Marya +Maryangelyn +Maryann +Maryanna +Maryanne +Marybelle +Marybeth +Maryellen +Maryjane +Maryjo +Maryl +Marylee +Marylin +Marylinda +Marylou +Marylynne +Maryrose +Marys +Marysa +Masha +Matelda +Mathilda +Mathilde +Matilda +Matilde +Matti +Mattie +Matty +Maud +Maude +Maudie +Maura +Maure +Maureen +Maureene +Maurene +Maurine +Maurise +Maurita +Mavis +Mavra +Max +Maxi +Maxie +Maxine +Maxy +Maya +Maybelle +Mayda +Maye +Mead +Meade +Meagan +Meaghan +Meara +Mechelle +Meg +Megan +Megen +Meggan +Meggi +Meggie +Meggy +Meghan +Meghann +Mehetabel +Mei +Meira +Mel +Mela +Melamie +Melania +Melanie +Melantha +Melany +Melba +Melesa +Melessa +Melicent +Melina +Melinda +Melinde +Melisa +Melisande +Melisandra +Melisenda +Melisent +Melissa +Melisse +Melita +Melitta +Mella +Melli +Mellicent +Mellie +Mellisa +Mellisent +Mellissa +Melloney +Melly +Melodee +Melodie +Melody +Melonie +Melony +Melosa +Melva +Mercedes +Merci +Mercie +Mercy +Meredith +Meredithe +Meridel +Meridith +Meriel +Merilee +Merilyn +Meris +Merissa +Merl +Merla +Merle +Merlina +Merline +Merna +Merola +Merralee +Merridie +Merrie +Merrielle +Merrile +Merrilee +Merrili +Merrill +Merrily +Merry +Mersey +Meryl +Meta +Mia +Micaela +Michaela +Michaelina +Michaeline +Michaella +Michal +Michel +Michele +Michelina +Micheline +Michell +Michelle +Micki +Mickie +Micky +Midge +Mignon +Mignonne +Miguela +Miguelita +Mildred +Mildrid +Milena +Milicent +Milissent +Milka +Milli +Millicent +Millie +Millisent +Milly +Milzie +Mimi +Min +Mina +Minda +Mindy +Minerva +Minetta +Minette +Minna +Minni +Minnie +Minny +Minta +Miquela +Mira +Mirabel +Mirabella +Mirabelle +Miran +Miranda +Mireielle +Mireille +Mirella +Mirelle +Miriam +Mirilla +Mirna +Misha +Missie +Missy +Misti +Misty +Mitra +Mitzi +Mmarianne +Modesta +Modestia +Modestine +Modesty +Moina +Moira +Moll +Mollee 
+Molli +Mollie +Molly +Mommy +Mona +Monah +Monica +Monika +Monique +Mora +Moreen +Morena +Morgan +Morgana +Morganica +Morganne +Morgen +Moria +Morissa +Morlee +Morna +Moselle +Moya +Moyna +Moyra +Mozelle +Muffin +Mufi +Mufinella +Muire +Mureil +Murial +Muriel +Murielle +Myna +Myra +Myrah +Myranda +Myriam +Myrilla +Myrle +Myrlene +Myrna +Myrta +Myrtia +Myrtice +Myrtie +Myrtle +Nada +Nadean +Nadeen +Nadia +Nadine +Nadiya +Nady +Nadya +Nalani +Nan +Nana +Nananne +Nance +Nancee +Nancey +Nanci +Nancie +Nancy +Nanete +Nanette +Nani +Nanice +Nanine +Nannette +Nanni +Nannie +Nanny +Nanon +Naoma +Naomi +Nara +Nari +Nariko +Nat +Nata +Natala +Natalee +Natalia +Natalie +Natalina +Nataline +Natalya +Natasha +Natassia +Nathalia +Nathalie +Natka +Natty +Neala +Neda +Nedda +Nedi +Neely +Neila +Neile +Neilla +Neille +Nela +Nelia +Nelie +Nell +Nelle +Nelli +Nellie +Nelly +Nena +Nerissa +Nerita +Nert +Nerta +Nerte +Nerti +Nertie +Nerty +Nessa +Nessi +Nessie +Nessy +Nesta +Netta +Netti +Nettie +Nettle +Netty +Nevsa +Neysa +Nichol +Nichole +Nicholle +Nicki +Nickie +Nicky +Nicol +Nicola +Nicole +Nicolea +Nicolette +Nicoli +Nicolina +Nicoline +Nicolle +Nidia +Nike +Niki +Nikki +Nikkie +Nikoletta +Nikolia +Nil +Nina +Ninetta +Ninette +Ninnetta +Ninnette +Ninon +Nisa +Nissa +Nisse +Nissie +Nissy +Nita +Nitin +Nixie +Noami +Noel +Noelani +Noell +Noella +Noelle +Noellyn +Noelyn +Noemi +Nola +Nolana +Nolie +Nollie +Nomi +Nona +Nonah +Noni +Nonie +Nonna +Nonnah +Nora +Norah +Norean +Noreen +Norene +Norina +Norine +Norma +Norri +Norrie +Norry +Nova +Novelia +Nydia +Nyssa +Octavia +Odele +Odelia +Odelinda +Odella +Odelle +Odessa +Odetta +Odette +Odilia +Odille +Ofelia +Ofella +Ofilia +Ola +Olenka +Olga +Olia +Olimpia +Olive +Olivette +Olivia +Olivie +Oliy +Ollie +Olly +Olva +Olwen +Olympe +Olympia +Olympie +Ondrea +Oneida +Onida +Onlea +Oona +Opal +Opalina +Opaline +Ophelia +Ophelie +Oprah +Ora +Oralee +Oralia +Oralie +Oralla +Oralle +Orel +Orelee +Orelia +Orelie +Orella +Orelle +Oreste +Oriana 
+Orly +Orsa +Orsola +Ortensia +Otha +Othelia +Othella +Othilia +Othilie +Ottilie +Pacifica +Page +Paige +Paloma +Pam +Pamela +Pamelina +Pamella +Pammi +Pammie +Pammy +Pandora +Pansie +Pansy +Paola +Paolina +Parwane +Pat +Patience +Patrica +Patrice +Patricia +Patrizia +Patsy +Patti +Pattie +Patty +Paula +Paula-Grace +Paule +Pauletta +Paulette +Pauli +Paulie +Paulina +Pauline +Paulita +Pauly +Pavia +Pavla +Pearl +Pearla +Pearle +Pearline +Peg +Pegeen +Peggi +Peggie +Peggy +Pen +Penelopa +Penelope +Penni +Pennie +Penny +Pepi +Pepita +Peri +Peria +Perl +Perla +Perle +Perri +Perrine +Perry +Persis +Pet +Peta +Petra +Petrina +Petronella +Petronia +Petronilla +Petronille +Petunia +Phaedra +Phaidra +Phebe +Phedra +Phelia +Phil +Philipa +Philippa +Philippe +Philippine +Philis +Phillida +Phillie +Phillis +Philly +Philomena +Phoebe +Phylis +Phyllida +Phyllis +Phyllys +Phylys +Pia +Pier +Pierette +Pierrette +Pietra +Piper +Pippa +Pippy +Polly +Pollyanna +Pooh +Poppy +Portia +Pris +Prisca +Priscella +Priscilla +Prissie +Pru +Prudence +Prudi +Prudy +Prue +Prunella +Queada +Queenie +Quentin +Querida +Quinn +Quinta +Quintana +Quintilla +Quintina +Rachael +Rachel +Rachele +Rachelle +Rae +Raf +Rafa +Rafaela +Rafaelia +Rafaelita +Ragnhild +Rahal +Rahel +Raina +Raine +Rakel +Ralina +Ramona +Ramonda +Rana +Randa +Randee +Randene +Randi +Randie +Randy +Ranee +Rani +Rania +Ranice +Ranique +Ranna +Raphaela +Raquel +Raquela +Rasia +Rasla +Raven +Ray +Raychel +Raye +Rayna +Raynell +Rayshell +Rea +Reba +Rebbecca +Rebe +Rebeca +Rebecca +Rebecka +Rebeka +Rebekah +Rebekkah +Ree +Reeba +Reena +Reeta +Reeva +Regan +Reggi +Reggie +Regina +Regine +Reiko +Reina +Reine +Remy +Rena +Renae +Renata +Renate +Rene +Renee +Renel +Renell +Renelle +Renie +Rennie +Reta +Retha +Revkah +Rey +Reyna +Rhea +Rheba +Rheta +Rhetta +Rhiamon +Rhianna +Rhianon +Rhoda +Rhodia +Rhodie +Rhody +Rhona +Rhonda +Riane +Riannon +Rianon +Rica +Ricca +Rici +Ricki +Rickie +Ricky +Riki +Rikki +Rina +Risa +Rissa +Rita +Riva +Rivalee 
+Rivi +Rivkah +Rivy +Roana +Roanna +Roanne +Robbi +Robbie +Robbin +Robby +Robbyn +Robena +Robenia +Roberta +Robin +Robina +Robinet +Robinett +Robinetta +Robinette +Robinia +Roby +Robyn +Roch +Rochell +Rochella +Rochelle +Rochette +Roda +Rodi +Rodie +Rodina +Romola +Romona +Romonda +Romy +Rona +Ronalda +Ronda +Ronica +Ronna +Ronni +Ronnica +Ronnie +Ronny +Roobbie +Rora +Rori +Rorie +Rory +Ros +Rosa +Rosabel +Rosabella +Rosabelle +Rosaleen +Rosalia +Rosalie +Rosalind +Rosalinda +Rosalinde +Rosaline +Rosalyn +Rosalynd +Rosamond +Rosamund +Rosana +Rosanna +Rosanne +Rosario +Rose +Roseann +Roseanna +Roseanne +Roselia +Roselin +Roseline +Rosella +Roselle +Roselyn +Rosemaria +Rosemarie +Rosemary +Rosemonde +Rosene +Rosetta +Rosette +Roshelle +Rosie +Rosina +Rosita +Roslyn +Rosmunda +Rosy +Row +Rowe +Rowena +Roxana +Roxane +Roxanna +Roxanne +Roxi +Roxie +Roxine +Roxy +Roz +Rozalie +Rozalin +Rozamond +Rozanna +Rozanne +Roze +Rozele +Rozella +Rozelle +Rozina +Rubetta +Rubi +Rubia +Rubie +Rubina +Ruby +Ruella +Ruperta +Ruth +Ruthann +Ruthanne +Ruthe +Ruthi +Ruthie +Ruthy +Ryann +Rycca +Saba +Sabina +Sabine +Sabra +Sabrina +Sacha +Sada +Sadella +Sadie +Sal +Sallee +Salli +Sallie +Sally +Sallyann +Sallyanne +Salome +Sam +Samantha +Samara +Samaria +Sammy +Samuela +Samuella +Sande +Sandi +Sandie +Sandra +Sandy +Sandye +Sapphira +Sapphire +Sara +Sara-Ann +Saraann +Sarah +Sarajane +Saree +Sarena +Sarene +Sarette +Sari +Sarina +Sarine +Sarita +Sascha +Sasha +Sashenka +Saudra +Saundra +Savina +Sayre +Scarlet +Scarlett +Scotty +Sean +Seana +Secunda +Seka +Sela +Selena +Selene +Selestina +Selia +Selie +Selina +Selinda +Seline +Sella +Selle +Selma +Sena +Sephira +Serena +Serene +Shaina +Shaine +Shalna +Shalne +Shamit +Shana +Shanda +Shandee +Shandie +Shandra +Shandy +Shane +Shani +Shanie +Shanna +Shannah +Shannen +Shannon +Shanon +Shanta +Shantee +Shara +Sharai +Shari +Sharia +Sharie +Sharity +Sharl +Sharla +Sharleen +Sharlene +Sharline +Sharna +Sharon +Sharona +Sharra +Sharron +Sharyl 
+Shaun +Shauna +Shawn +Shawna +Shawnee +Shay +Shayla +Shaylah +Shaylyn +Shaylynn +Shayna +Shayne +Shea +Sheba +Sheela +Sheelagh +Sheelah +Sheena +Sheeree +Sheila +Sheila-Kathryn +Sheilah +Sheilakathryn +Shel +Shela +Shelagh +Shelba +Shelbi +Shelby +Shelia +Shell +Shelley +Shelli +Shellie +Shelly +Shena +Sher +Sheree +Sheri +Sherie +Sheril +Sherill +Sherilyn +Sherline +Sherri +Sherrie +Sherry +Sherye +Sheryl +Shilpa +Shina +Shir +Shira +Shirah +Shirl +Shirlee +Shirleen +Shirlene +Shirley +Shirline +Shoshana +Shoshanna +Shoshie +Siana +Sianna +Sib +Sibbie +Sibby +Sibeal +Sibel +Sibella +Sibelle +Sibilla +Sibley +Sibyl +Sibylla +Sibylle +Sidoney +Sidonia +Sidonnie +Sigrid +Sile +Sileas +Silva +Silvana +Silvia +Silvie +Simona +Simone +Simonette +Simonne +Sindee +Sinead +Siobhan +Sioux +Siouxie +Sisely +Sisile +Sissie +Sissy +Sofia +Sofie +Solange +Sondra +Sonia +Sonja +Sonni +Sonnie +Sonnnie +Sonny +Sonya +Sophey +Sophi +Sophia +Sophie +Sophronia +Sorcha +Sosanna +Stace +Stacee +Stacey +Staci +Stacia +Stacie +Stacy +Stafani +Star +Starla +Starlene +Starlin +Starr +Stefa +Stefania +Stefanie +Steffane +Steffi +Steffie +Stella +Stepha +Stephana +Stephani +Stephanie +Stephannie +Stephenie +Stephi +Stephie +Stephine +Stesha +Stevana +Stevena +Stoddard +Storey +Storm +Stormi +Stormie +Stormy +Sue +Sue-elle +Suellen +Sukey +Suki +Sula +Sunny +Sunshine +Susan +Susana +Susanetta +Susann +Susanna +Susannah +Susanne +Susette +Susi +Susie +Sussi +Susy +Suzan +Suzann +Suzanna +Suzanne +Suzetta +Suzette +Suzi +Suzie +Suzy +Suzzy +Sybil +Sybila +Sybilla +Sybille +Sybyl +Sydel +Sydelle +Sydney +Sylvia +Sylvie +Tabatha +Tabbatha +Tabbi +Tabbie +Tabbitha +Tabby +Tabina +Tabitha +Taffy +Talia +Tallia +Tallie +Tally +Talya +Talyah +Tamar +Tamara +Tamarah +Tamarra +Tamera +Tami +Tamiko +Tamma +Tammara +Tammi +Tammie +Tammy +Tamra +Tana +Tandi +Tandie +Tandy +Tani +Tania +Tansy +Tanya +Tara +Tarah +Tarra +Tarrah +Taryn +Tasha +Tasia +Tate +Tatiana +Tatiania +Tatum +Tawnya +Tawsha +Teane 
+Ted +Tedda +Teddi +Teddie +Teddy +Tedi +Tedra +Teena +Tella +Teodora +Tera +Teresa +TeresaAnne +Terese +Teresina +Teresita +Teressa +Teri +Teriann +Terina +Terra +Terri +Terri-Jo +Terrianne +Terrie +Terry +Terrye +Tersina +Teryl +Terza +Tess +Tessa +Tessi +Tessie +Tessy +Thalia +Thea +Theada +Theadora +Theda +Thekla +Thelma +Theo +Theodora +Theodosia +Theresa +Theresa-Marie +Therese +Theresina +Theresita +Theressa +Therine +Thia +Thomasa +Thomasin +Thomasina +Thomasine +Tia +Tiana +Tiena +Tierney +Tiertza +Tiff +Tiffani +Tiffanie +Tiffany +Tiffi +Tiffie +Tiffy +Tilda +Tildi +Tildie +Tildy +Tillie +Tilly +Tim +Timi +Timmi +Timmie +Timmy +Timothea +Tina +Tine +Tiphani +Tiphanie +Tiphany +Tish +Tisha +Tobe +Tobey +Tobi +Tobie +Toby +Tobye +Toinette +Toma +Tomasina +Tomasine +Tomi +Tomiko +Tommi +Tommie +Tommy +Toni +Tonia +Tonie +Tony +Tonya +Tootsie +Torey +Tori +Torie +Torrie +Tory +Tova +Tove +Trace +Tracee +Tracey +Traci +Tracie +Tracy +Trenna +Tresa +Trescha +Tressa +Tricia +Trina +Trish +Trisha +Trista +Trix +Trixi +Trixie +Trixy +Truda +Trude +Trudey +Trudi +Trudie +Trudy +Trula +Tuesday +Twila +Twyla +Tybi +Tybie +Tyne +Ula +Ulla +Ulrica +Ulrika +Ulrike +Umeko +Una +Ursa +Ursala +Ursola +Ursula +Ursulina +Ursuline +Uta +Val +Valaree +Valaria +Vale +Valeda +Valencia +Valene +Valenka +Valentia +Valentina +Valentine +Valera +Valeria +Valerie +Valery +Valerye +Valida +Valina +Valli +Vallie +Vally +Valma +Valry +Van +Vanda +Vanessa +Vania +Vanna +Vanni +Vannie +Vanny +Vanya +Veda +Velma +Velvet +Vena +Venita +Ventura +Venus +Vera +Veradis +Vere +Verena +Verene +Veriee +Verile +Verina +Verine +Verla +Verna +Vernice +Veronica +Veronika +Veronike +Veronique +Vi +Vicki +Vickie +Vicky +Victoria +Vida +Viki +Vikki +Vikkie +Vikky +Vilhelmina +Vilma +Vin +Vina +Vinita +Vinni +Vinnie +Vinny +Viola +Violante +Viole +Violet +Violetta +Violette +Virgie +Virgina +Virginia +Virginie +Vita +Vitia +Vitoria +Vittoria +Viv +Viva +Vivi +Vivia +Vivian +Viviana +Vivianna +Vivianne 
+Vivie +Vivien +Viviene +Vivienne +Viviyan +Vivyan +Vivyanne +Vonni +Vonnie +Vonny +Wallie +Wallis +Wally +Waly +Wanda +Wandie +Wandis +Waneta +Wenda +Wendeline +Wendi +Wendie +Wendy +Wenona +Wenonah +Whitney +Wileen +Wilhelmina +Wilhelmine +Wilie +Willa +Willabella +Willamina +Willetta +Willette +Willi +Willie +Willow +Willy +Willyt +Wilma +Wilmette +Wilona +Wilone +Wilow +Windy +Wini +Winifred +Winna +Winnah +Winne +Winni +Winnie +Winnifred +Winny +Winona +Winonah +Wren +Wrennie +Wylma +Wynn +Wynne +Wynnie +Wynny +Xaviera +Xena +Xenia +Xylia +Xylina +Yalonda +Yehudit +Yelena +Yetta +Yettie +Yetty +Yevette +Yoko +Yolanda +Yolande +Yolane +Yolanthe +Yonina +Yoshi +Yoshiko +Yovonnda +Yvette +Yvonne +Zabrina +Zahara +Zandra +Zaneta +Zara +Zarah +Zaria +Zarla +Zea +Zelda +Zelma +Zena +Zenia +Zia +Zilvia +Zita +Zitella +Zoe +Zola +Zonda +Zondra +Zonnya +Zora +Zorah +Zorana +Zorina +Zorine +Zsa Zsa +Zsazsa +Zulema +Zuzana +Mikako +Kaari +Gita +Geeta diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/key_word.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/key_word.tsv new file mode 100644 index 0000000..861285b --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/key_word.tsv @@ -0,0 +1,6 @@ +chapter +class +part +article +section +paragraph diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/male.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/male.tsv new file mode 100644 index 0000000..08236aa --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/male.tsv @@ -0,0 +1,2946 @@ +Aamir +Aaron +Abbey +Abbie +Abbot +Abbott +Abby +Abdel +Abdul +Abdulkarim +Abdullah +Abe +Abel +Abelard +Abner +Abraham +Abram +Ace +Adair +Adam +Adams +Addie +Adger +Aditya +Adlai +Adnan +Adolf +Adolfo +Adolph +Adolphe +Adolpho +Adolphus +Adrian +Adrick +Adrien +Agamemnon +Aguinaldo +Aguste +Agustin +Aharon 
+Ahmad +Ahmed +Ahmet +Ajai +Ajay +Al +Alaa +Alain +Alan +Alasdair +Alastair +Albatros +Albert +Alberto +Albrecht +Alden +Aldis +Aldo +Aldric +Aldrich +Aldus +Aldwin +Alec +Aleck +Alejandro +Aleks +Aleksandrs +Alessandro +Alex +Alexander +Alexei +Alexis +Alf +Alfie +Alfonse +Alfonso +Alfonzo +Alford +Alfred +Alfredo +Algernon +Ali +Alic +Alister +Alix +Allah +Allan +Allen +Alley +Allie +Allin +Allyn +Alonso +Alonzo +Aloysius +Alphonse +Alphonso +Alston +Alton +Alvin +Alwin +Amadeus +Ambros +Ambrose +Ambrosi +Ambrosio +Ambrosius +Amenhotep +Amery +Amory +Amos +Anatol +Anatole +Anatollo +Anatoly +Anders +Andie +Andonis +Andre +Andrea +Andreas +Andrej +Andres +Andrew +Andrey +Andri +Andros +Andrus +Andrzej +Andy +Angel +Angelico +Angelo +Angie +Angus +Ansel +Ansell +Anselm +Anson +Anthony +Antin +Antoine +Anton +Antone +Antoni +Antonin +Antonino +Antonio +Antonius +Antony +Anurag +Apollo +Apostolos +Aram +Archibald +Archibold +Archie +Archon +Archy +Arel +Ari +Arie +Ariel +Aristotle +Arlo +Armand +Armando +Armond +Armstrong +Arne +Arnie +Arnold +Arnoldo +Aron +Arron +Art +Arther +Arthur +Artie +Artur +Arturo +Arvie +Arvin +Arvind +Arvy +Ash +Ashby +Ashish +Ashley +Ashton +Aub +Aube +Aubert +Aubrey +Augie +Augustin +Augustine +Augusto +Augustus +Austen +Austin +Ave +Averell +Averil +Averill +Avery +Avi +Avraham +Avram +Avrom +Axel +Aylmer +Aziz +Bailey +Bailie +Baillie +Baily +Baird +Baldwin +Bancroft +Barbabas +Barclay +Bard +Barde +Barn +Barnabas +Barnabe +Barnaby +Barnard +Barnebas +Barnett +Barney +Barnie +Barny +Baron +Barr +Barret +Barrett +Barri +Barrie +Barris +Barron +Barry +Bart +Bartel +Barth +Barthel +Bartholemy +Bartholomeo +Bartholomeus +Bartholomew +Bartie +Bartlet +Bartlett +Bartolemo +Bartolomei +Bartolomeo +Barton +Barty +Bary +Basil +Batholomew +Baxter +Bay +Bayard +Beale +Bealle +Bear +Bearnard +Beau +Beaufort +Beauregard +Beck +Bela +Ben +Benedict +Bengt +Benito +Benjamen +Benjamin +Benji +Benjie +Benjy +Benn +Bennet +Bennett +Bennie +Benny +Benson 
+Bentley +Benton +Beowulf +Berchtold +Berk +Berke +Berkeley +Berkie +Berkley +Bernard +Bernardo +Bernd +Bernhard +Bernie +Bert +Bertie +Bertram +Bertrand +Bharat +Biff +Bill +Billie +Billy +Bing +Binky +Bishop +Bjorn +Bjorne +Blaine +Blair +Blake +Blare +Blayne +Bo +Bob +Bobbie +Bobby +Bogart +Bogdan +Boniface +Boris +Boyce +Boyd +Brad +Braden +Bradford +Bradley +Bradly +Brady +Brandon +Brandy +Brant +Brendan +Brent +Bret +Brett +Brewer +Brewster +Brian +Brice +Briggs +Brinkley +Britt +Brock +Broddie +Broddy +Broderic +Broderick +Brodie +Brody +Bronson +Brook +Brooke +Brooks +Bruce +Bruno +Bryan +Bryant +Bryce +Bryn +Bryon +Bubba +Buck +Bucky +Bud +Buddy +Burgess +Burke +Burl +Burnaby +Burt +Burton +Buster +Butch +Butler +Byram +Byron +Caesar +Cain +Cal +Caldwell +Caleb +Calhoun +Calvin +Cam +Cameron +Cammy +Carey +Carl +Carleigh +Carlie +Carlin +Carlo +Carlos +Carlton +Carlyle +Carmine +Carroll +Carson +Carsten +Carter +Cary +Caryl +Case +Casey +Caspar +Casper +Cass +Cat +Cecil +Cesar +Chad +Chadd +Chaddie +Chaddy +Chadwick +Chaim +Chalmers +Chan +Chance +Chancey +Chanderjit +Chandler +Chane +Chariot +Charles +Charleton +Charley +Charlie +Charlton +Chas +Chase +Chaunce +Chauncey +Che +Chelton +Chen +Chester +Cheston +Chet +Chev +Chevalier +Chevy +Chip +Chris +Chrissy +Christ +Christian +Christiano +Christie +Christof +Christofer +Christoph +Christophe +Christopher +Christorpher +Christos +Christy +Chrisy +Chuck +Churchill +Clair +Claire +Clancy +Clarance +Clare +Clarence +Clark +Clarke +Claude +Claudio +Claudius +Claus +Clay +Clayborn +Clayborne +Claybourne +Clayton +Cleland +Clem +Clemens +Clement +Clemente +Clemmie +Cletus +Cleveland +Cliff +Clifford +Clifton +Clint +Clinten +Clinton +Clive +Clyde +Cob +Cobb +Cobbie +Cobby +Cody +Colbert +Cole +Coleman +Colin +Collin +Collins +Conan +Connie +Connolly +Connor +Conrad +Conroy +Constantin +Constantine +Constantinos +Conway +Cooper +Corbin +Corby +Corey +Corky +Cornelius +Cornellis +Corrie +Cortese +Corwin +Cory 
+Cosmo +Costa +Courtney +Craig +Crawford +Creighton +Cris +Cristopher +Curt +Curtice +Curtis +Cy +Cyril +Cyrill +Cyrille +Cyrillus +Cyrus +Dabney +Daffy +Dale +Dallas +Dalton +Damian +Damien +Damon +Dan +Dana +Dane +Dani +Danie +Daniel +Dannie +Danny +Dante +Darby +Darcy +Daren +Darian +Darien +Darin +Dario +Darius +Darrel +Darrell +Darren +Darrick +Darrin +Darryl +Darth +Darwin +Daryl +Daryle +Dave +Davey +David +Davidde +Davide +Davidson +Davie +Davin +Davis +Davon +Davoud +Davy +Dawson +Dean +Deane +Del +Delbert +Dell +Delmar +Demetre +Demetri +Demetris +Demetrius +Demosthenis +Denis +Dennie +Dennis +Denny +Derby +Derek +Derick +Derk +Derrek +Derrick +Derrin +Derrol +Derron +Deryl +Desmond +Desmund +Devin +Devon +Dewey +Dewitt +Dexter +Dick +Dickey +Dickie +Diego +Dieter +Dietrich +Dillon +Dimitri +Dimitrios +Dimitris +Dimitrou +Dimitry +Dino +Dion +Dionis +Dionysus +Dirk +Dmitri +Dom +Domenic +Domenico +Dominic +Dominick +Dominique +Don +Donal +Donald +Donn +Donnie +Donny +Donovan +Dorian +Dory +Doug +Douggie +Dougie +Douglas +Douglass +Douglis +Dov +Doyle +Drake +Drew +Dru +Dryke +Duane +Dudley +Duffie +Duffy +Dugan +Duke +Dunc +Duncan +Dunstan +Durand +Durant +Durante +Durward +Dustin +Dwain +Dwaine +Dwane +Dwayne +Dwight +Dylan +Dyson +Earl +Earle +Easton +Eben +Ebeneser +Ebenezer +Eberhard +Ed +Eddie +Eddy +Edgar +Edgardo +Edie +Edmond +Edmund +Edouard +Edsel +Eduard +Eduardo +Edward +Edwin +Efram +Egbert +Ehud +Elbert +Elden +Eldon +Eli +Elias +Elihu +Elijah +Eliot +Eliott +Elisha +Elliot +Elliott +Ellis +Ellsworth +Ellwood +Elmer +Elmore +Elnar +Elric +Elroy +Elton +Elvin +Elvis +Elwin +Elwood +Elwyn +Ely +Emanuel +Emerson +Emery +Emil +Emile +Emilio +Emmanuel +Emmery +Emmet +Emmett +Emmit +Emmott +Emmy +Emory +Ender +Engelbart +Engelbert +Englebart +Englebert +Enoch +Enrico +Enrique +Ephraim +Ephram +Ephrayim +Ephrem +Er +Erasmus +Erastus +Erek +Erhard +Erhart +Eric +Erich +Erick +Erik +Erin +Erl +Ernest +Ernesto +Ernie +Ernst +Erny +Errol +Ervin +Erwin 
+Esau +Esme +Esteban +Ethan +Ethelbert +Ethelred +Etienne +Euclid +Eugen +Eugene +Eustace +Ev +Evan +Evelyn +Everard +Everett +Ewan +Ewart +Ez +Ezechiel +Ezekiel +Ezra +Fabian +Fabio +Fairfax +Farley +Fazeel +Federico +Felice +Felicio +Felipe +Felix +Ferd +Ferdie +Ferdinand +Ferdy +Fergus +Ferguson +Ferinand +Fernando +Fidel +Filbert +Filip +Filipe +Filmore +Finley +Finn +Fitz +Fitzgerald +Flem +Fleming +Flemming +Fletch +Fletcher +Flin +Flinn +Flint +Flipper +Florian +Floyd +Flynn +Fons +Fonsie +Fonz +Fonzie +Forbes +Ford +Forest +Forester +Forrest +Forrester +Forster +Foster +Fowler +Fox +Fran +Francesco +Francis +Francisco +Francois +Frank +Frankie +Franklin +Franklyn +Franky +Frans +Franz +Fraser +Frazier +Fred +Freddie +Freddy +Frederic +Frederich +Frederick +Frederico +Frederik +Fredric +Fredrick +Freeman +Freemon +Fremont +French +Friedric +Friedrich +Friedrick +Fritz +Fulton +Fyodor +Gabe +Gabriel +Gabriele +Gabriell +Gabriello +Gail +Gale +Galen +Gallagher +Gamaliel +Garcia +Garcon +Gardener +Gardiner +Gardner +Garey +Garfield +Garfinkel +Garold +Garp +Garret +Garrett +Garrot +Garrott +Garry +Garth +Garv +Garvey +Garvin +Garvy +Garwin +Garwood +Gary +Gaspar +Gasper +Gaston +Gav +Gaven +Gavin +Gavriel +Gay +Gayle +Gearard +Gene +Geo +Geof +Geoff +Geoffrey +Geoffry +Georg +George +Georges +Georgia +Georgie +Georgy +Gerald +Geraldo +Gerard +Gere +Gerhard +Gerhardt +Geri +Germaine +Gerold +Gerome +Gerrard +Gerri +Gerrit +Gerry +Gershom +Gershon +Giacomo +Gian +Giancarlo +Giavani +Gibb +Gideon +Giff +Giffard +Giffer +Giffie +Gifford +Giffy +Gil +Gilbert +Gilberto +Gilburt +Giles +Gill +Gilles +Ginger +Gino +Giordano +Giorgi +Giorgio +Giovanne +Giovanni +Giraldo +Giraud +Giuseppe +Glen +Glenn +Glynn +Godard +Godart +Goddard +Goddart +Godfree +Godfrey +Godfry +Godwin +Gomer +Gonzales +Gonzalo +Goober +Goose +Gordan +Gordie +Gordon +Grace +Grady +Graehme +Graeme +Graham +Graig +Grant +Granville +Greg +Gregg +Greggory +Gregor +Gregorio +Gregory +Gretchen +Griff 
+Griffin +Griffith +Griswold +Grove +Grover +Guido +Guillaume +Guillermo +Gunner +Gunter +Gunther +Gus +Gustaf +Gustav +Gustave +Gustavo +Gustavus +Guthrey +Guthrie +Guthry +Guy +Hadleigh +Hadley +Hadrian +Hagan +Hagen +Hailey +Hakeem +Hakim +Hal +Hale +Haleigh +Haley +Hall +Hallam +Halvard +Ham +Hamel +Hamid +Hamil +Hamilton +Hamish +Hamlen +Hamlet +Hamlin +Hammad +Hamnet +Han +Hanan +Hanford +Hank +Hannibal +Hans +Hans-Peter +Hansel +Hanson +Harald +Harcourt +Hari +Harlan +Harland +Harley +Harlin +Harman +Harmon +Harold +Harris +Harrison +Harrold +Harry +Hart +Hartley +Hartwell +Harv +Harvard +Harvey +Harvie +Harwell +Hasheem +Hashim +Haskel +Haskell +Hassan +Hastings +Hasty +Haven +Hayden +Haydon +Hayes +Hayward +Haywood +Hazel +Heath +Heathcliff +Hebert +Hector +Heinrich +Heinz +Helmuth +Henderson +Hendrick +Hendrik +Henri +Henrie +Henrik +Henrique +Henry +Herb +Herbert +Herbie +Herby +Hercule +Hercules +Herculie +Herman +Hermann +Hermon +Hermy +Hernando +Herold +Herrick +Herrmann +Hersch +Herschel +Hersh +Hershel +Herve +Hervey +Hew +Hewe +Hewet +Hewett +Hewie +Hewitt +Heywood +Hezekiah +Higgins +Hilary +Hilbert +Hill +Hillard +Hillary +Hillel +Hillery +Hilliard +Hilton +Hiralal +Hiram +Hiro +Hirsch +Hobart +Hodge +Hogan +Hollis +Holly +Homer +Horace +Horacio +Horatio +Horatius +Horst +Howard +Howie +Hoyt +Hubert +Hudson +Huey +Hugh +Hugo +Humbert +Humphrey +Hunt +Hunter +Huntington +Huntlee +Huntley +Hurley +Husain +Husein +Hussein +Hy +Hyatt +Hyman +Hymie +Iago +Iain +Ian +Ibrahim +Ichabod +Iggie +Iggy +Ignace +Ignacio +Ignacius +Ignatius +Ignaz +Ignazio +Igor +Ike +Ikey +Immanuel +Ingamar +Ingelbert +Ingemar +Inglebert +Ingmar +Ingram +Inigo +Innocent +Ira +Irvin +Irvine +Irving +Irwin +Isa +Isaac +Isaak +Isador +Isadore +Isaiah +Ishmael +Isidore +Ismail +Israel +Istvan +Ivan +Ivor +Izaak +Izak +Izzy +Jabez +Jack +Jackie +Jackson +Jacob +Jacques +Jae +Jaime +Jake +Jakob +James +Jameson +Jamey +Jamie +Jan +Janos +Janus +Jared +Jarrett +Jarvis +Jason +Jasper 
+Javier +Jay +Jean +Jean-Christophe +Jean-Francois +Jean-Lou +Jean-Luc +Jean-Marc +Jean-Paul +Jean-Pierre +Jeb +Jed +Jedediah +Jef +Jeff +Jefferey +Jefferson +Jeffery +Jeffie +Jeffrey +Jeffry +Jefry +Jehu +Jennings +Jens +Jephthah +Jerald +Jeramie +Jere +Jereme +Jeremiah +Jeremias +Jeremie +Jeremy +Jermain +Jermaine +Jermayne +Jerold +Jerome +Jeromy +Jerri +Jerrie +Jerrold +Jerrome +Jerry +Jervis +Jerzy +Jess +Jesse +Jessee +Jessey +Jessie +Jesus +Jeth +Jethro +Jim +Jimbo +Jimmie +Jimmy +Jo +Joab +Joachim +Joao +Joaquin +Job +Jock +Jodi +Jodie +Jody +Joe +Joel +Joey +Johan +Johann +Johannes +John +John-David +John-Patrick +Johnathan +Johnathon +Johnnie +Johnny +Johny +Jon +Jonah +Jonas +Jonathan +Jonathon +Jonny +Jordan +Jordon +Jordy +Jorge +Jory +Jose +Josef +Joseph +Josephus +Josh +Joshua +Joshuah +Josiah +Jotham +Juan +Juanita +Jud +Judah +Judas +Judd +Jude +Judith +Judson +Judy +Juergen +Jule +Jules +Julian +Julie +Julio +Julius +Justin +Justis +Kaiser +Kaleb +Kalil +Kalle +Kalman +Kalvin +Kam +Kane +Kareem +Karel +Karim +Karl +Karsten +Kaspar +Keefe +Keenan +Keene +Keil +Keith +Kellen +Kelley +Kelly +Kelsey +Kelvin +Kelwin +Ken +Kendal +Kendall +Kendrick +Kenn +Kennedy +Kenneth +Kenny +Kent +Kenton +Kenyon +Kermie +Kermit +Kerry +Kevan +Kevin +Kim +Kimball +Kimmo +Kin +Kincaid +King +Kingsley +Kingsly +Kingston +Kip +Kirby +Kirk +Kit +Klaus +Klee +Knox +Konrad +Konstantin +Kory +Kostas +Kraig +Kris +Krishna +Kristian +Kristopher +Kristos +Kurt +Kurtis +Kyle +Laird +Lamar +Lambert +Lamont +Lance +Lancelot +Lane +Langston +Lanny +Larry +Lars +Laurance +Lauren +Laurence +Laurens +Laurent +Laurie +Lawerence +Lawrence +Lawson +Lawton +Lay +Layton +Lazar +Lazare +Lazaro +Lazarus +Lazlo +Lee +Lefty +Leif +Leigh +Leighton +Leland +Lem +Lemar +Lemmie +Lemmy +Lemuel +Len +Lenard +Lennie +Lenny +Leo +Leon +Leonard +Leonardo +Leonerd +Leonhard +Leonid +Leonidas +Leopold +Leroy +Les +Lesley +Leslie +Lester +Lev +Levi +Levin +Levon +Levy +Lew +Lewis +Lex +Liam +Lin 
+Lincoln +Lind +Lindsay +Lindsey +Lindy +Linoel +Linus +Lion +Lionel +Lionello +Llewellyn +Lloyd +Locke +Lockwood +Logan +Lon +Lonnie +Lonny +Loren +Lorenzo +Lorne +Lorrie +Lothar +Lou +Louie +Louis +Lovell +Lowell +Lucas +Luce +Lucian +Luciano +Lucien +Lucio +Lucius +Ludvig +Ludwig +Luigi +Luis +Lukas +Luke +Luther +Lyle +Lyn +Lyndon +Lynn +Mac +Mace +Mack +Mackenzie +Maddie +Maddy +Madison +Magnum +Magnus +Mahesh +Mahmoud +Mahmud +Maison +Major +Malcolm +Manfred +Manish +Manny +Manuel +Marc +Marcel +Marcello +Marcellus +Marcelo +Marchall +Marcio +Marco +Marcos +Marcus +Marietta +Marilu +Mario +Marion +Marius +Mark +Marko +Markos +Markus +Marlin +Marlo +Marlon +Marlow +Marlowe +Marmaduke +Marsh +Marshal +Marshall +Mart +Martainn +Marten +Martie +Martin +Martino +Marty +Martyn +Marv +Marve +Marven +Marvin +Marwin +Mason +Mateo +Mathew +Mathias +Matias +Matt +Matteo +Matthaeus +Mattheus +Matthew +Matthias +Matthieu +Matthiew +Matthus +Mattias +Mattie +Matty +Maurice +Mauricio +Maurie +Maurise +Maurits +Mauritz +Maury +Max +Maxfield +Maxie +Maxim +Maximilian +Maximilien +Maxwell +Mayer +Maynard +Maynord +Mayor +Mead +Meade +Meier +Meir +Mel +Melvin +Melvyn +Menard +Mendel +Mendie +Meredeth +Meredith +Merell +Merill +Merle +Merlin +Merrel +Merrick +Merril +Merrill +Merry +Merv +Mervin +Merwin +Meryl +Meyer +Mic +Micah +Michael +Michail +Michal +Michale +Micheal +Micheil +Michel +Michele +Mick +Mickey +Mickie +Micky +Miguel +Mika +Mikael +Mike +Mikel +Mikey +Mikhail +Miles +Millicent +Milo +Milt +Milton +Mischa +Mitch +Mitchael +Mitchel +Mitchell +Moe +Mohamad +Mohamed +Mohammad +Mohammed +Mohan +Moise +Moises +Moishe +Monroe +Montague +Monte +Montgomery +Monty +Moore +Mordecai +Morgan +Morlee +Morley +Morly +Morrie +Morris +Morry +Morse +Mort +Morten +Mortie +Mortimer +Morton +Morty +Mose +Moses +Moshe +Moss +Muffin +Mugsy +Muhammad +Munmro +Munroe +Murdoch +Murdock +Murphy +Murray +Mustafa +Myke +Myles +Mylo +Myron +Nahum +Napoleon +Nat +Natale +Nate +Nathan 
+Nathanael +Nathanial +Nathaniel +Nathanil +Neal +Neale +Neall +Nealon +Nealson +Nealy +Ned +Neddie +Neddy +Neel +Neil +Nels +Nelsen +Nelson +Nero +Neron +Nester +Nestor +Nev +Nevil +Nevile +Neville +Nevin +Nevins +Newton +Niall +Niccolo +Nicholas +Nichole +Nichols +Nick +Nickey +Nickie +Nickolas +Nicky +Nico +Nicolas +Niels +Nigel +Niki +Nikita +Nikki +Nikolai +Nikos +Niles +Nils +Nilson +Niven +Noach +Noah +Noam +Noble +Noe +Noel +Nolan +Noland +Norbert +Norm +Norman +Normand +Normie +Norris +Northrop +Northrup +Norton +Norwood +Nunzio +Obadiah +Obadias +Oberon +Obie +Octavius +Odell +Odie +Odin +Odysseus +Olaf +Olag +Ole +Oleg +Olin +Oliver +Olivier +Olle +Ollie +Omar +Oral +Oran +Orazio +Orbadiah +Oren +Orin +Orion +Orlando +Orren +Orrin +Orson +Orton +Orville +Osbert +Osborn +Osborne +Osbourn +Osbourne +Oscar +Osgood +Osmond +Osmund +Ossie +Oswald +Oswell +Otes +Othello +Otho +Otis +Otto +Owen +Ozzie +Ozzy +Pablo +Pace +Paco +Paddie +Paddy +Padraig +Page +Paige +Pail +Palmer +Paolo +Park +Parke +Parker +Parnell +Parrnell +Parry +Parsifal +Partha +Pascal +Pascale +Pasquale +Pat +Pate +Patel +Paten +Patin +Paton +Patric +Patrice +Patricio +Patrick +Patrik +Patsy +Pattie +Patty +Paul +Paulo +Pavel +Pearce +Pedro +Peirce +Pembroke +Pen +Penn +Pennie +Penny +Penrod +Pepe +Pepillo +Pepito +Perceval +Percival +Percy +Perry +Pete +Peter +Petey +Petr +Peyter +Peyton +Phil +Philbert +Philip +Phillip +Phillipe +Phillipp +Phineas +Phip +Pierce +Pierre +Pierson +Piet +Pieter +Pietro +Piggy +Pincas +Pinchas +Pincus +Piotr +Pip +Pius +Plato +Pooh +Porter +Poul +Powell +Praneetf +Prasad +Prasun +Prent +Prentice +Prentiss +Prescott +Preston +Price +Prince +Pryce +Puff +Purcell +Putnam +Pyotr +Quent +Quentin +Quiggly +Quigly +Quigman +Quill +Quillan +Quincey +Quincy +Quinlan +Quinn +Quint +Quintin +Quinton +Quintus +Rab +Rabbi +Rabi +Rad +Radcliffe +Rafael +Rafe +Ragnar +Raimund +Rainer +Raj +Rajeev +Raleigh +Ralf +Ralph +Ram +Rameses +Ramesh +Ramon +Ramsay +Ramsey +Rand 
+Randal +Randall +Randell +Randi +Randie +Randolf +Randolph +Randy +Ransell +Ransom +Raoul +Raphael +Raul +Ravi +Ravil +Rawley +Ray +Raymond +Raymund +Raymundo +Raynard +Rayner +Raynor +Reagan +Red +Redford +Redmond +Reece +Reed +Rees +Reese +Reg +Regan +Regen +Reggie +Reggis +Reggy +Reginald +Reginauld +Reid +Reilly +Reinhard +Reinhold +Rem +Remington +Remus +Renado +Renaldo +Renard +Renato +Renaud +Renault +Rene +Reube +Reuben +Reuven +Rex +Rey +Reynard +Reynold +Reynolds +Reza +Rhett +Ric +Ricard +Ricardo +Riccardo +Rice +Rich +Richard +Richardo +Richie +Richmond +Richy +Rick +Rickard +Rickey +Ricki +Rickie +Ricky +Rik +Rikki +Riley +Rinaldo +Ripley +Ritch +Ritchie +Roarke +Rob +Robb +Robbert +Robbie +Robert +Roberto +Robin +Robinson +Rochester +Rock +Rockwell +Rocky +Rod +Rodd +Roddie +Roddy +Roderic +Roderich +Roderick +Roderigo +Rodge +Rodger +Rodney +Rodolfo +Rodolph +Rodolphe +Rodrick +Rodrigo +Rodrique +Rog +Roger +Rogers +Roice +Roland +Rolando +Rolf +Rolfe +Rolland +Rollin +Rollins +Rollo +Rolph +Romain +Roman +Romeo +Ron +Ronald +Ronen +Roni +Ronnie +Ronny +Roosevelt +Rory +Roscoe +Ross +Roth +Rourke +Rowland +Roy +Royal +Royce +Rube +Ruben +Rubin +Ruby +Rudd +Ruddie +Ruddy +Rudie +Rudiger +Rudolf +Rudolfo +Rudolph +Rudy +Rudyard +Rufe +Rufus +Rupert +Ruperto +Russ +Russel +Russell +Rustie +Rustin +Rusty +Rutger +Rutherford +Rutledge +Rutter +Ryan +Sal +Salem +Salim +Salman +Salmon +Salomo +Salomon +Salomone +Salvador +Salvatore +Salvidor +Sam +Sammie +Sammy +Sampson +Samson +Samuel +Samuele +Sancho +Sander +Sanders +Sanderson +Sandor +Sandro +Sandy +Sanford +Sanson +Sansone +Sarge +Sargent +Sascha +Sasha +Saul +Sauncho +Saunder +Saunders +Saunderson +Saundra +Saw +Sawyer +Sawyere +Sax +Saxe +Saxon +Say +Sayer +Sayers +Sayre +Sayres +Scarface +Schroeder +Schuyler +Scot +Scott +Scotti +Scottie +Scotty +Seamus +Sean +Sebastian +Sebastiano +Sebastien +See +Selby +Selig +Serge +Sergeant +Sergei +Sergent +Sergio +Seth +Seymour +Shadow +Shaine +Shalom +Shamus 
+Shamshi-Adad +Shanan +Shane +Shannan +Shannon +Shaughn +Shaun +Shaw +Shawn +Shay +Shayne +Shea +Sheff +Sheffie +Sheffield +Sheffy +Shelby +Shelden +Sheldon +Shell +Shelley +Shelton +Shem +Shep +Shepard +Shepherd +Sheppard +Shepperd +Sheridan +Sherlock +Sherlocke +Sherman +Sherwin +Sherwood +Sherwynd +Shimon +Shlomo +Sholom +Shorty +Shurlock +Shurlocke +Shurwood +Si +Sibyl +Sid +Siddhartha +Sidnee +Sidney +Siegfried +Siffre +Sig +Sigfrid +Sigfried +Sigmund +Silas +Silvain +Silvan +Silvano +Silvanus +Silvester +Silvio +Sim +Simeon +Simmonds +Simon +Simone +Sinclair +Sinclare +Sivert +Siward +Sixtus +Skell +Skelly +Skip +Skipp +Skipper +Skippie +Skippy +Skipton +Sky +Skye +Skylar +Skyler +Slade +Slim +Sloan +Sloane +Sly +Smith +Smitty +Socrates +Sol +Sollie +Solly +Solomon +Somerset +Son +Sonnie +Sonny +Sparky +Spence +Spencer +Spense +Spenser +Spike +Spiro +Spiros +Spud +Srinivas +Stacy +Staffard +Stafford +Staford +Stan +Standford +Stanfield +Stanford +Stanislaw +Stanleigh +Stanley +Stanly +Stanton +Stanwood +Stavros +Stearn +Stearne +Stefan +Stefano +Steffen +Stephan +Stephanus +Stephen +Sterling +Stern +Sterne +Steve +Steven +Stevie +Stevy +Stew +Steward +Stewart +Stig +Stillman +Stillmann +Sting +Stinky +Stirling +Stu +Stuart +Sturgis +Sullivan +Sully +Sumner +Sunny +Sutherland +Sutton +Sven +Swen +Syd +Sydney +Sylvan +Sylvester +Tab +Tabb +Tabbie +Tabby +Taber +Tabor +Tad +Tadd +Taddeo +Taddeus +Tadeas +Tailor +Tait +Taite +Talbert +Talbot +Tallie +Tally +Tam +Tamas +Tammie +Tammy +Tan +Tann +Tanner +Tanney +Tannie +Tanny +Tarrance +Tarrant +Tarzan +Tate +Taylor +Teador +Ted +Tedd +Teddie +Teddy +Tedie +Tedman +Tedmund +Tedrick +Temp +Temple +Templeton +Teodoor +Teodor +Teodorico +Teodoro +Terence +Terencio +Terrance +Terrel +Terrell +Terrence +Terri +Terrill +Terry +Thacher +Thad +Thaddeus +Thaddius +Thaddus +Thadeus +Thain +Thaine +Thane +Tharen +Thatch +Thatcher +Thaxter +Thayne +Thebault +Thedric +Thedrick +Theo +Theobald +Theodor +Theodore +Theodoric 
+Theophyllus +Thibaud +Thibaut +Thom +Thomas +Thor +Thorn +Thorndike +Thornie +Thornton +Thorny +Thorpe +Thorstein +Thorsten +Thorvald +Thurstan +Thurston +Tibold +Tiebold +Tiebout +Tiler +Tim +Timmie +Timmy +Timothee +Timotheus +Timothy +Tirrell +Tito +Titos +Titus +Tobe +Tobiah +Tobias +Tobie +Tobin +Tobit +Toby +Tod +Todd +Toddie +Toddy +Tom +Tomas +Tome +Tomkin +Tomlin +Tommie +Tommy +Tonnie +Tony +Tore +Torey +Torin +Torr +Torrance +Torre +Torrence +Torrey +Torrin +Torry +Town +Towney +Townie +Townsend +Towny +Trace +Tracey +Tracie +Tracy +Traver +Travers +Travis +Tray +Tre +Tremain +Tremaine +Tremayne +Trent +Trenton +Trev +Trevar +Trever +Trevor +Trey +Trip +Tristan +Troy +Truman +Tuck +Tucker +Tuckie +Tucky +Tudor +Tull +Tulley +Tully +Turner +Ty +Tybalt +Tye +Tyler +Tymon +Tymothy +Tynan +Tyrone +Tyrus +Tyson +Udale +Udall +Udell +Ugo +Ulberto +Uli +Ulick +Ulises +Ulric +Ulrich +Ulrick +Ulysses +Umberto +Upton +Urbain +Urban +Urbano +Urbanus +Uri +Uriah +Uriel +Urson +Vachel +Vaclav +Vail +Val +Valdemar +Vale +Valentin +Valentine +Valentinian +Van +Vance +Vasili +Vasilis +Vasily +Vassili +Vassily +Vaughan +Vaughn +Venkat +Verge +Vergil +Vern +Verne +Vernen +Verney +Vernon +Vernor +Vic +Vick +Victor +Vijay +Vilhelm +Vin +Vince +Vincent +Vincents +Vinnie +Vinny +Vinod +Virge +Virgie +Virgil +Virgilio +Vite +Vito +Vlad +Vladamir +Vladimir +Voltaire +Von +Wade +Wadsworth +Wain +Waine +Wainwright +Wait +Waite +Waiter +Wake +Wakefield +Wald +Waldemar +Walden +Waldo +Waldon +Waleed +Walker +Wallace +Wallache +Wallas +Wallie +Wallis +Wally +Walsh +Walt +Walter +Walther +Walton +Wang +Ward +Warde +Warden +Ware +Waring +Warner +Warren +Wash +Washington +Wat +Waverley +Waverly +Way +Waylan +Wayland +Waylen +Waylin +Waylon +Wayne +Web +Webb +Weber +Webster +Weidar +Weider +Welbie +Welby +Welch +Wells +Welsh +Wendall +Wendel +Wendell +Werner +Wes +Wesley +Weslie +West +Westbrook +Westbrooke +Westleigh +Westley +Weston +Weylin +Wheeler +Whit +Whitaker +Whitby +Whitman 
+Whitney +Whittaker +Wiatt +Wilber +Wilbert +Wilbur +Wilburn +Wilburt +Wilden +Wildon +Wilek +Wiley +Wilfred +Wilfrid +Wilhelm +Will +Willard +Willdon +Willem +Willey +Willi +William +Willie +Willis +Willmott +Willy +Wilmar +Wilmer +Wilson +Wilt +Wilton +Win +Windham +Winfield +Winford +Winfred +Winifield +Winn +Winnie +Winny +Winslow +Winston +Winthrop +Winton +Wit +Witold +Wittie +Witty +Wojciech +Wolfgang +Wolfie +Wolfram +Wolfy +Woochang +Woodie +Woodman +Woodrow +Woody +Worden +Worthington +Worthy +Wright +Wyatan +Wyatt +Wye +Wylie +Wyn +Wyndham +Wynn +Wynton +Xavier +Xenos +Xerxes +Xever +Ximenes +Ximenez +Xymenes +Yaakov +Yacov +Yale +Yanaton +Yance +Yancey +Yancy +Yank +Yankee +Yard +Yardley +Yehudi +Yigal +Yule +Yuri +Yves +Zach +Zacharia +Zachariah +Zacharias +Zacharie +Zachary +Zacherie +Zachery +Zack +Zackariah +Zak +Zalman +Zane +Zared +Zary +Zeb +Zebadiah +Zebedee +Zebulen +Zebulon +Zechariah +Zed +Zedekiah +Zeke +Zelig +Zerk +Zeus +Zippy +Zollie +Zolly +Zorro +Rahul +Shumeet +Vibhu diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/roman_to_spoken.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/roman_to_spoken.tsv new file mode 100644 index 0000000..971c90e --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/roman/roman_to_spoken.tsv @@ -0,0 +1,2000 @@ +I one +II two +III three +IV four +V five +VI six +VII seven +VIII eight +IX nine +X ten +XI eleven +XII twelve +XIII thirteen +XIV fourteen +XV fifteen +XVI sixteen +XVII seventeen +XVIII eighteen +XIX nineteen +XX twenty +XXI twenty one +XXII twenty two +XXIII twenty three +XXIV twenty four +XXV twenty five +XXVI twenty six +XXVII twenty seven +XXVIII twenty eight +XXIX twenty nine +XXX thirty +XXXI thirty one +XXXII thirty two +XXXIII thirty three +XXXIV thirty four +XXXV thirty five +XXXVI thirty six +XXXVII thirty seven +XXXVIII thirty eight +XXXIX thirty nine +XL forty +XLI forty one +XLII forty two +XLIII 
forty three +XLIV forty four +XLV forty five +XLVI forty six +XLVII forty seven +XLVIII forty eight +XLIX forty nine +L fifty +LI fifty one +LII fifty two +LIII fifty three +LIV fifty four +LV fifty five +LVI fifty six +LVII fifty seven +LVIII fifty eight +LIX fifty nine +LX sixty +LXI sixty one +LXII sixty two +LXIII sixty three +LXIV sixty four +LXV sixty five +LXVI sixty six +LXVII sixty seven +LXVIII sixty eight +LXIX sixty nine +LXX seventy +LXXI seventy one +LXXII seventy two +LXXIII seventy three +LXXIV seventy four +LXXV seventy five +LXXVI seventy six +LXXVII seventy seven +LXXVIII seventy eight +LXXIX seventy nine +LXXX eighty +LXXXI eighty one +LXXXII eighty two +LXXXIII eighty three +LXXXIV eighty four +LXXXV eighty five +LXXXVI eighty six +LXXXVII eighty seven +LXXXVIII eighty eight +LXXXIX eighty nine +XC ninety +XCI ninety one +XCII ninety two +XCIII ninety three +XCIV ninety four +XCV ninety five +XCVI ninety six +XCVII ninety seven +XCVIII ninety eight +XCIX ninety nine +C one hundred +CI one hundred one +CII one hundred two +CIII one hundred three +CIV one hundred four +CV one hundred five +CVI one hundred six +CVII one hundred seven +CVIII one hundred eight +CIX one hundred nine +CX one hundred ten +CXI one hundred eleven +CXII one hundred twelve +CXIII one hundred thirteen +CXIV one hundred fourteen +CXV one hundred fifteen +CXVI one hundred sixteen +CXVII one hundred seventeen +CXVIII one hundred eighteen +CXIX one hundred nineteen +CXX one hundred twenty +CXXI one hundred twenty one +CXXII one hundred twenty two +CXXIII one hundred twenty three +CXXIV one hundred twenty four +CXXV one hundred twenty five +CXXVI one hundred twenty six +CXXVII one hundred twenty seven +CXXVIII one hundred twenty eight +CXXIX one hundred twenty nine +CXXX one hundred thirty +CXXXI one hundred thirty one +CXXXII one hundred thirty two +CXXXIII one hundred thirty three +CXXXIV one hundred thirty four +CXXXV one hundred thirty five +CXXXVI one hundred thirty six 
+CXXXVII one hundred thirty seven +CXXXVIII one hundred thirty eight +CXXXIX one hundred thirty nine +CXL one hundred forty +CXLI one hundred forty one +CXLII one hundred forty two +CXLIII one hundred forty three +CXLIV one hundred forty four +CXLV one hundred forty five +CXLVI one hundred forty six +CXLVII one hundred forty seven +CXLVIII one hundred forty eight +CXLIX one hundred forty nine +CL one hundred fifty +CLI one hundred fifty one +CLII one hundred fifty two +CLIII one hundred fifty three +CLIV one hundred fifty four +CLV one hundred fifty five +CLVI one hundred fifty six +CLVII one hundred fifty seven +CLVIII one hundred fifty eight +CLIX one hundred fifty nine +CLX one hundred sixty +CLXI one hundred sixty one +CLXII one hundred sixty two +CLXIII one hundred sixty three +CLXIV one hundred sixty four +CLXV one hundred sixty five +CLXVI one hundred sixty six +CLXVII one hundred sixty seven +CLXVIII one hundred sixty eight +CLXIX one hundred sixty nine +CLXX one hundred seventy +CLXXI one hundred seventy one +CLXXII one hundred seventy two +CLXXIII one hundred seventy three +CLXXIV one hundred seventy four +CLXXV one hundred seventy five +CLXXVI one hundred seventy six +CLXXVII one hundred seventy seven +CLXXVIII one hundred seventy eight +CLXXIX one hundred seventy nine +CLXXX one hundred eighty +CLXXXI one hundred eighty one +CLXXXII one hundred eighty two +CLXXXIII one hundred eighty three +CLXXXIV one hundred eighty four +CLXXXV one hundred eighty five +CLXXXVI one hundred eighty six +CLXXXVII one hundred eighty seven +CLXXXVIII one hundred eighty eight +CLXXXIX one hundred eighty nine +CXC one hundred ninety +CXCI one hundred ninety one +CXCII one hundred ninety two +CXCIII one hundred ninety three +CXCIV one hundred ninety four +CXCV one hundred ninety five +CXCVI one hundred ninety six +CXCVII one hundred ninety seven +CXCVIII one hundred ninety eight +CXCIX one hundred ninety nine +CC two hundred +CCI two hundred one +CCII two hundred two +CCIII 
two hundred three +CCIV two hundred four +CCV two hundred five +CCVI two hundred six +CCVII two hundred seven +CCVIII two hundred eight +CCIX two hundred nine +CCX two hundred ten +CCXI two hundred eleven +CCXII two hundred twelve +CCXIII two hundred thirteen +CCXIV two hundred fourteen +CCXV two hundred fifteen +CCXVI two hundred sixteen +CCXVII two hundred seventeen +CCXVIII two hundred eighteen +CCXIX two hundred nineteen +CCXX two hundred twenty +CCXXI two hundred twenty one +CCXXII two hundred twenty two +CCXXIII two hundred twenty three +CCXXIV two hundred twenty four +CCXXV two hundred twenty five +CCXXVI two hundred twenty six +CCXXVII two hundred twenty seven +CCXXVIII two hundred twenty eight +CCXXIX two hundred twenty nine +CCXXX two hundred thirty +CCXXXI two hundred thirty one +CCXXXII two hundred thirty two +CCXXXIII two hundred thirty three +CCXXXIV two hundred thirty four +CCXXXV two hundred thirty five +CCXXXVI two hundred thirty six +CCXXXVII two hundred thirty seven +CCXXXVIII two hundred thirty eight +CCXXXIX two hundred thirty nine +CCXL two hundred forty +CCXLI two hundred forty one +CCXLII two hundred forty two +CCXLIII two hundred forty three +CCXLIV two hundred forty four +CCXLV two hundred forty five +CCXLVI two hundred forty six +CCXLVII two hundred forty seven +CCXLVIII two hundred forty eight +CCXLIX two hundred forty nine +CCL two hundred fifty +CCLI two hundred fifty one +CCLII two hundred fifty two +CCLIII two hundred fifty three +CCLIV two hundred fifty four +CCLV two hundred fifty five +CCLVI two hundred fifty six +CCLVII two hundred fifty seven +CCLVIII two hundred fifty eight +CCLIX two hundred fifty nine +CCLX two hundred sixty +CCLXI two hundred sixty one +CCLXII two hundred sixty two +CCLXIII two hundred sixty three +CCLXIV two hundred sixty four +CCLXV two hundred sixty five +CCLXVI two hundred sixty six +CCLXVII two hundred sixty seven +CCLXVIII two hundred sixty eight +CCLXIX two hundred sixty nine +CCLXX two hundred 
seventy +CCLXXI two hundred seventy one +CCLXXII two hundred seventy two +CCLXXIII two hundred seventy three +CCLXXIV two hundred seventy four +CCLXXV two hundred seventy five +CCLXXVI two hundred seventy six +CCLXXVII two hundred seventy seven +CCLXXVIII two hundred seventy eight +CCLXXIX two hundred seventy nine +CCLXXX two hundred eighty +CCLXXXI two hundred eighty one +CCLXXXII two hundred eighty two +CCLXXXIII two hundred eighty three +CCLXXXIV two hundred eighty four +CCLXXXV two hundred eighty five +CCLXXXVI two hundred eighty six +CCLXXXVII two hundred eighty seven +CCLXXXVIII two hundred eighty eight +CCLXXXIX two hundred eighty nine +CCXC two hundred ninety +CCXCI two hundred ninety one +CCXCII two hundred ninety two +CCXCIII two hundred ninety three +CCXCIV two hundred ninety four +CCXCV two hundred ninety five +CCXCVI two hundred ninety six +CCXCVII two hundred ninety seven +CCXCVIII two hundred ninety eight +CCXCIX two hundred ninety nine +CCC three hundred +CCCI three hundred one +CCCII three hundred two +CCCIII three hundred three +CCCIV three hundred four +CCCV three hundred five +CCCVI three hundred six +CCCVII three hundred seven +CCCVIII three hundred eight +CCCIX three hundred nine +CCCX three hundred ten +CCCXI three hundred eleven +CCCXII three hundred twelve +CCCXIII three hundred thirteen +CCCXIV three hundred fourteen +CCCXV three hundred fifteen +CCCXVI three hundred sixteen +CCCXVII three hundred seventeen +CCCXVIII three hundred eighteen +CCCXIX three hundred nineteen +CCCXX three hundred twenty +CCCXXI three hundred twenty one +CCCXXII three hundred twenty two +CCCXXIII three hundred twenty three +CCCXXIV three hundred twenty four +CCCXXV three hundred twenty five +CCCXXVI three hundred twenty six +CCCXXVII three hundred twenty seven +CCCXXVIII three hundred twenty eight +CCCXXIX three hundred twenty nine +CCCXXX three hundred thirty +CCCXXXI three hundred thirty one +CCCXXXII three hundred thirty two +CCCXXXIII three hundred thirty 
three +CCCXXXIV three hundred thirty four +CCCXXXV three hundred thirty five +CCCXXXVI three hundred thirty six +CCCXXXVII three hundred thirty seven +CCCXXXVIII three hundred thirty eight +CCCXXXIX three hundred thirty nine +CCCXL three hundred forty +CCCXLI three hundred forty one +CCCXLII three hundred forty two +CCCXLIII three hundred forty three +CCCXLIV three hundred forty four +CCCXLV three hundred forty five +CCCXLVI three hundred forty six +CCCXLVII three hundred forty seven +CCCXLVIII three hundred forty eight +CCCXLIX three hundred forty nine +CCCL three hundred fifty +CCCLI three hundred fifty one +CCCLII three hundred fifty two +CCCLIII three hundred fifty three +CCCLIV three hundred fifty four +CCCLV three hundred fifty five +CCCLVI three hundred fifty six +CCCLVII three hundred fifty seven +CCCLVIII three hundred fifty eight +CCCLIX three hundred fifty nine +CCCLX three hundred sixty +CCCLXI three hundred sixty one +CCCLXII three hundred sixty two +CCCLXIII three hundred sixty three +CCCLXIV three hundred sixty four +CCCLXV three hundred sixty five +CCCLXVI three hundred sixty six +CCCLXVII three hundred sixty seven +CCCLXVIII three hundred sixty eight +CCCLXIX three hundred sixty nine +CCCLXX three hundred seventy +CCCLXXI three hundred seventy one +CCCLXXII three hundred seventy two +CCCLXXIII three hundred seventy three +CCCLXXIV three hundred seventy four +CCCLXXV three hundred seventy five +CCCLXXVI three hundred seventy six +CCCLXXVII three hundred seventy seven +CCCLXXVIII three hundred seventy eight +CCCLXXIX three hundred seventy nine +CCCLXXX three hundred eighty +CCCLXXXI three hundred eighty one +CCCLXXXII three hundred eighty two +CCCLXXXIII three hundred eighty three +CCCLXXXIV three hundred eighty four +CCCLXXXV three hundred eighty five +CCCLXXXVI three hundred eighty six +CCCLXXXVII three hundred eighty seven +CCCLXXXVIII three hundred eighty eight +CCCLXXXIX three hundred eighty nine +CCCXC three hundred ninety +CCCXCI three hundred 
ninety one +CCCXCII three hundred ninety two +CCCXCIII three hundred ninety three +CCCXCIV three hundred ninety four +CCCXCV three hundred ninety five +CCCXCVI three hundred ninety six +CCCXCVII three hundred ninety seven +CCCXCVIII three hundred ninety eight +CCCXCIX three hundred ninety nine +CD four hundred +CDI four hundred one +CDII four hundred two +CDIII four hundred three +CDIV four hundred four +CDV four hundred five +CDVI four hundred six +CDVII four hundred seven +CDVIII four hundred eight +CDIX four hundred nine +CDX four hundred ten +CDXI four hundred eleven +CDXII four hundred twelve +CDXIII four hundred thirteen +CDXIV four hundred fourteen +CDXV four hundred fifteen +CDXVI four hundred sixteen +CDXVII four hundred seventeen +CDXVIII four hundred eighteen +CDXIX four hundred nineteen +CDXX four hundred twenty +CDXXI four hundred twenty one +CDXXII four hundred twenty two +CDXXIII four hundred twenty three +CDXXIV four hundred twenty four +CDXXV four hundred twenty five +CDXXVI four hundred twenty six +CDXXVII four hundred twenty seven +CDXXVIII four hundred twenty eight +CDXXIX four hundred twenty nine +CDXXX four hundred thirty +CDXXXI four hundred thirty one +CDXXXII four hundred thirty two +CDXXXIII four hundred thirty three +CDXXXIV four hundred thirty four +CDXXXV four hundred thirty five +CDXXXVI four hundred thirty six +CDXXXVII four hundred thirty seven +CDXXXVIII four hundred thirty eight +CDXXXIX four hundred thirty nine +CDXL four hundred forty +CDXLI four hundred forty one +CDXLII four hundred forty two +CDXLIII four hundred forty three +CDXLIV four hundred forty four +CDXLV four hundred forty five +CDXLVI four hundred forty six +CDXLVII four hundred forty seven +CDXLVIII four hundred forty eight +CDXLIX four hundred forty nine +CDL four hundred fifty +CDLI four hundred fifty one +CDLII four hundred fifty two +CDLIII four hundred fifty three +CDLIV four hundred fifty four +CDLV four hundred fifty five +CDLVI four hundred fifty six +CDLVII 
four hundred fifty seven +CDLVIII four hundred fifty eight +CDLIX four hundred fifty nine +CDLX four hundred sixty +CDLXI four hundred sixty one +CDLXII four hundred sixty two +CDLXIII four hundred sixty three +CDLXIV four hundred sixty four +CDLXV four hundred sixty five +CDLXVI four hundred sixty six +CDLXVII four hundred sixty seven +CDLXVIII four hundred sixty eight +CDLXIX four hundred sixty nine +CDLXX four hundred seventy +CDLXXI four hundred seventy one +CDLXXII four hundred seventy two +CDLXXIII four hundred seventy three +CDLXXIV four hundred seventy four +CDLXXV four hundred seventy five +CDLXXVI four hundred seventy six +CDLXXVII four hundred seventy seven +CDLXXVIII four hundred seventy eight +CDLXXIX four hundred seventy nine +CDLXXX four hundred eighty +CDLXXXI four hundred eighty one +CDLXXXII four hundred eighty two +CDLXXXIII four hundred eighty three +CDLXXXIV four hundred eighty four +CDLXXXV four hundred eighty five +CDLXXXVI four hundred eighty six +CDLXXXVII four hundred eighty seven +CDLXXXVIII four hundred eighty eight +CDLXXXIX four hundred eighty nine +CDXC four hundred ninety +CDXCI four hundred ninety one +CDXCII four hundred ninety two +CDXCIII four hundred ninety three +CDXCIV four hundred ninety four +CDXCV four hundred ninety five +CDXCVI four hundred ninety six +CDXCVII four hundred ninety seven +CDXCVIII four hundred ninety eight +CDXCIX four hundred ninety nine +D five hundred +DI five hundred one +DII five hundred two +DIII five hundred three +DIV five hundred four +DV five hundred five +DVI five hundred six +DVII five hundred seven +DVIII five hundred eight +DIX five hundred nine +DX five hundred ten +DXI five hundred eleven +DXII five hundred twelve +DXIII five hundred thirteen +DXIV five hundred fourteen +DXV five hundred fifteen +DXVI five hundred sixteen +DXVII five hundred seventeen +DXVIII five hundred eighteen +DXIX five hundred nineteen +DXX five hundred twenty +DXXI five hundred twenty one +DXXII five hundred twenty 
two +DXXIII five hundred twenty three +DXXIV five hundred twenty four +DXXV five hundred twenty five +DXXVI five hundred twenty six +DXXVII five hundred twenty seven +DXXVIII five hundred twenty eight +DXXIX five hundred twenty nine +DXXX five hundred thirty +DXXXI five hundred thirty one +DXXXII five hundred thirty two +DXXXIII five hundred thirty three +DXXXIV five hundred thirty four +DXXXV five hundred thirty five +DXXXVI five hundred thirty six +DXXXVII five hundred thirty seven +DXXXVIII five hundred thirty eight +DXXXIX five hundred thirty nine +DXL five hundred forty +DXLI five hundred forty one +DXLII five hundred forty two +DXLIII five hundred forty three +DXLIV five hundred forty four +DXLV five hundred forty five +DXLVI five hundred forty six +DXLVII five hundred forty seven +DXLVIII five hundred forty eight +DXLIX five hundred forty nine +DL five hundred fifty +DLI five hundred fifty one +DLII five hundred fifty two +DLIII five hundred fifty three +DLIV five hundred fifty four +DLV five hundred fifty five +DLVI five hundred fifty six +DLVII five hundred fifty seven +DLVIII five hundred fifty eight +DLIX five hundred fifty nine +DLX five hundred sixty +DLXI five hundred sixty one +DLXII five hundred sixty two +DLXIII five hundred sixty three +DLXIV five hundred sixty four +DLXV five hundred sixty five +DLXVI five hundred sixty six +DLXVII five hundred sixty seven +DLXVIII five hundred sixty eight +DLXIX five hundred sixty nine +DLXX five hundred seventy +DLXXI five hundred seventy one +DLXXII five hundred seventy two +DLXXIII five hundred seventy three +DLXXIV five hundred seventy four +DLXXV five hundred seventy five +DLXXVI five hundred seventy six +DLXXVII five hundred seventy seven +DLXXVIII five hundred seventy eight +DLXXIX five hundred seventy nine +DLXXX five hundred eighty +DLXXXI five hundred eighty one +DLXXXII five hundred eighty two +DLXXXIII five hundred eighty three +DLXXXIV five hundred eighty four +DLXXXV five hundred eighty five 
+DLXXXVI five hundred eighty six +DLXXXVII five hundred eighty seven +DLXXXVIII five hundred eighty eight +DLXXXIX five hundred eighty nine +DXC five hundred ninety +DXCI five hundred ninety one +DXCII five hundred ninety two +DXCIII five hundred ninety three +DXCIV five hundred ninety four +DXCV five hundred ninety five +DXCVI five hundred ninety six +DXCVII five hundred ninety seven +DXCVIII five hundred ninety eight +DXCIX five hundred ninety nine +DC six hundred +DCI six hundred one +DCII six hundred two +DCIII six hundred three +DCIV six hundred four +DCV six hundred five +DCVI six hundred six +DCVII six hundred seven +DCVIII six hundred eight +DCIX six hundred nine +DCX six hundred ten +DCXI six hundred eleven +DCXII six hundred twelve +DCXIII six hundred thirteen +DCXIV six hundred fourteen +DCXV six hundred fifteen +DCXVI six hundred sixteen +DCXVII six hundred seventeen +DCXVIII six hundred eighteen +DCXIX six hundred nineteen +DCXX six hundred twenty +DCXXI six hundred twenty one +DCXXII six hundred twenty two +DCXXIII six hundred twenty three +DCXXIV six hundred twenty four +DCXXV six hundred twenty five +DCXXVI six hundred twenty six +DCXXVII six hundred twenty seven +DCXXVIII six hundred twenty eight +DCXXIX six hundred twenty nine +DCXXX six hundred thirty +DCXXXI six hundred thirty one +DCXXXII six hundred thirty two +DCXXXIII six hundred thirty three +DCXXXIV six hundred thirty four +DCXXXV six hundred thirty five +DCXXXVI six hundred thirty six +DCXXXVII six hundred thirty seven +DCXXXVIII six hundred thirty eight +DCXXXIX six hundred thirty nine +DCXL six hundred forty +DCXLI six hundred forty one +DCXLII six hundred forty two +DCXLIII six hundred forty three +DCXLIV six hundred forty four +DCXLV six hundred forty five +DCXLVI six hundred forty six +DCXLVII six hundred forty seven +DCXLVIII six hundred forty eight +DCXLIX six hundred forty nine +DCL six hundred fifty +DCLI six hundred fifty one +DCLII six hundred fifty two +DCLIII six hundred 
fifty three +DCLIV six hundred fifty four +DCLV six hundred fifty five +DCLVI six hundred fifty six +DCLVII six hundred fifty seven +DCLVIII six hundred fifty eight +DCLIX six hundred fifty nine +DCLX six hundred sixty +DCLXI six hundred sixty one +DCLXII six hundred sixty two +DCLXIII six hundred sixty three +DCLXIV six hundred sixty four +DCLXV six hundred sixty five +DCLXVI six hundred sixty six +DCLXVII six hundred sixty seven +DCLXVIII six hundred sixty eight +DCLXIX six hundred sixty nine +DCLXX six hundred seventy +DCLXXI six hundred seventy one +DCLXXII six hundred seventy two +DCLXXIII six hundred seventy three +DCLXXIV six hundred seventy four +DCLXXV six hundred seventy five +DCLXXVI six hundred seventy six +DCLXXVII six hundred seventy seven +DCLXXVIII six hundred seventy eight +DCLXXIX six hundred seventy nine +DCLXXX six hundred eighty +DCLXXXI six hundred eighty one +DCLXXXII six hundred eighty two +DCLXXXIII six hundred eighty three +DCLXXXIV six hundred eighty four +DCLXXXV six hundred eighty five +DCLXXXVI six hundred eighty six +DCLXXXVII six hundred eighty seven +DCLXXXVIII six hundred eighty eight +DCLXXXIX six hundred eighty nine +DCXC six hundred ninety +DCXCI six hundred ninety one +DCXCII six hundred ninety two +DCXCIII six hundred ninety three +DCXCIV six hundred ninety four +DCXCV six hundred ninety five +DCXCVI six hundred ninety six +DCXCVII six hundred ninety seven +DCXCVIII six hundred ninety eight +DCXCIX six hundred ninety nine +DCC seven hundred +DCCI seven hundred one +DCCII seven hundred two +DCCIII seven hundred three +DCCIV seven hundred four +DCCV seven hundred five +DCCVI seven hundred six +DCCVII seven hundred seven +DCCVIII seven hundred eight +DCCIX seven hundred nine +DCCX seven hundred ten +DCCXI seven hundred eleven +DCCXII seven hundred twelve +DCCXIII seven hundred thirteen +DCCXIV seven hundred fourteen +DCCXV seven hundred fifteen +DCCXVI seven hundred sixteen +DCCXVII seven hundred seventeen +DCCXVIII seven hundred 
eighteen +DCCXIX seven hundred nineteen +DCCXX seven hundred twenty +DCCXXI seven hundred twenty one +DCCXXII seven hundred twenty two +DCCXXIII seven hundred twenty three +DCCXXIV seven hundred twenty four +DCCXXV seven hundred twenty five +DCCXXVI seven hundred twenty six +DCCXXVII seven hundred twenty seven +DCCXXVIII seven hundred twenty eight +DCCXXIX seven hundred twenty nine +DCCXXX seven hundred thirty +DCCXXXI seven hundred thirty one +DCCXXXII seven hundred thirty two +DCCXXXIII seven hundred thirty three +DCCXXXIV seven hundred thirty four +DCCXXXV seven hundred thirty five +DCCXXXVI seven hundred thirty six +DCCXXXVII seven hundred thirty seven +DCCXXXVIII seven hundred thirty eight +DCCXXXIX seven hundred thirty nine +DCCXL seven hundred forty +DCCXLI seven hundred forty one +DCCXLII seven hundred forty two +DCCXLIII seven hundred forty three +DCCXLIV seven hundred forty four +DCCXLV seven hundred forty five +DCCXLVI seven hundred forty six +DCCXLVII seven hundred forty seven +DCCXLVIII seven hundred forty eight +DCCXLIX seven hundred forty nine +DCCL seven hundred fifty +DCCLI seven hundred fifty one +DCCLII seven hundred fifty two +DCCLIII seven hundred fifty three +DCCLIV seven hundred fifty four +DCCLV seven hundred fifty five +DCCLVI seven hundred fifty six +DCCLVII seven hundred fifty seven +DCCLVIII seven hundred fifty eight +DCCLIX seven hundred fifty nine +DCCLX seven hundred sixty +DCCLXI seven hundred sixty one +DCCLXII seven hundred sixty two +DCCLXIII seven hundred sixty three +DCCLXIV seven hundred sixty four +DCCLXV seven hundred sixty five +DCCLXVI seven hundred sixty six +DCCLXVII seven hundred sixty seven +DCCLXVIII seven hundred sixty eight +DCCLXIX seven hundred sixty nine +DCCLXX seven hundred seventy +DCCLXXI seven hundred seventy one +DCCLXXII seven hundred seventy two +DCCLXXIII seven hundred seventy three +DCCLXXIV seven hundred seventy four +DCCLXXV seven hundred seventy five +DCCLXXVI seven hundred seventy six +DCCLXXVII 
seven hundred seventy seven +DCCLXXVIII seven hundred seventy eight +DCCLXXIX seven hundred seventy nine +DCCLXXX seven hundred eighty +DCCLXXXI seven hundred eighty one +DCCLXXXII seven hundred eighty two +DCCLXXXIII seven hundred eighty three +DCCLXXXIV seven hundred eighty four +DCCLXXXV seven hundred eighty five +DCCLXXXVI seven hundred eighty six +DCCLXXXVII seven hundred eighty seven +DCCLXXXVIII seven hundred eighty eight +DCCLXXXIX seven hundred eighty nine +DCCXC seven hundred ninety +DCCXCI seven hundred ninety one +DCCXCII seven hundred ninety two +DCCXCIII seven hundred ninety three +DCCXCIV seven hundred ninety four +DCCXCV seven hundred ninety five +DCCXCVI seven hundred ninety six +DCCXCVII seven hundred ninety seven +DCCXCVIII seven hundred ninety eight +DCCXCIX seven hundred ninety nine +DCCC eight hundred +DCCCI eight hundred one +DCCCII eight hundred two +DCCCIII eight hundred three +DCCCIV eight hundred four +DCCCV eight hundred five +DCCCVI eight hundred six +DCCCVII eight hundred seven +DCCCVIII eight hundred eight +DCCCIX eight hundred nine +DCCCX eight hundred ten +DCCCXI eight hundred eleven +DCCCXII eight hundred twelve +DCCCXIII eight hundred thirteen +DCCCXIV eight hundred fourteen +DCCCXV eight hundred fifteen +DCCCXVI eight hundred sixteen +DCCCXVII eight hundred seventeen +DCCCXVIII eight hundred eighteen +DCCCXIX eight hundred nineteen +DCCCXX eight hundred twenty +DCCCXXI eight hundred twenty one +DCCCXXII eight hundred twenty two +DCCCXXIII eight hundred twenty three +DCCCXXIV eight hundred twenty four +DCCCXXV eight hundred twenty five +DCCCXXVI eight hundred twenty six +DCCCXXVII eight hundred twenty seven +DCCCXXVIII eight hundred twenty eight +DCCCXXIX eight hundred twenty nine +DCCCXXX eight hundred thirty +DCCCXXXI eight hundred thirty one +DCCCXXXII eight hundred thirty two +DCCCXXXIII eight hundred thirty three +DCCCXXXIV eight hundred thirty four +DCCCXXXV eight hundred thirty five +DCCCXXXVI eight hundred thirty six 
+DCCCXXXVII eight hundred thirty seven +DCCCXXXVIII eight hundred thirty eight +DCCCXXXIX eight hundred thirty nine +DCCCXL eight hundred forty +DCCCXLI eight hundred forty one +DCCCXLII eight hundred forty two +DCCCXLIII eight hundred forty three +DCCCXLIV eight hundred forty four +DCCCXLV eight hundred forty five +DCCCXLVI eight hundred forty six +DCCCXLVII eight hundred forty seven +DCCCXLVIII eight hundred forty eight +DCCCXLIX eight hundred forty nine +DCCCL eight hundred fifty +DCCCLI eight hundred fifty one +DCCCLII eight hundred fifty two +DCCCLIII eight hundred fifty three +DCCCLIV eight hundred fifty four +DCCCLV eight hundred fifty five +DCCCLVI eight hundred fifty six +DCCCLVII eight hundred fifty seven +DCCCLVIII eight hundred fifty eight +DCCCLIX eight hundred fifty nine +DCCCLX eight hundred sixty +DCCCLXI eight hundred sixty one +DCCCLXII eight hundred sixty two +DCCCLXIII eight hundred sixty three +DCCCLXIV eight hundred sixty four +DCCCLXV eight hundred sixty five +DCCCLXVI eight hundred sixty six +DCCCLXVII eight hundred sixty seven +DCCCLXVIII eight hundred sixty eight +DCCCLXIX eight hundred sixty nine +DCCCLXX eight hundred seventy +DCCCLXXI eight hundred seventy one +DCCCLXXII eight hundred seventy two +DCCCLXXIII eight hundred seventy three +DCCCLXXIV eight hundred seventy four +DCCCLXXV eight hundred seventy five +DCCCLXXVI eight hundred seventy six +DCCCLXXVII eight hundred seventy seven +DCCCLXXVIII eight hundred seventy eight +DCCCLXXIX eight hundred seventy nine +DCCCLXXX eight hundred eighty +DCCCLXXXI eight hundred eighty one +DCCCLXXXII eight hundred eighty two +DCCCLXXXIII eight hundred eighty three +DCCCLXXXIV eight hundred eighty four +DCCCLXXXV eight hundred eighty five +DCCCLXXXVI eight hundred eighty six +DCCCLXXXVII eight hundred eighty seven +DCCCLXXXVIII eight hundred eighty eight +DCCCLXXXIX eight hundred eighty nine +DCCCXC eight hundred ninety +DCCCXCI eight hundred ninety one +DCCCXCII eight hundred ninety two +DCCCXCIII 
eight hundred ninety three +DCCCXCIV eight hundred ninety four +DCCCXCV eight hundred ninety five +DCCCXCVI eight hundred ninety six +DCCCXCVII eight hundred ninety seven +DCCCXCVIII eight hundred ninety eight +DCCCXCIX eight hundred ninety nine +CM nine hundred +CMI nine hundred one +CMII nine hundred two +CMIII nine hundred three +CMIV nine hundred four +CMV nine hundred five +CMVI nine hundred six +CMVII nine hundred seven +CMVIII nine hundred eight +CMIX nine hundred nine +CMX nine hundred ten +CMXI nine hundred eleven +CMXII nine hundred twelve +CMXIII nine hundred thirteen +CMXIV nine hundred fourteen +CMXV nine hundred fifteen +CMXVI nine hundred sixteen +CMXVII nine hundred seventeen +CMXVIII nine hundred eighteen +CMXIX nine hundred nineteen +CMXX nine hundred twenty +CMXXI nine hundred twenty one +CMXXII nine hundred twenty two +CMXXIII nine hundred twenty three +CMXXIV nine hundred twenty four +CMXXV nine hundred twenty five +CMXXVI nine hundred twenty six +CMXXVII nine hundred twenty seven +CMXXVIII nine hundred twenty eight +CMXXIX nine hundred twenty nine +CMXXX nine hundred thirty +CMXXXI nine hundred thirty one +CMXXXII nine hundred thirty two +CMXXXIII nine hundred thirty three +CMXXXIV nine hundred thirty four +CMXXXV nine hundred thirty five +CMXXXVI nine hundred thirty six +CMXXXVII nine hundred thirty seven +CMXXXVIII nine hundred thirty eight +CMXXXIX nine hundred thirty nine +CMXL nine hundred forty +CMXLI nine hundred forty one +CMXLII nine hundred forty two +CMXLIII nine hundred forty three +CMXLIV nine hundred forty four +CMXLV nine hundred forty five +CMXLVI nine hundred forty six +CMXLVII nine hundred forty seven +CMXLVIII nine hundred forty eight +CMXLIX nine hundred forty nine +CML nine hundred fifty +CMLI nine hundred fifty one +CMLII nine hundred fifty two +CMLIII nine hundred fifty three +CMLIV nine hundred fifty four +CMLV nine hundred fifty five +CMLVI nine hundred fifty six +CMLVII nine hundred fifty seven +CMLVIII nine hundred 
fifty eight +CMLIX nine hundred fifty nine +CMLX nine hundred sixty +CMLXI nine hundred sixty one +CMLXII nine hundred sixty two +CMLXIII nine hundred sixty three +CMLXIV nine hundred sixty four +CMLXV nine hundred sixty five +CMLXVI nine hundred sixty six +CMLXVII nine hundred sixty seven +CMLXVIII nine hundred sixty eight +CMLXIX nine hundred sixty nine +CMLXX nine hundred seventy +CMLXXI nine hundred seventy one +CMLXXII nine hundred seventy two +CMLXXIII nine hundred seventy three +CMLXXIV nine hundred seventy four +CMLXXV nine hundred seventy five +CMLXXVI nine hundred seventy six +CMLXXVII nine hundred seventy seven +CMLXXVIII nine hundred seventy eight +CMLXXIX nine hundred seventy nine +CMLXXX nine hundred eighty +CMLXXXI nine hundred eighty one +CMLXXXII nine hundred eighty two +CMLXXXIII nine hundred eighty three +CMLXXXIV nine hundred eighty four +CMLXXXV nine hundred eighty five +CMLXXXVI nine hundred eighty six +CMLXXXVII nine hundred eighty seven +CMLXXXVIII nine hundred eighty eight +CMLXXXIX nine hundred eighty nine +CMXC nine hundred ninety +CMXCI nine hundred ninety one +CMXCII nine hundred ninety two +CMXCIII nine hundred ninety three +CMXCIV nine hundred ninety four +CMXCV nine hundred ninety five +CMXCVI nine hundred ninety six +CMXCVII nine hundred ninety seven +CMXCVIII nine hundred ninety eight +CMXCIX nine hundred ninety nine +M one thousand +MI one thousand one +MII one thousand two +MIII one thousand three +MIV one thousand four +MV one thousand five +MVI one thousand six +MVII one thousand seven +MVIII one thousand eight +MIX one thousand nine +MX one thousand ten +MXI one thousand eleven +MXII one thousand twelve +MXIII one thousand thirteen +MXIV one thousand fourteen +MXV one thousand fifteen +MXVI one thousand sixteen +MXVII one thousand seventeen +MXVIII one thousand eighteen +MXIX one thousand nineteen +MXX one thousand twenty +MXXI one thousand twenty one +MXXII one thousand twenty two +MXXIII one thousand twenty three +MXXIV one 
thousand twenty four +MXXV one thousand twenty five +MXXVI one thousand twenty six +MXXVII one thousand twenty seven +MXXVIII one thousand twenty eight +MXXIX one thousand twenty nine +MXXX one thousand thirty +MXXXI one thousand thirty one +MXXXII one thousand thirty two +MXXXIII one thousand thirty three +MXXXIV one thousand thirty four +MXXXV one thousand thirty five +MXXXVI one thousand thirty six +MXXXVII one thousand thirty seven +MXXXVIII one thousand thirty eight +MXXXIX one thousand thirty nine +MXL one thousand forty +MXLI one thousand forty one +MXLII one thousand forty two +MXLIII one thousand forty three +MXLIV one thousand forty four +MXLV one thousand forty five +MXLVI one thousand forty six +MXLVII one thousand forty seven +MXLVIII one thousand forty eight +MXLIX one thousand forty nine +ML one thousand fifty +MLI one thousand fifty one +MLII one thousand fifty two +MLIII one thousand fifty three +MLIV one thousand fifty four +MLV one thousand fifty five +MLVI one thousand fifty six +MLVII one thousand fifty seven +MLVIII one thousand fifty eight +MLIX one thousand fifty nine +MLX one thousand sixty +MLXI one thousand sixty one +MLXII one thousand sixty two +MLXIII one thousand sixty three +MLXIV one thousand sixty four +MLXV one thousand sixty five +MLXVI one thousand sixty six +MLXVII one thousand sixty seven +MLXVIII one thousand sixty eight +MLXIX one thousand sixty nine +MLXX one thousand seventy +MLXXI one thousand seventy one +MLXXII one thousand seventy two +MLXXIII one thousand seventy three +MLXXIV one thousand seventy four +MLXXV one thousand seventy five +MLXXVI one thousand seventy six +MLXXVII one thousand seventy seven +MLXXVIII one thousand seventy eight +MLXXIX one thousand seventy nine +MLXXX one thousand eighty +MLXXXI one thousand eighty one +MLXXXII one thousand eighty two +MLXXXIII one thousand eighty three +MLXXXIV one thousand eighty four +MLXXXV one thousand eighty five +MLXXXVI one thousand eighty six +MLXXXVII one thousand 
eighty seven +MLXXXVIII one thousand eighty eight +MLXXXIX one thousand eighty nine +MXC one thousand ninety +MXCI one thousand ninety one +MXCII one thousand ninety two +MXCIII one thousand ninety three +MXCIV one thousand ninety four +MXCV one thousand ninety five +MXCVI one thousand ninety six +MXCVII one thousand ninety seven +MXCVIII one thousand ninety eight +MXCIX one thousand ninety nine +MC one thousand, one hundred +MCI one thousand, one hundred one +MCII one thousand, one hundred two +MCIII one thousand, one hundred three +MCIV one thousand, one hundred four +MCV one thousand, one hundred five +MCVI one thousand, one hundred six +MCVII one thousand, one hundred seven +MCVIII one thousand, one hundred eight +MCIX one thousand, one hundred nine +MCX one thousand, one hundred ten +MCXI one thousand, one hundred eleven +MCXII one thousand, one hundred twelve +MCXIII one thousand, one hundred thirteen +MCXIV one thousand, one hundred fourteen +MCXV one thousand, one hundred fifteen +MCXVI one thousand, one hundred sixteen +MCXVII one thousand, one hundred seventeen +MCXVIII one thousand, one hundred eighteen +MCXIX one thousand, one hundred nineteen +MCXX one thousand, one hundred twenty +MCXXI one thousand, one hundred twenty one +MCXXII one thousand, one hundred twenty two +MCXXIII one thousand, one hundred twenty three +MCXXIV one thousand, one hundred twenty four +MCXXV one thousand, one hundred twenty five +MCXXVI one thousand, one hundred twenty six +MCXXVII one thousand, one hundred twenty seven +MCXXVIII one thousand, one hundred twenty eight +MCXXIX one thousand, one hundred twenty nine +MCXXX one thousand, one hundred thirty +MCXXXI one thousand, one hundred thirty one +MCXXXII one thousand, one hundred thirty two +MCXXXIII one thousand, one hundred thirty three +MCXXXIV one thousand, one hundred thirty four +MCXXXV one thousand, one hundred thirty five +MCXXXVI one thousand, one hundred thirty six +MCXXXVII one thousand, one hundred thirty seven 
+MCXXXVIII one thousand, one hundred thirty eight +MCXXXIX one thousand, one hundred thirty nine +MCXL one thousand, one hundred forty +MCXLI one thousand, one hundred forty one +MCXLII one thousand, one hundred forty two +MCXLIII one thousand, one hundred forty three +MCXLIV one thousand, one hundred forty four +MCXLV one thousand, one hundred forty five +MCXLVI one thousand, one hundred forty six +MCXLVII one thousand, one hundred forty seven +MCXLVIII one thousand, one hundred forty eight +MCXLIX one thousand, one hundred forty nine +MCL one thousand, one hundred fifty +MCLI one thousand, one hundred fifty one +MCLII one thousand, one hundred fifty two +MCLIII one thousand, one hundred fifty three +MCLIV one thousand, one hundred fifty four +MCLV one thousand, one hundred fifty five +MCLVI one thousand, one hundred fifty six +MCLVII one thousand, one hundred fifty seven +MCLVIII one thousand, one hundred fifty eight +MCLIX one thousand, one hundred fifty nine +MCLX one thousand, one hundred sixty +MCLXI one thousand, one hundred sixty one +MCLXII one thousand, one hundred sixty two +MCLXIII one thousand, one hundred sixty three +MCLXIV one thousand, one hundred sixty four +MCLXV one thousand, one hundred sixty five +MCLXVI one thousand, one hundred sixty six +MCLXVII one thousand, one hundred sixty seven +MCLXVIII one thousand, one hundred sixty eight +MCLXIX one thousand, one hundred sixty nine +MCLXX one thousand, one hundred seventy +MCLXXI one thousand, one hundred seventy one +MCLXXII one thousand, one hundred seventy two +MCLXXIII one thousand, one hundred seventy three +MCLXXIV one thousand, one hundred seventy four +MCLXXV one thousand, one hundred seventy five +MCLXXVI one thousand, one hundred seventy six +MCLXXVII one thousand, one hundred seventy seven +MCLXXVIII one thousand, one hundred seventy eight +MCLXXIX one thousand, one hundred seventy nine +MCLXXX one thousand, one hundred eighty +MCLXXXI one thousand, one hundred eighty one +MCLXXXII one 
thousand, one hundred eighty two +MCLXXXIII one thousand, one hundred eighty three +MCLXXXIV one thousand, one hundred eighty four +MCLXXXV one thousand, one hundred eighty five +MCLXXXVI one thousand, one hundred eighty six +MCLXXXVII one thousand, one hundred eighty seven +MCLXXXVIII one thousand, one hundred eighty eight +MCLXXXIX one thousand, one hundred eighty nine +MCXC one thousand, one hundred ninety +MCXCI one thousand, one hundred ninety one +MCXCII one thousand, one hundred ninety two +MCXCIII one thousand, one hundred ninety three +MCXCIV one thousand, one hundred ninety four +MCXCV one thousand, one hundred ninety five +MCXCVI one thousand, one hundred ninety six +MCXCVII one thousand, one hundred ninety seven +MCXCVIII one thousand, one hundred ninety eight +MCXCIX one thousand, one hundred ninety nine +MCC one thousand, two hundred +MCCI one thousand, two hundred one +MCCII one thousand, two hundred two +MCCIII one thousand, two hundred three +MCCIV one thousand, two hundred four +MCCV one thousand, two hundred five +MCCVI one thousand, two hundred six +MCCVII one thousand, two hundred seven +MCCVIII one thousand, two hundred eight +MCCIX one thousand, two hundred nine +MCCX one thousand, two hundred ten +MCCXI one thousand, two hundred eleven +MCCXII one thousand, two hundred twelve +MCCXIII one thousand, two hundred thirteen +MCCXIV one thousand, two hundred fourteen +MCCXV one thousand, two hundred fifteen +MCCXVI one thousand, two hundred sixteen +MCCXVII one thousand, two hundred seventeen +MCCXVIII one thousand, two hundred eighteen +MCCXIX one thousand, two hundred nineteen +MCCXX one thousand, two hundred twenty +MCCXXI one thousand, two hundred twenty one +MCCXXII one thousand, two hundred twenty two +MCCXXIII one thousand, two hundred twenty three +MCCXXIV one thousand, two hundred twenty four +MCCXXV one thousand, two hundred twenty five +MCCXXVI one thousand, two hundred twenty six +MCCXXVII one thousand, two hundred twenty seven 
+MCCXXVIII one thousand, two hundred twenty eight +MCCXXIX one thousand, two hundred twenty nine +MCCXXX one thousand, two hundred thirty +MCCXXXI one thousand, two hundred thirty one +MCCXXXII one thousand, two hundred thirty two +MCCXXXIII one thousand, two hundred thirty three +MCCXXXIV one thousand, two hundred thirty four +MCCXXXV one thousand, two hundred thirty five +MCCXXXVI one thousand, two hundred thirty six +MCCXXXVII one thousand, two hundred thirty seven +MCCXXXVIII one thousand, two hundred thirty eight +MCCXXXIX one thousand, two hundred thirty nine +MCCXL one thousand, two hundred forty +MCCXLI one thousand, two hundred forty one +MCCXLII one thousand, two hundred forty two +MCCXLIII one thousand, two hundred forty three +MCCXLIV one thousand, two hundred forty four +MCCXLV one thousand, two hundred forty five +MCCXLVI one thousand, two hundred forty six +MCCXLVII one thousand, two hundred forty seven +MCCXLVIII one thousand, two hundred forty eight +MCCXLIX one thousand, two hundred forty nine +MCCL one thousand, two hundred fifty +MCCLI one thousand, two hundred fifty one +MCCLII one thousand, two hundred fifty two +MCCLIII one thousand, two hundred fifty three +MCCLIV one thousand, two hundred fifty four +MCCLV one thousand, two hundred fifty five +MCCLVI one thousand, two hundred fifty six +MCCLVII one thousand, two hundred fifty seven +MCCLVIII one thousand, two hundred fifty eight +MCCLIX one thousand, two hundred fifty nine +MCCLX one thousand, two hundred sixty +MCCLXI one thousand, two hundred sixty one +MCCLXII one thousand, two hundred sixty two +MCCLXIII one thousand, two hundred sixty three +MCCLXIV one thousand, two hundred sixty four +MCCLXV one thousand, two hundred sixty five +MCCLXVI one thousand, two hundred sixty six +MCCLXVII one thousand, two hundred sixty seven +MCCLXVIII one thousand, two hundred sixty eight +MCCLXIX one thousand, two hundred sixty nine +MCCLXX one thousand, two hundred seventy +MCCLXXI one thousand, two 
hundred seventy one +MCCLXXII one thousand, two hundred seventy two +MCCLXXIII one thousand, two hundred seventy three +MCCLXXIV one thousand, two hundred seventy four +MCCLXXV one thousand, two hundred seventy five +MCCLXXVI one thousand, two hundred seventy six +MCCLXXVII one thousand, two hundred seventy seven +MCCLXXVIII one thousand, two hundred seventy eight +MCCLXXIX one thousand, two hundred seventy nine +MCCLXXX one thousand, two hundred eighty +MCCLXXXI one thousand, two hundred eighty one +MCCLXXXII one thousand, two hundred eighty two +MCCLXXXIII one thousand, two hundred eighty three +MCCLXXXIV one thousand, two hundred eighty four +MCCLXXXV one thousand, two hundred eighty five +MCCLXXXVI one thousand, two hundred eighty six +MCCLXXXVII one thousand, two hundred eighty seven +MCCLXXXVIII one thousand, two hundred eighty eight +MCCLXXXIX one thousand, two hundred eighty nine +MCCXC one thousand, two hundred ninety +MCCXCI one thousand, two hundred ninety one +MCCXCII one thousand, two hundred ninety two +MCCXCIII one thousand, two hundred ninety three +MCCXCIV one thousand, two hundred ninety four +MCCXCV one thousand, two hundred ninety five +MCCXCVI one thousand, two hundred ninety six +MCCXCVII one thousand, two hundred ninety seven +MCCXCVIII one thousand, two hundred ninety eight +MCCXCIX one thousand, two hundred ninety nine +MCCC one thousand, three hundred +MCCCI one thousand, three hundred one +MCCCII one thousand, three hundred two +MCCCIII one thousand, three hundred three +MCCCIV one thousand, three hundred four +MCCCV one thousand, three hundred five +MCCCVI one thousand, three hundred six +MCCCVII one thousand, three hundred seven +MCCCVIII one thousand, three hundred eight +MCCCIX one thousand, three hundred nine +MCCCX one thousand, three hundred ten +MCCCXI one thousand, three hundred eleven +MCCCXII one thousand, three hundred twelve +MCCCXIII one thousand, three hundred thirteen +MCCCXIV one thousand, three hundred fourteen +MCCCXV 
one thousand, three hundred fifteen +MCCCXVI one thousand, three hundred sixteen +MCCCXVII one thousand, three hundred seventeen +MCCCXVIII one thousand, three hundred eighteen +MCCCXIX one thousand, three hundred nineteen +MCCCXX one thousand, three hundred twenty +MCCCXXI one thousand, three hundred twenty one +MCCCXXII one thousand, three hundred twenty two +MCCCXXIII one thousand, three hundred twenty three +MCCCXXIV one thousand, three hundred twenty four +MCCCXXV one thousand, three hundred twenty five +MCCCXXVI one thousand, three hundred twenty six +MCCCXXVII one thousand, three hundred twenty seven +MCCCXXVIII one thousand, three hundred twenty eight +MCCCXXIX one thousand, three hundred twenty nine +MCCCXXX one thousand, three hundred thirty +MCCCXXXI one thousand, three hundred thirty one +MCCCXXXII one thousand, three hundred thirty two +MCCCXXXIII one thousand, three hundred thirty three +MCCCXXXIV one thousand, three hundred thirty four +MCCCXXXV one thousand, three hundred thirty five +MCCCXXXVI one thousand, three hundred thirty six +MCCCXXXVII one thousand, three hundred thirty seven +MCCCXXXVIII one thousand, three hundred thirty eight +MCCCXXXIX one thousand, three hundred thirty nine +MCCCXL one thousand, three hundred forty +MCCCXLI one thousand, three hundred forty one +MCCCXLII one thousand, three hundred forty two +MCCCXLIII one thousand, three hundred forty three +MCCCXLIV one thousand, three hundred forty four +MCCCXLV one thousand, three hundred forty five +MCCCXLVI one thousand, three hundred forty six +MCCCXLVII one thousand, three hundred forty seven +MCCCXLVIII one thousand, three hundred forty eight +MCCCXLIX one thousand, three hundred forty nine +MCCCL one thousand, three hundred fifty +MCCCLI one thousand, three hundred fifty one +MCCCLII one thousand, three hundred fifty two +MCCCLIII one thousand, three hundred fifty three +MCCCLIV one thousand, three hundred fifty four +MCCCLV one thousand, three hundred fifty five +MCCCLVI one 
thousand, three hundred fifty six +MCCCLVII one thousand, three hundred fifty seven +MCCCLVIII one thousand, three hundred fifty eight +MCCCLIX one thousand, three hundred fifty nine +MCCCLX one thousand, three hundred sixty +MCCCLXI one thousand, three hundred sixty one +MCCCLXII one thousand, three hundred sixty two +MCCCLXIII one thousand, three hundred sixty three +MCCCLXIV one thousand, three hundred sixty four +MCCCLXV one thousand, three hundred sixty five +MCCCLXVI one thousand, three hundred sixty six +MCCCLXVII one thousand, three hundred sixty seven +MCCCLXVIII one thousand, three hundred sixty eight +MCCCLXIX one thousand, three hundred sixty nine +MCCCLXX one thousand, three hundred seventy +MCCCLXXI one thousand, three hundred seventy one +MCCCLXXII one thousand, three hundred seventy two +MCCCLXXIII one thousand, three hundred seventy three +MCCCLXXIV one thousand, three hundred seventy four +MCCCLXXV one thousand, three hundred seventy five +MCCCLXXVI one thousand, three hundred seventy six +MCCCLXXVII one thousand, three hundred seventy seven +MCCCLXXVIII one thousand, three hundred seventy eight +MCCCLXXIX one thousand, three hundred seventy nine +MCCCLXXX one thousand, three hundred eighty +MCCCLXXXI one thousand, three hundred eighty one +MCCCLXXXII one thousand, three hundred eighty two +MCCCLXXXIII one thousand, three hundred eighty three +MCCCLXXXIV one thousand, three hundred eighty four +MCCCLXXXV one thousand, three hundred eighty five +MCCCLXXXVI one thousand, three hundred eighty six +MCCCLXXXVII one thousand, three hundred eighty seven +MCCCLXXXVIII one thousand, three hundred eighty eight +MCCCLXXXIX one thousand, three hundred eighty nine +MCCCXC one thousand, three hundred ninety +MCCCXCI one thousand, three hundred ninety one +MCCCXCII one thousand, three hundred ninety two +MCCCXCIII one thousand, three hundred ninety three +MCCCXCIV one thousand, three hundred ninety four +MCCCXCV one thousand, three hundred ninety five +MCCCXCVI 
one thousand, three hundred ninety six +MCCCXCVII one thousand, three hundred ninety seven +MCCCXCVIII one thousand, three hundred ninety eight +MCCCXCIX one thousand, three hundred ninety nine +MCD one thousand, four hundred +MCDI one thousand, four hundred one +MCDII one thousand, four hundred two +MCDIII one thousand, four hundred three +MCDIV one thousand, four hundred four +MCDV one thousand, four hundred five +MCDVI one thousand, four hundred six +MCDVII one thousand, four hundred seven +MCDVIII one thousand, four hundred eight +MCDIX one thousand, four hundred nine +MCDX one thousand, four hundred ten +MCDXI one thousand, four hundred eleven +MCDXII one thousand, four hundred twelve +MCDXIII one thousand, four hundred thirteen +MCDXIV one thousand, four hundred fourteen +MCDXV one thousand, four hundred fifteen +MCDXVI one thousand, four hundred sixteen +MCDXVII one thousand, four hundred seventeen +MCDXVIII one thousand, four hundred eighteen +MCDXIX one thousand, four hundred nineteen +MCDXX one thousand, four hundred twenty +MCDXXI one thousand, four hundred twenty one +MCDXXII one thousand, four hundred twenty two +MCDXXIII one thousand, four hundred twenty three +MCDXXIV one thousand, four hundred twenty four +MCDXXV one thousand, four hundred twenty five +MCDXXVI one thousand, four hundred twenty six +MCDXXVII one thousand, four hundred twenty seven +MCDXXVIII one thousand, four hundred twenty eight +MCDXXIX one thousand, four hundred twenty nine +MCDXXX one thousand, four hundred thirty +MCDXXXI one thousand, four hundred thirty one +MCDXXXII one thousand, four hundred thirty two +MCDXXXIII one thousand, four hundred thirty three +MCDXXXIV one thousand, four hundred thirty four +MCDXXXV one thousand, four hundred thirty five +MCDXXXVI one thousand, four hundred thirty six +MCDXXXVII one thousand, four hundred thirty seven +MCDXXXVIII one thousand, four hundred thirty eight +MCDXXXIX one thousand, four hundred thirty nine +MCDXL one thousand, four 
hundred forty +MCDXLI one thousand, four hundred forty one +MCDXLII one thousand, four hundred forty two +MCDXLIII one thousand, four hundred forty three +MCDXLIV one thousand, four hundred forty four +MCDXLV one thousand, four hundred forty five +MCDXLVI one thousand, four hundred forty six +MCDXLVII one thousand, four hundred forty seven +MCDXLVIII one thousand, four hundred forty eight +MCDXLIX one thousand, four hundred forty nine +MCDL one thousand, four hundred fifty +MCDLI one thousand, four hundred fifty one +MCDLII one thousand, four hundred fifty two +MCDLIII one thousand, four hundred fifty three +MCDLIV one thousand, four hundred fifty four +MCDLV one thousand, four hundred fifty five +MCDLVI one thousand, four hundred fifty six +MCDLVII one thousand, four hundred fifty seven +MCDLVIII one thousand, four hundred fifty eight +MCDLIX one thousand, four hundred fifty nine +MCDLX one thousand, four hundred sixty +MCDLXI one thousand, four hundred sixty one +MCDLXII one thousand, four hundred sixty two +MCDLXIII one thousand, four hundred sixty three +MCDLXIV one thousand, four hundred sixty four +MCDLXV one thousand, four hundred sixty five +MCDLXVI one thousand, four hundred sixty six +MCDLXVII one thousand, four hundred sixty seven +MCDLXVIII one thousand, four hundred sixty eight +MCDLXIX one thousand, four hundred sixty nine +MCDLXX one thousand, four hundred seventy +MCDLXXI one thousand, four hundred seventy one +MCDLXXII one thousand, four hundred seventy two +MCDLXXIII one thousand, four hundred seventy three +MCDLXXIV one thousand, four hundred seventy four +MCDLXXV one thousand, four hundred seventy five +MCDLXXVI one thousand, four hundred seventy six +MCDLXXVII one thousand, four hundred seventy seven +MCDLXXVIII one thousand, four hundred seventy eight +MCDLXXIX one thousand, four hundred seventy nine +MCDLXXX one thousand, four hundred eighty +MCDLXXXI one thousand, four hundred eighty one +MCDLXXXII one thousand, four hundred eighty two 
+MCDLXXXIII one thousand, four hundred eighty three +MCDLXXXIV one thousand, four hundred eighty four +MCDLXXXV one thousand, four hundred eighty five +MCDLXXXVI one thousand, four hundred eighty six +MCDLXXXVII one thousand, four hundred eighty seven +MCDLXXXVIII one thousand, four hundred eighty eight +MCDLXXXIX one thousand, four hundred eighty nine +MCDXC one thousand, four hundred ninety +MCDXCI one thousand, four hundred ninety one +MCDXCII one thousand, four hundred ninety two +MCDXCIII one thousand, four hundred ninety three +MCDXCIV one thousand, four hundred ninety four +MCDXCV one thousand, four hundred ninety five +MCDXCVI one thousand, four hundred ninety six +MCDXCVII one thousand, four hundred ninety seven +MCDXCVIII one thousand, four hundred ninety eight +MCDXCIX one thousand, four hundred ninety nine +MD one thousand, five hundred +MDI one thousand, five hundred one +MDII one thousand, five hundred two +MDIII one thousand, five hundred three +MDIV one thousand, five hundred four +MDV one thousand, five hundred five +MDVI one thousand, five hundred six +MDVII one thousand, five hundred seven +MDVIII one thousand, five hundred eight +MDIX one thousand, five hundred nine +MDX one thousand, five hundred ten +MDXI one thousand, five hundred eleven +MDXII one thousand, five hundred twelve +MDXIII one thousand, five hundred thirteen +MDXIV one thousand, five hundred fourteen +MDXV one thousand, five hundred fifteen +MDXVI one thousand, five hundred sixteen +MDXVII one thousand, five hundred seventeen +MDXVIII one thousand, five hundred eighteen +MDXIX one thousand, five hundred nineteen +MDXX one thousand, five hundred twenty +MDXXI one thousand, five hundred twenty one +MDXXII one thousand, five hundred twenty two +MDXXIII one thousand, five hundred twenty three +MDXXIV one thousand, five hundred twenty four +MDXXV one thousand, five hundred twenty five +MDXXVI one thousand, five hundred twenty six +MDXXVII one thousand, five hundred twenty seven 
+MDXXVIII one thousand, five hundred twenty eight +MDXXIX one thousand, five hundred twenty nine +MDXXX one thousand, five hundred thirty +MDXXXI one thousand, five hundred thirty one +MDXXXII one thousand, five hundred thirty two +MDXXXIII one thousand, five hundred thirty three +MDXXXIV one thousand, five hundred thirty four +MDXXXV one thousand, five hundred thirty five +MDXXXVI one thousand, five hundred thirty six +MDXXXVII one thousand, five hundred thirty seven +MDXXXVIII one thousand, five hundred thirty eight +MDXXXIX one thousand, five hundred thirty nine +MDXL one thousand, five hundred forty +MDXLI one thousand, five hundred forty one +MDXLII one thousand, five hundred forty two +MDXLIII one thousand, five hundred forty three +MDXLIV one thousand, five hundred forty four +MDXLV one thousand, five hundred forty five +MDXLVI one thousand, five hundred forty six +MDXLVII one thousand, five hundred forty seven +MDXLVIII one thousand, five hundred forty eight +MDXLIX one thousand, five hundred forty nine +MDL one thousand, five hundred fifty +MDLI one thousand, five hundred fifty one +MDLII one thousand, five hundred fifty two +MDLIII one thousand, five hundred fifty three +MDLIV one thousand, five hundred fifty four +MDLV one thousand, five hundred fifty five +MDLVI one thousand, five hundred fifty six +MDLVII one thousand, five hundred fifty seven +MDLVIII one thousand, five hundred fifty eight +MDLIX one thousand, five hundred fifty nine +MDLX one thousand, five hundred sixty +MDLXI one thousand, five hundred sixty one +MDLXII one thousand, five hundred sixty two +MDLXIII one thousand, five hundred sixty three +MDLXIV one thousand, five hundred sixty four +MDLXV one thousand, five hundred sixty five +MDLXVI one thousand, five hundred sixty six +MDLXVII one thousand, five hundred sixty seven +MDLXVIII one thousand, five hundred sixty eight +MDLXIX one thousand, five hundred sixty nine +MDLXX one thousand, five hundred seventy +MDLXXI one thousand, five 
hundred seventy one +MDLXXII one thousand, five hundred seventy two +MDLXXIII one thousand, five hundred seventy three +MDLXXIV one thousand, five hundred seventy four +MDLXXV one thousand, five hundred seventy five +MDLXXVI one thousand, five hundred seventy six +MDLXXVII one thousand, five hundred seventy seven +MDLXXVIII one thousand, five hundred seventy eight +MDLXXIX one thousand, five hundred seventy nine +MDLXXX one thousand, five hundred eighty +MDLXXXI one thousand, five hundred eighty one +MDLXXXII one thousand, five hundred eighty two +MDLXXXIII one thousand, five hundred eighty three +MDLXXXIV one thousand, five hundred eighty four +MDLXXXV one thousand, five hundred eighty five +MDLXXXVI one thousand, five hundred eighty six +MDLXXXVII one thousand, five hundred eighty seven +MDLXXXVIII one thousand, five hundred eighty eight +MDLXXXIX one thousand, five hundred eighty nine +MDXC one thousand, five hundred ninety +MDXCI one thousand, five hundred ninety one +MDXCII one thousand, five hundred ninety two +MDXCIII one thousand, five hundred ninety three +MDXCIV one thousand, five hundred ninety four +MDXCV one thousand, five hundred ninety five +MDXCVI one thousand, five hundred ninety six +MDXCVII one thousand, five hundred ninety seven +MDXCVIII one thousand, five hundred ninety eight +MDXCIX one thousand, five hundred ninety nine +MDC one thousand, six hundred +MDCI one thousand, six hundred one +MDCII one thousand, six hundred two +MDCIII one thousand, six hundred three +MDCIV one thousand, six hundred four +MDCV one thousand, six hundred five +MDCVI one thousand, six hundred six +MDCVII one thousand, six hundred seven +MDCVIII one thousand, six hundred eight +MDCIX one thousand, six hundred nine +MDCX one thousand, six hundred ten +MDCXI one thousand, six hundred eleven +MDCXII one thousand, six hundred twelve +MDCXIII one thousand, six hundred thirteen +MDCXIV one thousand, six hundred fourteen +MDCXV one thousand, six hundred fifteen +MDCXVI one 
thousand, six hundred sixteen +MDCXVII one thousand, six hundred seventeen +MDCXVIII one thousand, six hundred eighteen +MDCXIX one thousand, six hundred nineteen +MDCXX one thousand, six hundred twenty +MDCXXI one thousand, six hundred twenty one +MDCXXII one thousand, six hundred twenty two +MDCXXIII one thousand, six hundred twenty three +MDCXXIV one thousand, six hundred twenty four +MDCXXV one thousand, six hundred twenty five +MDCXXVI one thousand, six hundred twenty six +MDCXXVII one thousand, six hundred twenty seven +MDCXXVIII one thousand, six hundred twenty eight +MDCXXIX one thousand, six hundred twenty nine +MDCXXX one thousand, six hundred thirty +MDCXXXI one thousand, six hundred thirty one +MDCXXXII one thousand, six hundred thirty two +MDCXXXIII one thousand, six hundred thirty three +MDCXXXIV one thousand, six hundred thirty four +MDCXXXV one thousand, six hundred thirty five +MDCXXXVI one thousand, six hundred thirty six +MDCXXXVII one thousand, six hundred thirty seven +MDCXXXVIII one thousand, six hundred thirty eight +MDCXXXIX one thousand, six hundred thirty nine +MDCXL one thousand, six hundred forty +MDCXLI one thousand, six hundred forty one +MDCXLII one thousand, six hundred forty two +MDCXLIII one thousand, six hundred forty three +MDCXLIV one thousand, six hundred forty four +MDCXLV one thousand, six hundred forty five +MDCXLVI one thousand, six hundred forty six +MDCXLVII one thousand, six hundred forty seven +MDCXLVIII one thousand, six hundred forty eight +MDCXLIX one thousand, six hundred forty nine +MDCL one thousand, six hundred fifty +MDCLI one thousand, six hundred fifty one +MDCLII one thousand, six hundred fifty two +MDCLIII one thousand, six hundred fifty three +MDCLIV one thousand, six hundred fifty four +MDCLV one thousand, six hundred fifty five +MDCLVI one thousand, six hundred fifty six +MDCLVII one thousand, six hundred fifty seven +MDCLVIII one thousand, six hundred fifty eight +MDCLIX one thousand, six hundred fifty 
nine +MDCLX one thousand, six hundred sixty +MDCLXI one thousand, six hundred sixty one +MDCLXII one thousand, six hundred sixty two +MDCLXIII one thousand, six hundred sixty three +MDCLXIV one thousand, six hundred sixty four +MDCLXV one thousand, six hundred sixty five +MDCLXVI one thousand, six hundred sixty six +MDCLXVII one thousand, six hundred sixty seven +MDCLXVIII one thousand, six hundred sixty eight +MDCLXIX one thousand, six hundred sixty nine +MDCLXX one thousand, six hundred seventy +MDCLXXI one thousand, six hundred seventy one +MDCLXXII one thousand, six hundred seventy two +MDCLXXIII one thousand, six hundred seventy three +MDCLXXIV one thousand, six hundred seventy four +MDCLXXV one thousand, six hundred seventy five +MDCLXXVI one thousand, six hundred seventy six +MDCLXXVII one thousand, six hundred seventy seven +MDCLXXVIII one thousand, six hundred seventy eight +MDCLXXIX one thousand, six hundred seventy nine +MDCLXXX one thousand, six hundred eighty +MDCLXXXI one thousand, six hundred eighty one +MDCLXXXII one thousand, six hundred eighty two +MDCLXXXIII one thousand, six hundred eighty three +MDCLXXXIV one thousand, six hundred eighty four +MDCLXXXV one thousand, six hundred eighty five +MDCLXXXVI one thousand, six hundred eighty six +MDCLXXXVII one thousand, six hundred eighty seven +MDCLXXXVIII one thousand, six hundred eighty eight +MDCLXXXIX one thousand, six hundred eighty nine +MDCXC one thousand, six hundred ninety +MDCXCI one thousand, six hundred ninety one +MDCXCII one thousand, six hundred ninety two +MDCXCIII one thousand, six hundred ninety three +MDCXCIV one thousand, six hundred ninety four +MDCXCV one thousand, six hundred ninety five +MDCXCVI one thousand, six hundred ninety six +MDCXCVII one thousand, six hundred ninety seven +MDCXCVIII one thousand, six hundred ninety eight +MDCXCIX one thousand, six hundred ninety nine +MDCC one thousand, seven hundred +MDCCI one thousand, seven hundred one +MDCCII one thousand, seven 
hundred two +MDCCIII one thousand, seven hundred three +MDCCIV one thousand, seven hundred four +MDCCV one thousand, seven hundred five +MDCCVI one thousand, seven hundred six +MDCCVII one thousand, seven hundred seven +MDCCVIII one thousand, seven hundred eight +MDCCIX one thousand, seven hundred nine +MDCCX one thousand, seven hundred ten +MDCCXI one thousand, seven hundred eleven +MDCCXII one thousand, seven hundred twelve +MDCCXIII one thousand, seven hundred thirteen +MDCCXIV one thousand, seven hundred fourteen +MDCCXV one thousand, seven hundred fifteen +MDCCXVI one thousand, seven hundred sixteen +MDCCXVII one thousand, seven hundred seventeen +MDCCXVIII one thousand, seven hundred eighteen +MDCCXIX one thousand, seven hundred nineteen +MDCCXX one thousand, seven hundred twenty +MDCCXXI one thousand, seven hundred twenty one +MDCCXXII one thousand, seven hundred twenty two +MDCCXXIII one thousand, seven hundred twenty three +MDCCXXIV one thousand, seven hundred twenty four +MDCCXXV one thousand, seven hundred twenty five +MDCCXXVI one thousand, seven hundred twenty six +MDCCXXVII one thousand, seven hundred twenty seven +MDCCXXVIII one thousand, seven hundred twenty eight +MDCCXXIX one thousand, seven hundred twenty nine +MDCCXXX one thousand, seven hundred thirty +MDCCXXXI one thousand, seven hundred thirty one +MDCCXXXII one thousand, seven hundred thirty two +MDCCXXXIII one thousand, seven hundred thirty three +MDCCXXXIV one thousand, seven hundred thirty four +MDCCXXXV one thousand, seven hundred thirty five +MDCCXXXVI one thousand, seven hundred thirty six +MDCCXXXVII one thousand, seven hundred thirty seven +MDCCXXXVIII one thousand, seven hundred thirty eight +MDCCXXXIX one thousand, seven hundred thirty nine +MDCCXL one thousand, seven hundred forty +MDCCXLI one thousand, seven hundred forty one +MDCCXLII one thousand, seven hundred forty two +MDCCXLIII one thousand, seven hundred forty three +MDCCXLIV one thousand, seven hundred forty four +MDCCXLV 
one thousand, seven hundred forty five +MDCCXLVI one thousand, seven hundred forty six +MDCCXLVII one thousand, seven hundred forty seven +MDCCXLVIII one thousand, seven hundred forty eight +MDCCXLIX one thousand, seven hundred forty nine +MDCCL one thousand, seven hundred fifty +MDCCLI one thousand, seven hundred fifty one +MDCCLII one thousand, seven hundred fifty two +MDCCLIII one thousand, seven hundred fifty three +MDCCLIV one thousand, seven hundred fifty four +MDCCLV one thousand, seven hundred fifty five +MDCCLVI one thousand, seven hundred fifty six +MDCCLVII one thousand, seven hundred fifty seven +MDCCLVIII one thousand, seven hundred fifty eight +MDCCLIX one thousand, seven hundred fifty nine +MDCCLX one thousand, seven hundred sixty +MDCCLXI one thousand, seven hundred sixty one +MDCCLXII one thousand, seven hundred sixty two +MDCCLXIII one thousand, seven hundred sixty three +MDCCLXIV one thousand, seven hundred sixty four +MDCCLXV one thousand, seven hundred sixty five +MDCCLXVI one thousand, seven hundred sixty six +MDCCLXVII one thousand, seven hundred sixty seven +MDCCLXVIII one thousand, seven hundred sixty eight +MDCCLXIX one thousand, seven hundred sixty nine +MDCCLXX one thousand, seven hundred seventy +MDCCLXXI one thousand, seven hundred seventy one +MDCCLXXII one thousand, seven hundred seventy two +MDCCLXXIII one thousand, seven hundred seventy three +MDCCLXXIV one thousand, seven hundred seventy four +MDCCLXXV one thousand, seven hundred seventy five +MDCCLXXVI one thousand, seven hundred seventy six +MDCCLXXVII one thousand, seven hundred seventy seven +MDCCLXXVIII one thousand, seven hundred seventy eight +MDCCLXXIX one thousand, seven hundred seventy nine +MDCCLXXX one thousand, seven hundred eighty +MDCCLXXXI one thousand, seven hundred eighty one +MDCCLXXXII one thousand, seven hundred eighty two +MDCCLXXXIII one thousand, seven hundred eighty three +MDCCLXXXIV one thousand, seven hundred eighty four +MDCCLXXXV one thousand, seven 
hundred eighty five +MDCCLXXXVI one thousand, seven hundred eighty six +MDCCLXXXVII one thousand, seven hundred eighty seven +MDCCLXXXVIII one thousand, seven hundred eighty eight +MDCCLXXXIX one thousand, seven hundred eighty nine +MDCCXC one thousand, seven hundred ninety +MDCCXCI one thousand, seven hundred ninety one +MDCCXCII one thousand, seven hundred ninety two +MDCCXCIII one thousand, seven hundred ninety three +MDCCXCIV one thousand, seven hundred ninety four +MDCCXCV one thousand, seven hundred ninety five +MDCCXCVI one thousand, seven hundred ninety six +MDCCXCVII one thousand, seven hundred ninety seven +MDCCXCVIII one thousand, seven hundred ninety eight +MDCCXCIX one thousand, seven hundred ninety nine +MDCCC one thousand, eight hundred +MDCCCI one thousand, eight hundred one +MDCCCII one thousand, eight hundred two +MDCCCIII one thousand, eight hundred three +MDCCCIV one thousand, eight hundred four +MDCCCV one thousand, eight hundred five +MDCCCVI one thousand, eight hundred six +MDCCCVII one thousand, eight hundred seven +MDCCCVIII one thousand, eight hundred eight +MDCCCIX one thousand, eight hundred nine +MDCCCX one thousand, eight hundred ten +MDCCCXI one thousand, eight hundred eleven +MDCCCXII one thousand, eight hundred twelve +MDCCCXIII one thousand, eight hundred thirteen +MDCCCXIV one thousand, eight hundred fourteen +MDCCCXV one thousand, eight hundred fifteen +MDCCCXVI one thousand, eight hundred sixteen +MDCCCXVII one thousand, eight hundred seventeen +MDCCCXVIII one thousand, eight hundred eighteen +MDCCCXIX one thousand, eight hundred nineteen +MDCCCXX one thousand, eight hundred twenty +MDCCCXXI one thousand, eight hundred twenty one +MDCCCXXII one thousand, eight hundred twenty two +MDCCCXXIII one thousand, eight hundred twenty three +MDCCCXXIV one thousand, eight hundred twenty four +MDCCCXXV one thousand, eight hundred twenty five +MDCCCXXVI one thousand, eight hundred twenty six +MDCCCXXVII one thousand, eight hundred twenty 
seven +MDCCCXXVIII one thousand, eight hundred twenty eight +MDCCCXXIX one thousand, eight hundred twenty nine +MDCCCXXX one thousand, eight hundred thirty +MDCCCXXXI one thousand, eight hundred thirty one +MDCCCXXXII one thousand, eight hundred thirty two +MDCCCXXXIII one thousand, eight hundred thirty three +MDCCCXXXIV one thousand, eight hundred thirty four +MDCCCXXXV one thousand, eight hundred thirty five +MDCCCXXXVI one thousand, eight hundred thirty six +MDCCCXXXVII one thousand, eight hundred thirty seven +MDCCCXXXVIII one thousand, eight hundred thirty eight +MDCCCXXXIX one thousand, eight hundred thirty nine +MDCCCXL one thousand, eight hundred forty +MDCCCXLI one thousand, eight hundred forty one +MDCCCXLII one thousand, eight hundred forty two +MDCCCXLIII one thousand, eight hundred forty three +MDCCCXLIV one thousand, eight hundred forty four +MDCCCXLV one thousand, eight hundred forty five +MDCCCXLVI one thousand, eight hundred forty six +MDCCCXLVII one thousand, eight hundred forty seven +MDCCCXLVIII one thousand, eight hundred forty eight +MDCCCXLIX one thousand, eight hundred forty nine +MDCCCL one thousand, eight hundred fifty +MDCCCLI one thousand, eight hundred fifty one +MDCCCLII one thousand, eight hundred fifty two +MDCCCLIII one thousand, eight hundred fifty three +MDCCCLIV one thousand, eight hundred fifty four +MDCCCLV one thousand, eight hundred fifty five +MDCCCLVI one thousand, eight hundred fifty six +MDCCCLVII one thousand, eight hundred fifty seven +MDCCCLVIII one thousand, eight hundred fifty eight +MDCCCLIX one thousand, eight hundred fifty nine +MDCCCLX one thousand, eight hundred sixty +MDCCCLXI one thousand, eight hundred sixty one +MDCCCLXII one thousand, eight hundred sixty two +MDCCCLXIII one thousand, eight hundred sixty three +MDCCCLXIV one thousand, eight hundred sixty four +MDCCCLXV one thousand, eight hundred sixty five +MDCCCLXVI one thousand, eight hundred sixty six +MDCCCLXVII one thousand, eight hundred sixty seven 
+MDCCCLXVIII one thousand, eight hundred sixty eight +MDCCCLXIX one thousand, eight hundred sixty nine +MDCCCLXX one thousand, eight hundred seventy +MDCCCLXXI one thousand, eight hundred seventy one +MDCCCLXXII one thousand, eight hundred seventy two +MDCCCLXXIII one thousand, eight hundred seventy three +MDCCCLXXIV one thousand, eight hundred seventy four +MDCCCLXXV one thousand, eight hundred seventy five +MDCCCLXXVI one thousand, eight hundred seventy six +MDCCCLXXVII one thousand, eight hundred seventy seven +MDCCCLXXVIII one thousand, eight hundred seventy eight +MDCCCLXXIX one thousand, eight hundred seventy nine +MDCCCLXXX one thousand, eight hundred eighty +MDCCCLXXXI one thousand, eight hundred eighty one +MDCCCLXXXII one thousand, eight hundred eighty two +MDCCCLXXXIII one thousand, eight hundred eighty three +MDCCCLXXXIV one thousand, eight hundred eighty four +MDCCCLXXXV one thousand, eight hundred eighty five +MDCCCLXXXVI one thousand, eight hundred eighty six +MDCCCLXXXVII one thousand, eight hundred eighty seven +MDCCCLXXXVIII one thousand, eight hundred eighty eight +MDCCCLXXXIX one thousand, eight hundred eighty nine +MDCCCXC one thousand, eight hundred ninety +MDCCCXCI one thousand, eight hundred ninety one +MDCCCXCII one thousand, eight hundred ninety two +MDCCCXCIII one thousand, eight hundred ninety three +MDCCCXCIV one thousand, eight hundred ninety four +MDCCCXCV one thousand, eight hundred ninety five +MDCCCXCVI one thousand, eight hundred ninety six +MDCCCXCVII one thousand, eight hundred ninety seven +MDCCCXCVIII one thousand, eight hundred ninety eight +MDCCCXCIX one thousand, eight hundred ninety nine +MCM one thousand, nine hundred +MCMI one thousand, nine hundred one +MCMII one thousand, nine hundred two +MCMIII one thousand, nine hundred three +MCMIV one thousand, nine hundred four +MCMV one thousand, nine hundred five +MCMVI one thousand, nine hundred six +MCMVII one thousand, nine hundred seven +MCMVIII one thousand, nine hundred 
eight +MCMIX one thousand, nine hundred nine +MCMX one thousand, nine hundred ten +MCMXI one thousand, nine hundred eleven +MCMXII one thousand, nine hundred twelve +MCMXIII one thousand, nine hundred thirteen +MCMXIV one thousand, nine hundred fourteen +MCMXV one thousand, nine hundred fifteen +MCMXVI one thousand, nine hundred sixteen +MCMXVII one thousand, nine hundred seventeen +MCMXVIII one thousand, nine hundred eighteen +MCMXIX one thousand, nine hundred nineteen +MCMXX one thousand, nine hundred twenty +MCMXXI one thousand, nine hundred twenty one +MCMXXII one thousand, nine hundred twenty two +MCMXXIII one thousand, nine hundred twenty three +MCMXXIV one thousand, nine hundred twenty four +MCMXXV one thousand, nine hundred twenty five +MCMXXVI one thousand, nine hundred twenty six +MCMXXVII one thousand, nine hundred twenty seven +MCMXXVIII one thousand, nine hundred twenty eight +MCMXXIX one thousand, nine hundred twenty nine +MCMXXX one thousand, nine hundred thirty +MCMXXXI one thousand, nine hundred thirty one +MCMXXXII one thousand, nine hundred thirty two +MCMXXXIII one thousand, nine hundred thirty three +MCMXXXIV one thousand, nine hundred thirty four +MCMXXXV one thousand, nine hundred thirty five +MCMXXXVI one thousand, nine hundred thirty six +MCMXXXVII one thousand, nine hundred thirty seven +MCMXXXVIII one thousand, nine hundred thirty eight +MCMXXXIX one thousand, nine hundred thirty nine +MCMXL one thousand, nine hundred forty +MCMXLI one thousand, nine hundred forty one +MCMXLII one thousand, nine hundred forty two +MCMXLIII one thousand, nine hundred forty three +MCMXLIV one thousand, nine hundred forty four +MCMXLV one thousand, nine hundred forty five +MCMXLVI one thousand, nine hundred forty six +MCMXLVII one thousand, nine hundred forty seven +MCMXLVIII one thousand, nine hundred forty eight +MCMXLIX one thousand, nine hundred forty nine +MCML one thousand, nine hundred fifty +MCMLI one thousand, nine hundred fifty one +MCMLII one 
thousand, nine hundred fifty two +MCMLIII one thousand, nine hundred fifty three +MCMLIV one thousand, nine hundred fifty four +MCMLV one thousand, nine hundred fifty five +MCMLVI one thousand, nine hundred fifty six +MCMLVII one thousand, nine hundred fifty seven +MCMLVIII one thousand, nine hundred fifty eight +MCMLIX one thousand, nine hundred fifty nine +MCMLX one thousand, nine hundred sixty +MCMLXI one thousand, nine hundred sixty one +MCMLXII one thousand, nine hundred sixty two +MCMLXIII one thousand, nine hundred sixty three +MCMLXIV one thousand, nine hundred sixty four +MCMLXV one thousand, nine hundred sixty five +MCMLXVI one thousand, nine hundred sixty six +MCMLXVII one thousand, nine hundred sixty seven +MCMLXVIII one thousand, nine hundred sixty eight +MCMLXIX one thousand, nine hundred sixty nine +MCMLXX one thousand, nine hundred seventy +MCMLXXI one thousand, nine hundred seventy one +MCMLXXII one thousand, nine hundred seventy two +MCMLXXIII one thousand, nine hundred seventy three +MCMLXXIV one thousand, nine hundred seventy four +MCMLXXV one thousand, nine hundred seventy five +MCMLXXVI one thousand, nine hundred seventy six +MCMLXXVII one thousand, nine hundred seventy seven +MCMLXXVIII one thousand, nine hundred seventy eight +MCMLXXIX one thousand, nine hundred seventy nine +MCMLXXX one thousand, nine hundred eighty +MCMLXXXI one thousand, nine hundred eighty one +MCMLXXXII one thousand, nine hundred eighty two +MCMLXXXIII one thousand, nine hundred eighty three +MCMLXXXIV one thousand, nine hundred eighty four +MCMLXXXV one thousand, nine hundred eighty five +MCMLXXXVI one thousand, nine hundred eighty six +MCMLXXXVII one thousand, nine hundred eighty seven +MCMLXXXVIII one thousand, nine hundred eighty eight +MCMLXXXIX one thousand, nine hundred eighty nine +MCMXC one thousand, nine hundred ninety +MCMXCI one thousand, nine hundred ninety one +MCMXCII one thousand, nine hundred ninety two +MCMXCIII one thousand, nine hundred ninety three 
+MCMXCIV one thousand, nine hundred ninety four +MCMXCV one thousand, nine hundred ninety five +MCMXCVI one thousand, nine hundred ninety six +MCMXCVII one thousand, nine hundred ninety seven +MCMXCVIII one thousand, nine hundred ninety eight +MCMXCIX one thousand, nine hundred ninety nine +MM two thousand diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/suppletive.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/suppletive.tsv new file mode 100644 index 0000000..115460a --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/suppletive.tsv @@ -0,0 +1,83 @@ +deer +fish +sheep +foot feet +goose geese +man men +mouse mice +tooth teeth +woman women +won +child children +ox oxen +wife wives +wolf wolves +analysis analyses +criterion criteria +lbs +focus foci +percent +hertz +kroner krone +inch inches +calory calories +yen +megahertz +gigahertz +kilohertz +hertz +CC +c c +horsepower +hundredweight +kilogram force kilograms force +mega siemens +revolution per minute revolutions per minute +mile per hour miles per hour +megabit per second megabits per second +square foot square feet +kilobit per second kilobits per second +degree Celsius degrees Celsius +degree Fahrenheit degrees Fahrenheit +ATM +AU +BQ +CC +CD +DA +EB +EV +F +GB +G +GL +GPA +GY +HA +H +HL +GP +HS +KB +KL +KN +KT +KV +LM +MA +MA +MB +MC +MF +M +MM +MS +MV +MW +PB +PG +PS +S +TB +YB +ZB \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/__init__.py new file mode 100644 index 0000000..a1cf281 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/ip_prompt.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/ip_prompt.tsv new file mode 100644 index 0000000..03e2529 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/ip_prompt.tsv @@ -0,0 +1,2 @@ +IP address is +IP is \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/ssn_prompt.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/ssn_prompt.tsv new file mode 100644 index 0000000..8bbdb9f --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/ssn_prompt.tsv @@ -0,0 +1,4 @@ +ssn is SSN is +ssn is SSN is +SSN is +SSN \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/telephone_prompt.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/telephone_prompt.tsv new file mode 100644 index 0000000..6dcfb6c --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/telephone/telephone_prompt.tsv @@ -0,0 +1,5 @@ +call me at +reach at +reached at +my number is +hit me up at \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/time/__init__.py 
b/utils/speechio/nemo_text_processing/text_normalization/en/data/time/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/time/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/time/suffix.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/time/suffix.tsv new file mode 100644 index 0000000..026a6a9 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/time/suffix.tsv @@ -0,0 +1,12 @@ +p.m. PM +p.m PM +pm PM +P.M. PM +P.M PM +PM PM +a.m. AM +a.m AM +am AM +A.M. 
AM +A.M AM +AM AM diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/time/zone.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/time/zone.tsv new file mode 100644 index 0000000..0fda042 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/time/zone.tsv @@ -0,0 +1,14 @@ +cst CST +c.s.t CST +cet CET +c.e.t CET +pst PST +p.s.t PST +est EST +e.s.t EST +pt PT +p.t PT +et ET +e.t ET +gmt GMT +g.m.t GMT diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/UK_to_US.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/UK_to_US.tsv new file mode 100644 index 0000000..a1ad6b0 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/UK_to_US.tsv @@ -0,0 +1,1729 @@ +accessorise accessorize +accessorised accessorized +accessorises accessorizes +accessorising accessorizing +acclimatisation acclimatization +acclimatise acclimatize +acclimatised acclimatized +acclimatises acclimatizes +acclimatising acclimatizing +accoutrements accouterments +aeon eon +aeons eons +aerogramme aerogram +aerogrammes aerograms +aeroplane airplane +aeroplanes airplanes +aesthete esthete +aesthetes esthetes +aesthetic esthetic +aesthetically esthetically +aesthetics esthetics +aetiology etiology +ageing aging +aggrandisement aggrandizement +agonise agonize +agonised agonized +agonises agonizes +agonising agonizing +agonisingly agonizingly +almanack almanac +almanacks almanacs +aluminium aluminum +amortisable amortizable +amortisation amortization +amortisations amortizations +amortise amortize +amortised amortized +amortises amortizes +amortising amortizing +amphitheatre amphitheater +amphitheatres amphitheaters +anaemia anemia +anaemic anemic +anaesthesia anesthesia +anaesthetic anesthetic +anaesthetics anesthetics +anaesthetise anesthetize +anaesthetised anesthetized +anaesthetises anesthetizes +anaesthetising anesthetizing 
+anaesthetist anesthetist +anaesthetists anesthetists +anaesthetize anesthetize +anaesthetized anesthetized +anaesthetizes anesthetizes +anaesthetizing anesthetizing +analogue analog +analogues analogs +analyse analyze +analysed analyzed +analyses analyzes +analysing analyzing +anglicise anglicize +anglicised anglicized +anglicises anglicizes +anglicising anglicizing +annualised annualized +antagonise antagonize +antagonised antagonized +antagonises antagonizes +antagonising antagonizing +apologise apologize +apologised apologized +apologises apologizes +apologising apologizing +appal appall +appals appalls +appetiser appetizer +appetisers appetizers +appetising appetizing +appetisingly appetizingly +arbour arbor +arbours arbors +archaeological archeological +archaeologically archeologically +archaeologist archeologist +archaeologists archeologists +archaeology archeology +ardour ardor +armour armor +armoured armored +armourer armorer +armourers armorers +armouries armories +armoury armory +artefact artifact +artefacts artifacts +authorise authorize +authorised authorized +authorises authorizes +authorising authorizing +axe ax +backpedalled backpedaled +backpedalling backpedaling +bannister banister +bannisters banisters +baptise baptize +baptised baptized +baptises baptizes +baptising baptizing +bastardise bastardize +bastardised bastardized +bastardises bastardizes +bastardising bastardizing +battleaxe battleax +baulk balk +baulked balked +baulking balking +baulks balks +bedevilled bedeviled +bedevilling bedeviling +behaviour behavior +behavioural behavioral +behaviourism behaviorism +behaviourist behaviorist +behaviourists behaviorists +behaviours behaviors +behove behoove +behoved behooved +behoves behooves +bejewelled bejeweled +belabour belabor +belaboured belabored +belabouring belaboring +belabours belabors +bevelled beveled +bevvies bevies +bevvy bevy +biassed biased +biassing biasing +bingeing binging +bougainvillaea bougainvillea +bougainvillaeas 
bougainvilleas +bowdlerise bowdlerize +bowdlerised bowdlerized +bowdlerises bowdlerizes +bowdlerising bowdlerizing +breathalyse breathalyze +breathalysed breathalyzed +breathalyser breathalyzer +breathalysers breathalyzers +breathalyses breathalyzes +breathalysing breathalyzing +brutalise brutalize +brutalised brutalized +brutalises brutalizes +brutalising brutalizing +buses busses +busing bussing +caesarean cesarean +caesareans cesareans +calibre caliber +calibres calibers +calliper caliper +callipers calipers +callisthenics calisthenics +canalise canalize +canalised canalized +canalises canalizes +canalising canalizing +cancellation cancelation +cancellations cancelations +cancelling canceling +cancelled canceled +candour candor +cannibalise cannibalize +cannibalised cannibalized +cannibalises cannibalizes +cannibalising cannibalizing +canonise canonize +canonised canonized +canonises canonizes +canonising canonizing +capitalise capitalize +capitalised capitalized +capitalises capitalizes +capitalising capitalizing +caramelise caramelize +caramelised caramelized +caramelises caramelizes +caramelising caramelizing +carbonise carbonize +carbonised carbonized +carbonises carbonizes +carbonising carbonizing +carolled caroled +carolling caroling +catalogue catalog +catalogued cataloged +catalogues catalogs +cataloguing cataloging +catalyse catalyze +catalysed catalyzed +catalyses catalyzes +catalysing catalyzing +categorise categorize +categorised categorized +categorises categorizes +categorising categorizing +cauterise cauterize +cauterised cauterized +cauterises cauterizes +cauterising cauterizing +cavilled caviled +cavilling caviling +centigramme centigram +centigrammes centigrams +centilitre centiliter +centilitres centiliters +centralise centralize +centralised centralized +centralises centralizes +centralising centralizing +centre center +centres centers +centred centered +centrefold centerfold +centrefolds centerfolds +centrepiece centerpiece +centrepieces 
centerpieces +channelled channeled +channelling channeling +characterise characterize +characterised characterized +characterises characterizes +characterising characterizing +cheque check +chequebook checkbook +chequebooks checkbooks +chequered checkered +cheques checks +chilli chili +chimaera chimera +chimaeras chimeras +chiselled chiseled +chiselling chiseling +circularise circularize +circularised circularized +circularises circularizes +circularising circularizing +civilise civilize +civilised civilized +civilises civilizes +civilising civilizing +clamour clamor +clamoured clamored +clamouring clamoring +clamours clamors +clangour clangor +clarinettist clarinetist +clarinettists clarinetists +collectivise collectivize +collectivised collectivized +collectivises collectivizes +collectivising collectivizing +colonisation colonization +colonise colonize +colonised colonized +coloniser colonizer +colonisers colonizers +colonises colonizes +colonising colonizing +colourant colorant +colourants colorants +coloureds coloreds +colourfully colorfully +colouring coloring +colourize colorize +colourized colorized +colourizes colorizes +colourizing colorizing +colourless colorless +colours colors +colour color +commercialise commercialize +commercialised commercialized +commercialises commercializes +commercialising commercializing +compartmentalise compartmentalize +compartmentalised compartmentalized +compartmentalises compartmentalizes +compartmentalising compartmentalizing +computerise computerize +computerised computerized +computerises computerizes +computerising computerizing +conceptualise conceptualize +conceptualised conceptualized +conceptualises conceptualizes +conceptualising conceptualizing +connexion connection +connexions connections +contextualise contextualize +contextualised contextualized +contextualises contextualizes +contextualising contextualizing +cosier cozier +cosies cozies +cosiest coziest +cosily cozily +cosiness coziness +cosy cozy 
+councillor councilor +councillors councilors +counselled counseled +counselling counseling +counsellor counselor +counsellors counselors +crenellated crenelated +criminalise criminalize +criminalised criminalized +criminalises criminalizes +criminalising criminalizing +criticise criticize +criticised criticized +criticises criticizes +criticising criticizing +crueller crueler +cruellest cruelest +crystallisation crystallization +crystallise crystallize +crystallised crystallized +crystallises crystallizes +crystallising crystallizing +cudgelled cudgeled +cudgelling cudgeling +customise customize +customised customized +customises customizes +customising customizing +cypher cipher +cyphers ciphers +decentralisation decentralization +decentralise decentralize +decentralised decentralized +decentralises decentralizes +decentralising decentralizing +decriminalisation decriminalization +decriminalise decriminalize +decriminalised decriminalized +decriminalises decriminalizes +decriminalising decriminalizing +defenceless defenseless +defences defenses +defence defense +dehumanisation dehumanization +dehumanise dehumanize +dehumanised dehumanized +dehumanises dehumanizes +dehumanising dehumanizing +demeanour demeanor +demilitarisation demilitarization +demilitarise demilitarize +demilitarised demilitarized +demilitarises demilitarizes +demilitarising demilitarizing +demobilisation demobilization +demobilise demobilize +demobilised demobilized +demobilises demobilizes +demobilising demobilizing +democratisation democratization +democratise democratize +democratised democratized +democratises democratizes +democratising democratizing +demonise demonize +demonised demonized +demonises demonizes +demonising demonizing +demoralisation demoralization +demoralise demoralize +demoralised demoralized +demoralises demoralizes +demoralising demoralizing +denationalisation denationalization +denationalise denationalize +denationalised denationalized +denationalises denationalizes 
+denationalising denationalizing +deodorise deodorize +deodorised deodorized +deodorises deodorizes +deodorising deodorizing +depersonalise depersonalize +depersonalised depersonalized +depersonalises depersonalizes +depersonalising depersonalizing +deputise deputize +deputised deputized +deputises deputizes +deputising deputizing +desensitisation desensitization +desensitise desensitize +desensitised desensitized +desensitises desensitizes +desensitising desensitizing +destabilisation destabilization +destabilise destabilize +destabilised destabilized +destabilises destabilizes +destabilising destabilizing +dialled dialed +dialling dialing +dialogue dialog +dialogues dialogs +diarrhoea diarrhea +digitise digitize +digitised digitized +digitises digitizes +digitising digitizing +disc disk +discolour discolor +discolours discolors +discoloured discolored +discolouring discoloring +discs disks +disembowelled disemboweled +disembowelling disemboweling +disfavour disfavor +dishevelled disheveled +dishonour dishonor +dishonourable dishonorable +dishonourably dishonorably +dishonoured dishonored +dishonouring dishonoring +dishonours dishonors +disorganisation disorganization +disorganised disorganized +distil distill +distils distills +dramatisation dramatization +dramatisations dramatizations +dramatise dramatize +dramatised dramatized +dramatises dramatizes +dramatising dramatizing +draught draft +draughtboard draftboard +draughtboards draftboards +draughtier draftier +draughtiest draftiest +draughts drafts +draughtsman draftsman +draughtsmanship draftsmanship +draughtsmen draftsmen +draughtswoman draftswoman +draughtswomen draftswomen +draughty drafty +drivelled driveled +drivelling driveling +duelled dueled +duelling dueling +economise economize +economised economized +economises economizes +economising economizing +oedema edema +editorialise editorialize +editorialised editorialized +editorialises editorializes +editorialising editorializing +empathise empathize 
+empathised empathized +empathises empathizes +empathising empathizing +emphasise emphasize +emphasised emphasized +emphasises emphasizes +emphasising emphasizing +enamelled enameled +enamelling enameling +enamoured enamored +encyclopaedia encyclopedia +encyclopaedias encyclopedias +encyclopaedic encyclopedic +endeavour endeavor +endeavoured endeavored +endeavouring endeavoring +endeavours endeavors +energise energize +energised energized +energises energizes +energising energizing +enrol enroll +enrols enrolls +enthral enthrall +enthrals enthralls +epaulette epaulet +epaulettes epaulets +epicentre epicenter +epicentres epicenters +epilogue epilog +epilogues epilogs +epitomise epitomize +epitomised epitomized +epitomises epitomizes +epitomising epitomizing +equalisation equalization +equalise equalize +equalised equalized +equaliser equalizer +equalisers equalizers +equalises equalizes +equalising equalizing +eulogise eulogize +eulogised eulogized +eulogises eulogizes +eulogising eulogizing +evangelise evangelize +evangelised evangelized +evangelises evangelizes +evangelising evangelizing +exorcise exorcize +exorcised exorcized +exorcises exorcizes +exorcising exorcizing +extemporisation extemporization +extemporise extemporize +extemporised extemporized +extemporises extemporizes +extemporising extemporizing +externalisation externalization +externalisations externalizations +externalise externalize +externalised externalized +externalises externalizes +externalising externalizing +factorise factorize +factorised factorized +factorises factorizes +factorising factorizing +faecal fecal +faeces feces +familiarisation familiarization +familiarise familiarize +familiarised familiarized +familiarises familiarizes +familiarising familiarizing +fantasise fantasize +fantasised fantasized +fantasises fantasizes +fantasising fantasizing +favour favor +favourable favorable +favourably favorably +favoured favored +favouring favoring +favourite favorite +favourites favorites 
+favouritism favoritism +favours favors +feminise feminize +feminised feminized +feminises feminizes +feminising feminizing +fertilisation fertilization +fertilise fertilize +fertilised fertilized +fertiliser fertilizer +fertilisers fertilizers +fertilises fertilizes +fertilising fertilizing +fervour fervor +fibreglass fiberglass +fibre fiber +fibres fibers +fictionalisation fictionalization +fictionalisations fictionalizations +fictionalise fictionalize +fictionalised fictionalized +fictionalises fictionalizes +fictionalising fictionalizing +fillet filet +filleted fileted +filleting fileting +fillets filets +finalisation finalization +finalise finalize +finalised finalized +finalises finalizes +finalising finalizing +flautist flutist +flautists flutists +flavoured flavored +flavouring flavoring +flavourings flavorings +flavourless flavorless +flavour flavor +flavours flavors +flavoursome flavorsome +flyer/flier flier/flyer +foetal fetal +foetid fetid +foetus fetus +foetuses fetuses +formalisation formalization +formalise formalize +formalised formalized +formalises formalizes +formalising formalizing +fossilisation fossilization +fossilise fossilize +fossilised fossilized +fossilises fossilizes +fossilising fossilizing +fraternisation fraternization +fraternise fraternize +fraternised fraternized +fraternises fraternizes +fraternising fraternizing +fulfil fulfill +fulfilment fulfillment +fulfils fulfills +funnelled funneled +funnelling funneling +galvanise galvanize +galvanised galvanized +galvanises galvanizes +galvanising galvanizing +gambolled gamboled +gambolling gamboling +gaol jail +gaolbird jailbird +gaolbirds jailbirds +gaolbreak jailbreak +gaolbreaks jailbreaks +gaoled jailed +gaoler jailer +gaolers jailers +gaoling jailing +gaols jails +gases gasses +gauge gage +gauged gaged +gauges gages +gauging gaging +generalisation generalization +generalisations generalizations +generalise generalize +generalised generalized +generalises generalizes +generalising 
generalizing +ghettoise ghettoize +ghettoised ghettoized +ghettoises ghettoizes +ghettoising ghettoizing +gipsies gypsies +glamorise glamorize +glamorised glamorized +glamorises glamorizes +glamorising glamorizing +glamour glamor +globalisation globalization +globalise globalize +globalised globalized +globalises globalizes +globalising globalizing +glueing gluing +goitre goiter +goitres goiters +gonorrhoea gonorrhea +gramme gram +grammes grams +gravelled graveled +grey gray +greyed grayed +greying graying +greyish grayish +greyness grayness +greys grays +grovelled groveled +grovelling groveling +groyne groin +groynes groins +gruelling grueling +gruellingly gruelingly +gryphon griffin +gryphons griffins +gynaecological gynecological +gynaecologist gynecologist +gynaecologists gynecologists +gynaecology gynecology +haematological hematological +haematologist hematologist +haematologists hematologists +haematology hematology +haemoglobin hemoglobin +haemophilia hemophilia +haemophiliac hemophiliac +haemophiliacs hemophiliacs +haemorrhage hemorrhage +haemorrhaged hemorrhaged +haemorrhages hemorrhages +haemorrhaging hemorrhaging +haemorrhoids hemorrhoids +harbour harbor +harboured harbored +harbouring harboring +harbours harbors +harmonisation harmonization +harmonise harmonize +harmonised harmonized +harmonises harmonizes +harmonising harmonizing +homoeopath homeopath +homoeopathic homeopathic +homoeopaths homeopaths +homoeopathy homeopathy +homogenise homogenize +homogenised homogenized +homogenises homogenizes +homogenising homogenizing +honourable honorable +honourably honorably +honoured honored +honouring honoring +honours honors +honour honor +hospitalisation hospitalization +hospitalise hospitalize +hospitalised hospitalized +hospitalises hospitalizes +hospitalising hospitalizing +humanise humanize +humanised humanized +humanises humanizes +humanising humanizing +humoured humored +humouring humoring +humourless humorless +humours humors +humour humor +hybridise 
hybridize +hybridised hybridized +hybridises hybridizes +hybridising hybridizing +hypnotise hypnotize +hypnotised hypnotized +hypnotises hypnotizes +hypnotising hypnotizing +hypothesise hypothesize +hypothesised hypothesized +hypothesises hypothesizes +hypothesising hypothesizing +idealisation idealization +idealise idealize +idealised idealized +idealises idealizes +idealising idealizing +idolise idolize +idolised idolized +idolises idolizes +idolising idolizing +immobilisation immobilization +immobilise immobilize +immobilised immobilized +immobiliser immobilizer +immobilisers immobilizers +immobilises immobilizes +immobilising immobilizing +immortalise immortalize +immortalised immortalized +immortalises immortalizes +immortalising immortalizing +immunisation immunization +immunise immunize +immunised immunized +immunises immunizes +immunising immunizing +impanelled impaneled +impanelling impaneling +imperilled imperiled +imperilling imperiling +individualise individualize +individualised individualized +individualises individualizes +individualising individualizing +industrialise industrialize +industrialised industrialized +industrialises industrializes +industrialising industrializing +inflexion inflection +inflexions inflections +initialise initialize +initialised initialized +initialises initializes +initialising initializing +initialled initialed +initialling initialing +instal install +instalment installment +instalments installments +instals installs +instil instill +instils instills +institutionalisation institutionalization +institutionalise institutionalize +institutionalised institutionalized +institutionalises institutionalizes +institutionalising institutionalizing +intellectualise intellectualize +intellectualised intellectualized +intellectualises intellectualizes +intellectualising intellectualizing +internalisation internalization +internalise internalize +internalised internalized +internalises internalizes +internalising internalizing 
+internationalisation internationalization +internationalise internationalize +internationalised internationalized +internationalises internationalizes +internationalising internationalizing +ionisation ionization +ionise ionize +ionised ionized +ioniser ionizer +ionisers ionizers +ionises ionizes +ionising ionizing +italicise italicize +italicised italicized +italicises italicizes +italicising italicizing +itemise itemize +itemised itemized +itemises itemizes +itemising itemizing +jeopardise jeopardize +jeopardised jeopardized +jeopardises jeopardizes +jeopardising jeopardizing +jewelled jeweled +jeweller jeweler +jewellers jewelers +jewellery jewelry +kilogramme kilogram +kilogrammes kilograms +labelled labeled +labelling labeling +laboured labored +labourer laborer +labourers laborers +labouring laboring +labours labors +labour labor +lacklustre lackluster +legalisation legalization +legalise legalize +legalised legalized +legalises legalizes +legalising legalizing +legitimise legitimize +legitimised legitimized +legitimises legitimizes +legitimising legitimizing +levelled leveled +levelling leveling +leveller leveler +levellers levelers +libelled libeled +libelling libeling +libellous libelous +liberalisation liberalization +liberalise liberalize +liberalised liberalized +liberalises liberalizes +liberalising liberalizing +licence license +licenced licensed +licences licenses +licencing licensing +likeable likable +lionisation lionization +lionise lionize +lionised lionized +lionises lionizes +lionising lionizing +liquidise liquidize +liquidised liquidized +liquidiser liquidizer +liquidisers liquidizers +liquidises liquidizes +liquidising liquidizing +litre liter +litres liters +localise localize +localised localized +localises localizes +localising localizing +louvre louver +louvred louvered +louvres louvers +lustre luster +magnetise magnetize +magnetised magnetized +magnetises magnetizes +magnetising magnetizing +manoeuvrability maneuverability +manoeuvrable 
maneuverable +manoeuvre maneuver +manoeuvred maneuvered +manoeuvres maneuvers +manoeuvring maneuvering +manoeuvrings maneuverings +marginalisation marginalization +marginalise marginalize +marginalised marginalized +marginalises marginalizes +marginalising marginalizing +marshalled marshaled +marshalling marshaling +marvelled marveled +marvelling marveling +marvellous marvelous +marvellously marvelously +materialisation materialization +materialise materialize +materialised materialized +materialises materializes +materialising materializing +maximisation maximization +maximise maximize +maximised maximized +maximises maximizes +maximising maximizing +meagre meager +mechanisation mechanization +mechanise mechanize +mechanised mechanized +mechanises mechanizes +mechanising mechanizing +mediaeval medieval +memorialise memorialize +memorialised memorialized +memorialises memorializes +memorialising memorializing +memorise memorize +memorised memorized +memorises memorizes +memorising memorizing +mesmerise mesmerize +mesmerised mesmerized +mesmerises mesmerizes +mesmerising mesmerizing +metabolise metabolize +metabolised metabolized +metabolises metabolizes +metabolising metabolizing +metre meter +metres meters +micrometre micrometer +micrometres micrometers +millimetre millimeter +millimetres millimeters +centimetre centimeter +centimetres centimeters +kilometre kilometer +kilometres kilometers +militarise militarize +militarised militarized +militarises militarizes +militarising militarizing +milligramme milligram +milligrammes milligrams +millilitre milliliter +millilitres milliliters +miniaturisation miniaturization +miniaturise miniaturize +miniaturised miniaturized +miniaturises miniaturizes +miniaturising miniaturizing +minibuses minibusses +minimise minimize +minimised minimized +minimises minimizes +minimising minimizing +misbehaviour misbehavior +misdemeanour misdemeanor +misdemeanours misdemeanors +misspelt misspelled +mitre miter +mitres miters 
+mobilisation mobilization +mobilise mobilize +mobilised mobilized +mobilises mobilizes +mobilising mobilizing +modelled modeled +modeller modeler +modellers modelers +modelling modeling +modernise modernize +modernised modernized +modernises modernizes +modernising modernizing +moisturise moisturize +moisturised moisturized +moisturiser moisturizer +moisturisers moisturizers +moisturises moisturizes +moisturising moisturizing +monologue monolog +monologues monologs +monopolisation monopolization +monopolise monopolize +monopolised monopolized +monopolises monopolizes +monopolising monopolizing +moralise moralize +moralised moralized +moralises moralizes +moralising moralizing +motorised motorized +mould mold +moulded molded +moulder molder +mouldered moldered +mouldering moldering +moulders molders +mouldier moldier +mouldiest moldiest +moulding molding +mouldings moldings +moulds molds +mouldy moldy +moult molt +moulted molted +moulting molting +moults molts +moustache mustache +moustached mustached +moustaches mustaches +moustachioed mustachioed +multicoloured multicolored +nationalisation nationalization +nationalisations nationalizations +nationalise nationalize +nationalised nationalized +nationalises nationalizes +nationalising nationalizing +naturalisation naturalization +naturalise naturalize +naturalised naturalized +naturalises naturalizes +naturalising naturalizing +neighbour neighbor +neighbourhood neighborhood +neighbourhoods neighborhoods +neighbouring neighboring +neighbourliness neighborliness +neighbourly neighborly +neighbours neighbors +neutralisation neutralization +neutralise neutralize +neutralised neutralized +neutralises neutralizes +neutralising neutralizing +normalisation normalization +normalise normalize +normalised normalized +normalises normalizes +normalising normalizing +odour odor +odourless odorless +odours odors +oesophagus esophagus +oesophaguses esophaguses +offences offenses +offence offense +omelette omelet +omelettes omelets 
+optimise optimize +optimised optimized +optimises optimizes +optimising optimizing +organisational organizational +organised organized +organiser organizer +organisers organizers +organises organizes +organise organize +organising organizing +orthopaedic orthopedic +orthopaedics orthopedics +ostracise ostracize +ostracised ostracized +ostracises ostracizes +ostracising ostracizing +outmanoeuvre outmaneuver +outmanoeuvred outmaneuvered +outmanoeuvres outmaneuvers +outmanoeuvring outmaneuvering +overemphasise overemphasize +overemphasised overemphasized +overemphasises overemphasizes +overemphasising overemphasizing +oxidisation oxidization +oxidise oxidize +oxidised oxidized +oxidises oxidizes +oxidising oxidizing +paederast pederast +paederasts pederasts +paediatric pediatric +paediatrician pediatrician +paediatricians pediatricians +paediatrics pediatrics +paedophile pedophile +paedophiles pedophiles +paedophilia pedophilia +palaeolithic paleolithic +palaeontologist paleontologist +palaeontologists paleontologists +palaeontology paleontology +panelled paneled +panelling paneling +panellist panelist +panellists panelists +paralyse paralyze +paralysed paralyzed +paralyses paralyzes +paralysing paralyzing +parcelled parceled +parcelling parceling +parlour parlor +parlours parlors +particularise particularize +particularised particularized +particularises particularizes +particularising particularizing +passivisation passivization +passivise passivize +passivised passivized +passivises passivizes +passivising passivizing +pasteurisation pasteurization +pasteurise pasteurize +pasteurised pasteurized +pasteurises pasteurizes +pasteurising pasteurizing +patronise patronize +patronised patronized +patronises patronizes +patronising patronizing +patronisingly patronizingly +pedalled pedaled +pedalling pedaling +pedestrianisation pedestrianization +pedestrianise pedestrianize +pedestrianised pedestrianized +pedestrianises pedestrianizes +pedestrianising pedestrianizing 
+penalise penalize +penalised penalized +penalises penalizes +penalising penalizing +pencilled penciled +pencilling penciling +personalise personalize +personalised personalized +personalises personalizes +personalising personalizing +pharmacopoeia pharmacopeia +pharmacopoeias pharmacopeias +philosophise philosophize +philosophised philosophized +philosophises philosophizes +philosophising philosophizing +philtre filter +philtres filters +phoney phony +plagiarise plagiarize +plagiarised plagiarized +plagiarises plagiarizes +plagiarising plagiarizing +plough plow +ploughed plowed +ploughing plowing +ploughman plowman +ploughmen plowmen +ploughs plows +ploughshare plowshare +ploughshares plowshares +polarisation polarization +polarise polarize +polarised polarized +polarises polarizes +polarising polarizing +politicisation politicization +politicise politicize +politicised politicized +politicises politicizes +politicising politicizing +popularisation popularization +popularise popularize +popularised popularized +popularises popularizes +popularising popularizing +pouffe pouf +pouffes poufs +practise practice +practises practices +practising practicing +practised practiced +praesidium presidium +praesidiums presidiums +pressurisation pressurization +pressurise pressurize +pressurised pressurized +pressurises pressurizes +pressurising pressurizing +pretence pretense +pretences pretenses +primaeval primeval +prioritisation prioritization +prioritise prioritize +prioritised prioritized +prioritises prioritizes +prioritising prioritizing +privatisation privatization +privatisations privatizations +privatise privatize +privatised privatized +privatises privatizes +privatising privatizing +professionalisation professionalization +professionalise professionalize +professionalised professionalized +professionalises professionalizes +professionalising professionalizing +programme program +programmes programs +prologue prolog +prologues prologs +propagandise propagandize 
+propagandised propagandized +propagandises propagandizes +propagandising propagandizing +proselytise proselytize +proselytised proselytized +proselytiser proselytizer +proselytisers proselytizers +proselytises proselytizes +proselytising proselytizing +psychoanalyse psychoanalyze +psychoanalysed psychoanalyzed +psychoanalyses psychoanalyzes +psychoanalysing psychoanalyzing +publicise publicize +publicised publicized +publicises publicizes +publicising publicizing +pulverisation pulverization +pulverise pulverize +pulverised pulverized +pulverises pulverizes +pulverising pulverizing +pummelled pummel +pummelling pummeled +pyjama pajama +pyjamas pajamas +pzazz pizzazz +quarrelled quarreled +quarrelling quarreling +radicalise radicalize +radicalised radicalized +radicalises radicalizes +radicalising radicalizing +rancour rancor +randomise randomize +randomised randomized +randomises randomizes +randomising randomizing +rationalisation rationalization +rationalisations rationalizations +rationalise rationalize +rationalised rationalized +rationalises rationalizes +rationalising rationalizing +ravelled raveled +ravelling raveling +realisable realizable +realisation realization +realisations realizations +realise realize +realised realized +realises realizes +realising realizing +recognisable recognizable +recognisably recognizably +recognisance recognizance +recognise recognize +recognised recognized +recognises recognizes +recognising recognizing +reconnoitre reconnoiter +reconnoitred reconnoitered +reconnoitres reconnoiters +reconnoitring reconnoitering +refuelled refueled +refuelling refueling +regularisation regularization +regularise regularize +regularised regularized +regularises regularizes +regularising regularizing +remodelled remodeled +remodelling remodeling +remould remold +remoulded remolded +remoulding remolding +remoulds remolds +reorganisation reorganization +reorganisations reorganizations +reorganise reorganize +reorganised reorganized +reorganises 
reorganizes +reorganising reorganizing +revelled reveled +reveller reveler +revellers revelers +revelling reveling +revitalise revitalize +revitalised revitalized +revitalises revitalizes +revitalising revitalizing +revolutionise revolutionize +revolutionised revolutionized +revolutionises revolutionizes +revolutionising revolutionizing +rhapsodise rhapsodize +rhapsodised rhapsodized +rhapsodises rhapsodizes +rhapsodising rhapsodizing +rigours rigors +ritualised ritualized +rivalled rivaled +rivalling rivaling +romanticise romanticize +romanticised romanticized +romanticises romanticizes +romanticising romanticizing +rumour rumor +rumoured rumored +rumours rumors +sabre saber +sabres sabers +saltpetre saltpeter +sanitise sanitize +sanitised sanitized +sanitises sanitizes +sanitising sanitizing +satirise satirize +satirised satirized +satirises satirizes +satirising satirizing +saviour savior +saviours saviors +savour savor +savoured savored +savouries savories +savouring savoring +savours savors +savoury savory +scandalise scandalize +scandalised scandalized +scandalises scandalizes +scandalising scandalizing +sceptic skeptic +scepticism skepticism +sceptical skeptical +sceptically skeptically +sceptics skeptics +sceptre scepter +sceptres scepters +scrutinise scrutinize +scrutinised scrutinized +scrutinises scrutinizes +scrutinising scrutinizing +secularisation secularization +secularise secularize +secularised secularized +secularises secularizes +secularising secularizing +sensationalise sensationalize +sensationalised sensationalized +sensationalises sensationalizes +sensationalising sensationalizing +sensitise sensitize +sensitised sensitized +sensitises sensitizes +sensitising sensitizing +sentimentalise sentimentalize +sentimentalised sentimentalized +sentimentalises sentimentalizes +sentimentalising sentimentalizing +sepulchre sepulcher +sepulchres sepulchers +serialisation serialization +serialisations serializations +serialise serialize +serialised 
serialized +serialises serializes +serialising serializing +sermonise sermonize +sermonised sermonized +sermonises sermonizes +sermonising sermonizing +sheikh sheik +shovelled shoveled +shovelling shoveling +shrivelled shriveled +shrivelling shriveling +signalise signalize +signalised signalized +signalises signalizes +signalising signalizing +signalled signaled +signalling signaling +smoulder smolder +smouldered smoldered +smouldering smoldering +smoulders smolders +snivelled sniveled +snivelling sniveling +snorkelled snorkeled +snorkelling snorkeling +snowplough snowplow +snowploughs snowplow +socialisation socialization +socialise socialize +socialised socialized +socialises socializes +socialising socializing +sodomise sodomize +sodomised sodomized +sodomises sodomizes +sodomising sodomizing +solemnise solemnize +solemnised solemnized +solemnises solemnizes +solemnising solemnizing +sombre somber +specialisation specialization +specialisations specializations +specialise specialize +specialised specialized +specialises specializes +specialising specializing +spectre specter +spectres specters +spiralled spiraled +spiralling spiraling +splendour splendor +splendours splendors +squirrelled squirreled +squirrelling squirreling +stabilisation stabilization +stabilise stabilize +stabilised stabilized +stabiliser stabilizer +stabilisers stabilizers +stabilises stabilizes +stabilising stabilizing +standardisation standardization +standardise standardize +standardised standardized +standardises standardizes +standardising standardizing +stencilled stenciled +stencilling stenciling +sterilisation sterilization +sterilisations sterilizations +sterilise sterilize +sterilised sterilized +steriliser sterilizer +sterilisers sterilizers +sterilises sterilizes +sterilising sterilizing +stigmatisation stigmatization +stigmatise stigmatize +stigmatised stigmatized +stigmatises stigmatizes +stigmatising stigmatizing +storey story +storeys stories +subsidisation subsidization 
+subsidise subsidize +subsidised subsidized +subsidiser subsidizer +subsidisers subsidizers +subsidises subsidizes +subsidising subsidizing +succour succor +succoured succored +succouring succoring +succours succors +sulphate sulfate +sulphates sulfates +sulphide sulfide +sulphides sulfides +sulphur sulfur +sulphurous sulfurous +summarise summarize +summarised summarized +summarises summarizes +summarising summarizing +swivelled swiveled +swivelling swiveling +symbolise symbolize +symbolised symbolized +symbolises symbolizes +symbolising symbolizing +sympathise sympathize +sympathised sympathized +sympathiser sympathizer +sympathisers sympathizers +sympathises sympathizes +sympathising sympathizing +synchronisation synchronization +synchronise synchronize +synchronised synchronized +synchronises synchronizes +synchronising synchronizing +synthesise synthesize +synthesised synthesized +synthesiser synthesizer +synthesisers synthesizers +synthesises synthesizes +synthesising synthesizing +syphon siphon +syphoned siphoned +syphoning siphoning +syphons siphons +systematisation systematization +systematise systematize +systematised systematized +systematises systematizes +systematising systematizing +tantalise tantalize +tantalised tantalized +tantalises tantalizes +tantalising tantalizing +tantalisingly tantalizingly +tasselled tasseled +technicolour technicolor +temporise temporize +temporised temporized +temporises temporizes +temporising temporizing +tenderise tenderize +tenderised tenderized +tenderises tenderizes +tenderising tenderizing +terrorise terrorize +terrorised terrorized +terrorises terrorizes +terrorising terrorizing +theatregoer theatergoer +theatregoers theatergoers +theatres theaters +theatre theater +theorise theorize +theorised theorized +theorises theorizes +theorising theorizing +tonne ton +tonnes tons +towelled toweled +towelling toweling +toxaemia toxemia +tranquillise tranquilize +tranquillised tranquilized +tranquilliser tranquilizer 
+tranquillisers tranquilizers +tranquillises tranquilizes +tranquillising tranquilizing +tranquillity tranquility +tranquillize tranquilize +tranquillized tranquilized +tranquillizer tranquilizer +tranquillizers tranquilizers +tranquillizes tranquilizes +tranquillizing tranquilizing +tranquilly tranquility +transistorised transistorized +traumatise traumatize +traumatised traumatized +traumatises traumatizes +traumatising traumatizing +traveller traveler +travellers travelers +travelled traveled +travelling traveling +travelogue travelog +travelogues travelogs +trialled trialed +trialling trialing +tricolour tricolor +tricolours tricolors +trivialise trivialize +trivialised trivialized +trivialises trivializes +trivialising trivializing +tumour tumor +tumours tumors +tunnelled tunneled +tunnelling tunneling +tyrannise tyrannize +tyrannised tyrannized +tyrannises tyrannizes +tyrannising tyrannizing +tyre tire +tyres tires +unauthorised unauthorized +uncivilised uncivilized +underutilised underutilized +unequalled unequaled +unfavourable unfavorable +unfavourably unfavorably +unionisation unionization +unionise unionize +unionised unionized +unionises unionizes +unionising unionizing +unorganised unorganized +unravelled unraveled +unravelling unraveling +unrecognisable unrecognizable +unrecognised unrecognized +unrivalled unrivaled +unsavoury unsavory +untrammelled untrammeled +urbanisation urbanization +urbanise urbanize +urbanised urbanized +urbanises urbanizes +urbanising urbanizing +utilisable utilizable +utilisation utilization +utilise utilize +utilised utilized +utilises utilizes +utilising utilizing +valour valor +vandalise vandalize +vandalised vandalized +vandalises vandalizes +vandalising vandalizing +vaporisation vaporization +vaporise vaporize +vaporised vaporized +vaporises vaporizes +vaporising vaporizing +vapour vapor +vapours vapors +verbalise verbalize +verbalised verbalized +verbalises verbalizes +verbalising verbalizing +victimisation 
victimization +victimise victimize +victimised victimized +victimises victimizes +victimising victimizing +videodisc videodisk +videodiscs videodisks +vigour vigor +visualisation visualization +visualisations visualizations +visualise visualize +visualised visualized +visualises visualizes +visualising visualizing +vocalisation vocalization +vocalisations vocalizations +vocalise vocalize +vocalised vocalized +vocalises vocalizes +vocalising vocalizing +vulcanised vulcanized +vulgarisation vulgarization +vulgarise vulgarize +vulgarised vulgarized +vulgarises vulgarizes +vulgarising vulgarizing +waggon wagon +waggons wagons +watercolour watercolor +watercolours watercolors +weaselled weaseled +weaselling weaseling +westernisation westernization +westernise westernize +westernised westernized +westernises westernizes +westernising westernizing +womanise womanize +womanised womanized +womaniser womanizer +womanisers womanizers +womanises womanizes +womanising womanizing +woollen woolen +woollens woolens +woollies woolies +woolly wooly +worshipped worshiped +worshipping worshiping +worshipper worshiper +yodelled yodeled +yodelling yodeling +yoghourt yogurt +yoghourts yogurts +yoghurt yogurt +yoghurt yogurt diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/alternatives.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/alternatives.tsv new file mode 100644 index 0000000..0ee8e2c --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/alternatives.tsv @@ -0,0 +1,45 @@ +Hon. Honorable +Mr. Mister +Mrs. Misses +Ms. Miss +Mr Mister +Mrs Misses +Ms Miss +AC air conditioning +AC air conditioner +AC air conditioners +AC alternating current +&Co. and Co. +&Co. and Company +Mon Monday +Tu Tuesday +Wed Wednesday +Th Thursday +Thur Thursday +Thurs Thursday +Fri Friday +Sat Saturday +Sun Sunday +Mon Mon +Tu Tu +Wed Wed +Th Th +Thur Thur +Thurs Thurs +Fri Fri +Sat Sat +Sun Sun += equals +# number +No. number +No number +NO number +NO. number +NO nitrogen monoxide +NO NO +NO. NO. +No. No. +No No +VOL Volume +VOL. 
Volume +TV Television diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/alternatives_all_format.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/alternatives_all_format.tsv new file mode 100644 index 0000000..449195c --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/alternatives_all_format.tsv @@ -0,0 +1,14 @@ +st street +st saint +dr doctor +dr drive +mt mount +sr senior +prof professor +mt mountain +sr senior +jr junior +vol volume +rd road +ave avenue +approx approximately diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/asr.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/asr.tsv new file mode 100644 index 0000000..c067e17 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/asr.tsv @@ -0,0 +1,14713 @@ +Ph.D. p h d +Hon. honorable +& and +Mt. Mount +Maj. Major +Rev. Reverend +# hash +Gov. governor +vs. versus +vs versus +dept. department +vol volume +vol. volume +bldg. building +Bldg. Building +apt. apartment +Apt. Apartment +Σ sigma +η eta +κ kappa +ω omega +σ sigma +α alpha +ν nu +δ delta +ι iota +_ underscore +% percent sign +& ampersand +* asterisk ++ plus +/ slash += equal sign +^ circumflex +{ left brace +| vertical bar +} right brace +~ tilde +ltd limited +int'l international +$ dollar +A. A. a a +A.A. a a +A.A.A. a a a +A&A a and a +AAAI a a a i +AAAM a a a m +AAAs a a a's +AAAS a a a s +AAAW a a a w +AABA a a b a +AaB a a b +A. A. B. a a b +AAB a a b +AABC a a b c +Aabo a a b o +AABP a a b p +AABW a a b w +aac a a c +AAC a a c +AACAR a a c a r +AACC a a c c +AACCUP a a c c u p +AACMI a a c m i +AACNo a a c n o +AACR a a c r +AACS a a c s +AACSB a a c s b +AACTA a a c t a +AACUPR a a c u p r +A.A.D. 
a a d +AADT a a d t +AADTs a a d t's +Aadu a a d u +AAEA a a e a +AAE a a e +AAERT a a e r t +AAF a a f +AAFCA a a f c a +AAFC a a f c +AAFC's a a f c's +AAFld a a f l d +AAFPOA a a f p o a +AAGPBL a a g p b l +AAHHE a a h h e +AAI a a i +AAIB a a i b +AAIP a a i p +AAK a a k +Aap a a p +AAP a a p +AAPB a a p b +AAPC a a p c +AAPEP a a p e p +AAPG a a p g +AAPM a a p m +AAPT a a p t +A.A.R.M. a a r m +A.B.A. a b a +A. B. a b +A.B. a b +A.B.A. J. a b a j +A.B.C. a b c +A. B. G. a b g +ABG a b g +ABH a b h +ABHD a b h d +Abhi a b h i +ABK a b k +abl a b l +Abl a b l +ABL a b l +ABLV a b l v +ABM a b m +ABMC a b m c +ABN a b n +ABN's a b n's +Abp a b p +ABP a b p +ABPD a b p d +ABPI a b p i +ABPN a b p n +ABPP a b p p +ABPRS a b p r s +ABPW a b p w +Abr a b r +ABR a b r +abv a b v +ABV a b v +ABW a b w +ABX a b x +Abz a b z +A.C.A. a c a +ACA a c a +A&C a and c +A. C. a c +A.C. a c +acac a c a c +Acad a c a d +ACAD a c a d +A.C.A.P. a c a p +ACAP a c a p +ACAS a c a s +ACAZ a c a z +ACBA a c b a +ACB a c b +ACBL a c b l +ACBS a c b s +ACBSP a c b s p +ACCA a c c a +ACC a c c +ACCC a c c c +ACCJ a c c j +ACC&S a c c and s +ACC's a c c's +ACCS a c c s +ACD a c d +ACDC a c d c +A.C. D.F.C. A.F.C. a c d f c a f c +A.C.E. a c e +ACF a c f +ACFE a c f e +A.C.G. a c g +ACG a c g +ACGIH's a c g i h's +A.C.H. a c h +ACH a c h +ACHP a c h p +ACHR a c h r +ACHRE a c h r e +Achs a c h's +ACHS a c h s +ACIA a c i a +ACI a c i +ACICS a c i c s +ACIGA a c i g a +ACIP a c i p +Acis a c i's +ACIS a c i s +ACLA a c l a +A.C.L. a c l +ACL a c l +A.C.L.N. 
a c l n +acls a c l s +ACL's a c l's +ACLS a c l s +ACLU a c l u +ACM a c m +ACMI a c m i +ACMs a c m's +ACN a c n +ACO's a c o's +AcpA a c p a +ACP a c p +AcpB a c p b +ACPI a c p i +ACPO a c p o +ACPP a c p p +ACPSEM a c p s e m +acq a c q +A&CR a and c r +ACR a c r +ACRPS a c r p s +ACSA a c s a +ACSAC a c s a c +ACSBR a c s b r +ACSCN a c s c n +ACSEL a c s e l +ACSIA a c s i a +ACSI a c s i +AcSOC a c s o c +ACSR a c s r +ACSS a c s s +ACSS's a c s s's +A.C.T. a c t +A.C.T.A.F.L. a c t a f l +acu a c u +ACU a c u +ACU's a c u's +ACUS a c u s +ACUV a c u v +ACV a c v +ACWA a c w a +AC&W a c and w +ACW a c w +ACWM a c w m +ACWS a c w s +Acy a c y +ADAA a d a a +A&D a and d +Adab a d a b +ADAC a d a c +A. D. a d +A.D. a d +A.D.A.M. adam +AdaSL a d a s l +ADATA a d a t a +ADBAC a d b a c +ADB a d b +ADBGR a d b g r +ADBICA a d b i c a +ADCA a d c a +AdC a d c +A.D.C. a d c +ADC a d c +ADCAF a d c a f +ADCC a d c c +ADC's a d c's +ADCs a d c's +ADCY a d c y +ADDA a d d a +AD&D a d and d +ADF a d f +ADF's a d f's +ADFS a d f s +ADG a d g +ADGB a d g b +ADH a d h +ADHD a d h d +ADHM a d h m +ADHS a d h s +ADIAC a d i a c +AdK a d k +ADK a d k +Adl a d l +ADL a d l +ADLL a d l l +ADLs a d l's +A.D.M.A.C. a d m a c +adm a d m +Adm a d m +ADM a d m +ADMK a d m k +admn a d m n +ADMN a d m n +ADNAC a d n a c +ADNEC a d n e c +ADNs a d n's +AdP a d p +A.D.P. a d p +ADP a d p +ADPCM a d p c m +ADPF a d p f +ADQ a d q +ADQ's a d q's +ADR a d r +ADRC a d r c +ADRP a d r p +ADRs a d r's +ADSB a d s b +ADSL a d s l +ADSR a d s r +ADSRs a d s r's +ADSs a d s's +ADT a d t +ADTG a d t g +Adwa a d w a +ADWC a d w c +ADX a d x +AEA a e a +A&E a and e +A. E. a e +A.E. a e +AEBN a e b n +A.E.C. a e c +AEC a e c +AECL a e c l +AECR a e c r +AEC's a e c's +Aed a e d +AED a e d +AEE a e e +AEF a e f +AEG a e g +A.E.G.I.S. 
a e g i s +aegte a e g t e +AEHL a e h l +AEIA a e i a +AEI a e i +AEIOU a e i o u +Aeka a e k a +AEK a e k +ael a e l +AEL a e l +Aema a e m a +AEM a e m +aen a e n +AEO a e o +AEPA a e p a +AEP a e p +aere a e r e +AEre a e r e +AErn a e r n +AERN a e r n +Aert a e r t +AESA a e s a +Aes a e's +AES a e s +aet a e t +AET a e t +AETC a e t c +AEU a e u +AEV a e v +AEW a e w +AEX a e x +Afa a f a +AFA a f a +af a f +Af a f +A. F. a f +A.F. a f +AF a f +Afaf a f a f +AFAP a f a p +AFB a f b +AFCA a f c a +afc a f c +A.F.C. a f c +AFC a f c +AFCEC a f c e c +AFCO a f c o +AFC's a f c's +AFCS a f c s +AFCSThe a f c s t h e +Afd a f d +AfD a f d +AfDB a f d b +AFDD a f d d +Afe a f e +AFE a f e +Aff a f f +AFF a f f +AFG a f g +AFGM a f g m +AFH a f h +AFIA a f i a +Afi a f i +AFI a f i +AFIP a f i p +Afiq a f i q +AFI's a f i's +AFJ a f j +A.F.L. a f l +AFL a f l +AFLEG a f l e g +AFLPA a f l p a +AFLP a f l p +AFL's a f l's +AFLUA a f l u a +A. F.M.A. a f m a +AFM a f m +AFMC a f m c +AFMs a f m's +AFN a f n +AFNET a f n e t +AFNOR a f n o r +AfNS a f n s +Afo a f o +AFO a f o +AFOP a f o p +AFP a f p +AFRA a f r a +Afr a f r +AFR a f r +AFRC a f r c +AFRL a f r l +AFROTC a f r o t c +AFRTS a f r t s +AFS a f s +AFSC a f s c +AFSPC a f s p c +A. F. T. a f t +AFTRA a f t r a +AFTRA's a f t r a's +AFTRS a f t r s +AFV a f v +AFW a f w +AFWS a f w s +AFX a f x +A. G. a g +A.G. a g +AGB a g b +AGC a g c +Agco a g c o +AGCO a g c o +Agda a g d a +AGD a g d +A.G.E. a g e +AGF a g f +AG&G a g and g +agg a g g +Agi a g i +AGI a g i +AGID a g i d +AGIR a g i r +Agis a g i's +A.G.J. a g j +A. G. K. a g k +AGL a g l +agli a g l i +AGM a g m +Agn a g n +AgN a g n +AGN a g n +AGNs a g n's +A.G.P. a g p +AGP a g p +AGPL a g p l +AGPS a g p s +AGPW a g p w +AGRs a g r's +AGS a g s +AGSM a g s m +Agsu a g s u +agt a g t +AGT a g t +Agte a g t e +Agu a g u +AGU a g u +AGV a g v +A.H.A. a h a +A. H. a h +A.H. a h +A. H. C. 
a h c +AHC a h c +AHD a h d +Ahdhra a h d h r a +AHDR a h d r +ahe a h e +Ahe a h e +AHEC a h e c +AHF a h f +Ahirs a h i r's +Ahk a h k +AHL's a h l's +AHP a h p +AHRC a h r c +AHRS a h r s +AHSAA a h s a a +Ahsa'i a h s a i +Ahta a h t a +aht a h t +AHT a h t +AHTD a h t d +Ahu a h u +Ahva a h v a +AHV a h v +AIAA a i a a +A.I.A. a i a +A&I a and i +A. I. a i +A.I. a i +AI a i +AIAP a i a p +Aias a i a's +AIAS a i a s +AIATSIS a i a t s i s +AIAW a i a w +AIAWU a i a w u +A.I.B.A. a i b a +AIB a i b +AIBO's a i b o's +AIBS a i b s +AIC a i c +AICAR a i c a r +AICC a i c c +AICCCR a i c c c r +aici a i c i +Aicme a i c m e +AICN a i c n +AICPA a i c p a +AICP a i c p +AICPS a i c p s +AICs a i c's +AICTE a i c t e +AICUF a i c u f +AIDA a i d a +AIDN a i d n +AIEC a i e c +AIEE a i e e +Aiel a i e l +AIEP a i e p +AIFA a i f a +AIF a i f +AIFB a i f b +Aife a i f e +AIFF a i f f +AIFM a i f m +AIFMD a i f m d +AiG a i g +AIG a i g +Aigis a i g i's +AIHA a i h a +AIH a i h +AIHL a i h l +AIIMS a i i m s +Aija a i j a +Aik a i k +AIK a i k +AIKR a i k r +A.I.M. a i m +AIP a i p +A.I.R. a i r +AIs a i's +AISC a i s c +AISD a i s d +AISRI a i s r i +Aist a i s t +AIST a i s t +AIT's a i t's +Aitu a i t u +Aiud a i u d +aius a i u s +AIVC a i v c +AIW a i w +AIX a i x +A. J. a j +A.J. a j +Ajba a j b a +A.J.B. a j b +AJC a j c +A.J.E. a j e +A.J.G.C. a j g c +A. J. J. a j j +A.J.K. a j k +AJK a j k +AJKF a j k f +AJL a j l +A. J. M. a j m +Ajmi a j m i +AJN a j n +AJO a j o +Ajok a j o k +A. J. P. a j p +A.J.P. a j p +AJPW a j p w +A.J.R. a j r +AJR a j r +Ajsa a j s a +A.J.S. a j s +AJS a j s +A. J. T. a j t +A.J.T. a j t +AJT a j t +Aju a j u +AJUFE a j u f e +AJUSCO a j u s c o +AJV a j v +AJW a j w +AJWRC a j w r c +Akaa a k a a +a.k.a. a k a +aka. a k a +aka a k a +Aka. a k a +Aka a k a +AKA a k a +A&K a and k +A. K. a k +A.K. 
a k +AK a k +AKAP a k a p +AKAPs a k a p's +akas a k a's +AKAs a k a's +AKB a k b +AKC a k c +AKCR a k c r +AKD a k d +AKG a k g +AKG's a k g's +akh a k h +AKH a k h +AKHS a k h s +AKM a k m +AKN a k n +Akpa a k p a +AKP a k p +AKPD a k p d +AKQA a k q a +AKR a k r +A.K.S. a k s +AKS a k s +Aku a k u +AKU a k u +AKW a k w +A&L a and l +A. L. a l +A.L. a l +ALDF a l d f +ALDH a l d h +ALDS a l d s +ALDT a l d t +ALFASID a l f a s i d +ALFIDI a l f i d i +ALFTP a l f t p +Alh a l h +ALH a l h +A. L. M. a l m +A.L.P. a l p +ALPG a l p g +ALR a l r +ALSC a l s c +ALSF a l s f +ALSP a l s p +ALUs a l u's +A.L.V. a l v +ALVF a l v f +ALW a l w +ALWS a l w s +AMAA a m a a +A'ma a m a +A&M a and m +a.m. a m +a.m a m +A. M. a m +A.M. a m +AMAP a m a p +AMARC a m a r c +AMAs a m a's +amb a m b +Amb a m b +AMB a m b +AmBX a m b x +A.M.C. a m c +AMC a m c +AMC's a m c's +AMCs a m c's +Amda a m d a +Amd a m d +AMD a m d +AMDH a m d h +AMDISA a m d i s a +AMD's a m d's +A.M.E. a m e +AMF a m f +AM&FM a m and f m +A. M. G. a m g +AMG a m g +AMGTV a m g t v +Amha a m h a +AMH a m h +A.M.H.S. a m h s +A. M. J. a m j +AMJ a m j +A. M. K. a m k +A.M.K. a m k +AMK a m k +aml a m l +AML a m l +Amli a m l i +AMLS a m l s +Amlwch a m l w c h +Amm a m m +A. M. M. a m m +AMM a m m +AMMAYI a m m a y i +AMMB a m m b +AMMK a m m k +AMN a m n +AMNH a m n h +AMNRL a m n r l +AM&O a m and o +AMPK a m p k +AMPL a m p l +AMP's a m p's +Ampt a m p t +AMPTP a m p t p +amr a m r +A. M. R. a m r +AMREF a m r e f +AMSA a m s a +A&M's a and m's +Ams' a m's +amsl a m s l +AMSO's a m s o's +AMSRS a m s r s +AMS's a m s's +AMSS a m s s +AMSU a m s u +Amta a m t a +amt a m t +Amt a m t +A.M.T. a m t +AMT a m t +AMTB a m t b +AMTPAT a m t p a t +Amts a m t's +AMTV a m t v +Amu a m u +AMU a m u +AMU's a m u's +A.M.V.M. a m v m +AMW a m w +AMX a m x +AMYF a m y f +A. N. a n +A.N. 
a n +ANB a n b +ANBO a n b o +ANC a n c +ANCC a n c c +ANC's a n c's +andFHL a n d f h l +ANDOC a n d o c +ANDP a n d p +ANDPOP a n d p o p +andPPP a n d p p p +Anej a n e j +ANELFA a n e l f a +ANF a n f +ANFIS a n f i s +ANFP's a n f p's +ANGB a n g b +ANGPTL a n g p t l +ANGRAU a n g r a u +Angti a n g t i +Angu a n g u +Anhe a n h e +ANI a n i +ANL a n l +ANM a n m +ANP a n p +ANPP a n p p +ANPR a n p r +Anr a n r +ANR a n r +ANRC a n r c +ANREF's a n r e f's +Anrep a n r e p +ANRS a n r s +ANRW a n r w +A.N.S. a n s +ANS a n s +ANSF a n s f +ANTB a n t b +ANTM a n t m +ANTV a n t v +ANUGA a n u g a +ANWB a n w b +ANX a n x +ANZ a n z +A&O a and o +AOA a o a +AOAC a o a c +A. O. a o +A.O. a o +AOB a o b +AOC a o c +AOCB a o c b +AOCCs a o c c's +AOCE a o c e +AOD a o d +Aodh a o d h +Aodla a o d l a +AODV a o d v +AOE a o e +AOGCC a o g c c +Aogo a o g o +AOH a o h +Aoke a o k e +Aoko a o k o +AOKP a o k p +Aola a o l a +Aol a o l +AOL a o l +AOL's a o l's +AOLTV a o l t v +AOM a o m +AOMC a o m c +AONB a o n b +Aone a o n e +Aoni a o n i +Aoos a o o's +AOPA a o p a +AOP a o p +aor a o r +AOR a o r +AORs a o r's +AORS a o r s +aos a o s +AoS a o s +AOS a o s +Aotus a o t u's +AOTW a o t w +aov a o v +aovf a o v f +AOWC a o w c +A&P a and p +A. P. a p +A.P. a p +APBA a p b a +APB a p b +APBL a p b l +APCA a p c a +Apc a p c +APC a p c +APCCR a p c c r +APCh a p c h +APCR a p c r +APCRDA a p c r d a +APC's a p c's +APCs a p c's +APDA a p d a +APD a p d +APDM a p d m +APEGBC a p e g b c +APFA a p f a +APF a p f +APFCT a p f c t +APFOL a p f o l +APFSDS a p f s d s +APFUTU a p f u t u +apg a p g +APG a p g +APGAW a p g a w +Aph a p h +A.P.H. a p h +APH a p h +APHEDA a p h e d a +APHL a p h l +Apiao a p i a o +Api a p i +APi a p i +API a p i +Apic a p i c +APICv a p i c v +API's a p i's +A.P.J. a p j +APJ a p j +Apl a p l +APL a p l +APLP a p l p +APLS a p l s +Aplu a p l u +APM a p m +APML a p m l +APMR a p m r +APMSO a p m s o +APN a p n +A.P.N.C. 
a p n c +Apphttp a p p h t t p +Appl a p p l +APPO's a p p o's +appr a p p r +Appts a p p t's +appu a p p u +Appu a p p u +APRA's a p r a's +APRC a p r c +APR's a p r's +APRS a p r s +APRST a p r s t +A&P's a and p's +Aps a p's +APs a p's +APS a p s +APSF a p s f +APSL a p s l +APTA a p t a +AP&T a p and t +APW a p w +APX a p x +APXS a p x s +Aqa a q a +AQA a q a +AQC a q c +AQI a q i +Aql a q l +AQP a q p +Aqr a q r +A&R a and r +ARA a r a +A. R. a r +A.R. a r +A.R.B. a r b +ARD a r d +ARDF a r d f +A.R.E. a r e +A.R.F. a r f +ARF a r f +A.R.G. a r g +ArgR a r g r +ARGs a r g's +ARGT a r g t +ArH a r h +ARJ a r j +ARL a r l +ARLFC a r l f c +A. R. M. a r m +ARMv a r m v +Arnd a r n d +ARNG a r n g +ARNT a r n t +A.R.P. a r p +ARQ a r q +ARTL a r t l +A.R.U. a r u +arv a r v +Arv a r v +ARV a r v +ARVD a r v d +ARVN a r v n +ARW a r w +Arwi a r w i +ARWU a r w u +A.S.A. a s a +ASA a s a +As'ad a s a d +Asai a s a i +Asao a s a o +A.s a's +A. S. a s +A.S. a s +ASAS a s a s +Asasp a s a s p +ASAU a s a u +A.S.B. a s b +ASB a s b +ASBDA a s b d a +asbl a s b l +ASBM a s b m +ASBMH a s b m h +ASBO a s b o +A.S.C. a s c +ASC a s c +ASCB a s c b +ASCE a s c e +asci a s c i +Asci a s c i +ASCP a s c p +ASCW a s c w +A.S.D. a s d +ASD a s d +ASDIC a s d i c +Asdis a s d i's +ASE a s e +ASEC a s e c +ASEE a s e e +ASEF a s e f +ASFA a s f a +ASF a s f +Asfi a s f i +ASGA a s g a +ASG a s g +ASGC a s g c +ASGE a s g e +ASGS a s g s +A. S. H. a s h +A.S.I. a s i +ASL a s l +A.S.M. a s m +ASM a s m +ASMD a s m d +ASME a s m e +ASMIK a s m i k +ASML a s m l +ASMPH a s m p h +ASMSU a s m s u +Asn a s n +ASN a s n +Aso a s o +ASO a s o +ASP a s p +ASPCA a s p c a +Aspe a s p e +ASPTS a s p t s +Asr a s r +ASR a s r +Assn a s s n +assoc a s s o c +Assoc a s s o c +ASSPs a s s p's +ASSR a s s r +Asst a s s t +ASSU a s s u +A. S. T. a s t +ASTAT a s t a t +ASTCL a s t c l +Aste a s t e +ASTE a s t e +ASTM a s t m +ASTR a s t r +Astt a s t t +A.S.U. a s u +ASU a s u +A.S.V. 
a s v +ASV a s v +Aswa a s w a +ASW a s w +ASX a s x +A&T a and t +atac a t a c +Atac a t a c +Atad a t a d +Ata's a t a's +Atas a t a's +ATA's a t a's +A. T. a t +A.T. a t +ATBF a t b f +ATBs a t b's +atc a t c +A.T.C. a t c +ATC a t c +atcc a t c c +ATCC a t c c +ATCDE a t c d e +ATCL a t c l +ATDC a t d c +ATF a t f +Atg a t g +ATG a t g +ATGM a t g m +ath a t h +Ath a t h +ATH a t h +ATHN a t h n +ATIA a t i a +Atia's a t i a's +Ati a t i +ATi a t i +ATI a t i +Atid a t i d +Atiiq a t i i q +Atil a t i l +ATINC a t i n c +ATIP a t i p +Atiq a t i q +Ativ a t i v +ATJ a t j +Atka a t k a +ATK a t k +ATKN a t k n +ATK's a t k's +Atl a t l +ATL a t l +Atli a t l i +ATLY a t l y +A.T.M.A. a t m a +atm a t m +Atm a t m +ATM a t m +ATMs a t m's +ATN a t n +ATO a t o +Atos a t o's +ATO's a t o's +ATOs a t o's +ATOS a t o s +ATP a t p +atpB a t p b +A.T.Q. a t q +ATR a t r +atri a t r i +Atri a t r i +ATRP a t r p +A&T's a and t's +A.T.s a t's +A.T.S. a t s +ATS a t s +ATSC a t s c +ATSDR a t s d r +AT&SF a t and s f +ATSF a t s f +ATSIC a t s i c +AT&T a t and t +attd a t t d +ATTESA a t t e s a +ATTF a t t f +AT&T's a t and t's +ATTS a t t s +ATTWI a t t w i +ATTWX's a t t w x's +ATU's a t u's +atv a t v +ATV a t v +ATV's a t v's +ATVs a t v's +ATW a t w +AtxA a t x a +ATX a t x +ATXN a t x n +A. U. a u +A.U. a u +Aub a u b +Aubl a u b l +AUC a u c +AUVs a u v's +Auw a u w +avab a v a b +AVAC a v a c +A. V. a v +A.V. a v +AVCA a v c a +AVC a v c +AVCHD a v c h d +AVCs a v c's +AVCS a v c s +AVD a v d +avg a v g +AVG a v g +AVK a v k +AVL a v l +A. V. M. a v m +AVM a v m +Avn a v n +AVN a v n +Avo a v o +AvP a v p +AVP a v p +AVR a v r +AVRs a v r's +AVSA a v s a +Avs' a v's +Avs a v's +AVS a v s +AVSM a v s m +AVU a v u +avvo a v v o +AVX a v x +Awa a w a +A. W. A. a w a +AWA a w a +A. W. A. M. a w a m +AWAs a w a's +A. W. a w +A.W. a w +A. W. B. a w b +A.W.B. a w b +AWB a w b +AWB's a w b's +A.W.C. a w c +AWC a w c +AWD a w d +A. W. F. 
a w f +AWG a w g +AWGIE a w g i e +AWGN a w g n +A. W. H. a w h +AWHL a w h l +AWI a w i +AWM a w m +Awo a w o +AWP a w p +AWP's a w p's +AWR a w r +AWSA a w s a +AWU a w u +Awwa a w w a +AXAF a x a f +A. X. a x +AXS a x s +AXV a x v +A. Y. a y +A.Y. a y +AYF a y f +AYK a y k +AYP a y p +A. Z. a z +A.Z. a z +AZE a z e +Azg a z g +Azi a z i +AZI a z i +AZL a z l +azm a z m +AZN a z n +AZS a z s +AZSTA's a z s t a's +AZT a z t +AzTV a z t v +Baad b a a d +BAAG b a a g +Ba'al b a a l +Baal b a a l +BAAL b a a l +Baam b a a m +Baap b a a p +ba'as b a a's +Baat b a a t +Ba'ath b a a t h +B. A. b a +B.A. b a +B&A b and a +Bac b a c +B.A.C. b a c +BAC b a c +bae b a e +Bae b a e +BAe b a e +BAE b a e +BAFA b a f a +BAF b a f +B.A.R. b a r +BARV b a r v +BASCA b a s c a +BASEC b a s e c +BASF b a s f +B.A.S.P. b a s p +BATVG b a t v g +Bauw b a u w +B'Av b a v +BAV b a v +BBA b b a +BBAG b b a g +B&B b and b +bb b b +B. B. b b +B.B. b b +BB b b +BBB b b b +bbc b b c +B.B.C. b b c +BBC b b c +BBCBBC b b c b b c +BB&CI b b and c i +BBC&PJR b b c and p j r +BBC's b b c's +B.B.D. b b d +BBDO b b d o +BBE b b e +BBF b b f +BBFC b b f c +BBFF b b f f +BBG b b g +BBH b b h +BBI b b i +BBK b b k +BBKL b b k l +BBLB b b l b +BBL b b l +BBM b b m +BBMP b b m p +BBNG b b n g +BBN's b b n's +BBO b b o +BBP b b p +BBQ'er b b q e r +B&Bs b and b's +BB&S b b and s +BBS b b s +BBSes b b s e's +BBS's b b s's +BB&T b b and t +BBT b b t +BBTV b b t v +BBU b b u +BBVA b b v a +BbvCI b b v c i +BBV's b b v's +BBWAA b b w a a +BBWA b b w a +BBWR's b b w r's +BBYA b b y a +BBYO b b y o +BCA b c a +BCAD b c a d +BCAM b c a m +BCATP b c a t p +BCBA b c b a +BCB b c b +bc b c +B. C. b c +B.C. b c +BC b c +BCCA b c c a +BCC b c c +BCCCA b c c c a +Bcci b c c i +BCCI b c c i +BCCP b c c p +BCC's b c c's +BCDA b c d a +BCD b c d +BCEA b c e a +BCE b c e +BCE's b c e's +BCF b c f +BCG b c g +BCG's b c g's +BCHL b c h l +BCHR's b c h r's +BCHS b c h s +BCI b c i +B. C. J. 
b c j +BCL b c l +BCLR b c l r +BCMA b c m a +BCM b c m +BCMG b c m g +BCMHS b c m h s +BCMS b c m s +BCN b c n +BCN's b c n's +BCP b c p +BCPM b c p m +BCR b c r +BCRF's b c r f's +B.C.'s b c's +BC's b c's +BCs b c's +BCS b c s +BCSC b c s c +BCSic b c s i c +BCSN b c s n +BCT b c t +BCTC b c t c +BCU b c u +BCYP b c y p +BdA b d a +BDA b d a +bd b d +B. D. b d +B.D. b d +BD b d +BDBL b d b l +BDC b d c +BDC's b d c's +BDD b d d +Bde b d e +B. D. E. b d e +BDE b d e +BDF b d f +BDI b d i +BDJ b d j +BDK b d k +BDK's b d k's +BDMI b d m i +BDN b d n +BDNF b d n f +BDO b d o +BDO's b d o's +BDOS b d o s +BDP b d p +BDR b d r +bds b d s +BDS b d s +BDSM b d s m +BdU b d u +BDU b d u +BDV b d v +B. D. W. b d w +B.E.A. b e a +B. E. b e +B.E. b e +Bedw b e d w +B.E.E. b e e +bef b e f +B.E.F. b e f +BEF b e f +BEF's b e f's +bei b e i +Bei b e i +Beih b e i h +BEIR b e i r +Beis b e i's +bej b e j +B.E.M. b e m +BEMs b e m's +BEP b e p +Ber b e r +BER b e r +Bes b e's +B.E.S. b e s +BES b e s +B.F.A. b f a +BFA b f a +bf b f +B. F. b f +B.F. b f +BF b f +BFCA b f c a +BFC b f c +BFC's b f c's +BFDG b f d g +BFES b f e s +BFF b f f +BFFs b f f's +B. F. G. b f g +BFG b f g +BFG's b f g's +BFI b f i +BFI's b f i's +BFJA b f j a +BFKL b f k l +BFL b f l +BFM b f m +BFO b f o +BFRA b f r a +BFR b f r +BFSA b f s a +BFS b f s +BFT b f t +BFU b f u +bfy b f y +BGAB b g a b +BGA b g a +BGB b g b +B. G. b g +B.G. b g +BGC b g c +BGCI b g c i +BGD b g d +BGEA b g e a +B. G. E. b g e +BGH b g h +B. G. J. S. b g j s +BGL b g l +BglII b g l i i +BGM b g m +BGP b g p +BGRA b g r a +BGR b g r +BGRC b g r c +BGRSO b g r s o +bgs b g s +B.G.S. b g s +BGS b g s +BGSU b g s u +BGT b g t +Bgy b g y +Bha b h a +B&H b and h +B. H. b h +B.H. b h +BHCC b h c c +BHDP b h d p +BH&E b h and e +Bhe b h e +BHL b h l +BHMA b h m a +BHMO b h m o +BHMs b h m's +BHMT b h m t +BHO b h o +B. H. P. 
b h p +BHP b h p +BHRT b h r t +BHS b h s +BHSN b h s n +BHSU b h s u +bhttp b h t t p +BHU b h u +BHUSD b h u s d +BHVS b h v s +B. I. b i +B.I. b i +Bie b i e +BIE b i e +biedt b i e d t +Biem b i e m +B.I.G. b i g +B.I.G.'s b i g's +BiH b i h +BIH b i h +B.I.O.L.A. b i o l a +B. J. b j +B.J. b j +BJCC b j c c +BJCP b j c p +BJD b j d +B.J.F. b j f +B. J. I. b j i +BJJ b j j +BJP b j p +BJP's b j p's +BJPs b j p's +BJPS b j p s +B.J.'s b j's +B.J.T. b j t +BJT b j t +BJU b j u +BJY b j y +B.K.A. b k a +B&K b and k +B. K. b k +B.K. b k +BKCa b k c a +BKC b k c +BKF b k f +BKI b k i +BKN's b k n's +BKO b k o +B.K.R. b k r +BKR b k r +BK's b k's +BKS b k s +BKT b k t +BKTV b k t v +BKV b k v +B. L. b l +B.L. b l +BL b l +BLC b l c +BLCC b l c c +BLCN b l c n +ble b l e +bleg b l e g +blev b l e v +BLG b l g +bli b l i +BLI b l i +Blla b l l a +BLL b l l +BLM b l m +BLMC b l m c +BLOU's b l o u's +BLPP b l p p +BLR b l r +BLRC b l r c +BLR&D b l r and d +B.L.S. b l s +BLS b l s +BLT b l t +BLTF b l t f +BLTs b l t's +BLV b l v +BMAA b m a a +B.M.A. b m a +BMA b m a +BMAL b m a l +BMARC b m a r c +BMA's b m a's +B&M b and m +BMB b m b +B. M. b m +B.M. b m +BMCA b m c a +B.M.C. b m c +BMC b m c +bmd b m d +BMD b m d +BME b m e +BMF b m f +BMG b m g +BMHS b m h s +bmi b m i +BMI b m i +BMIC b m i c +BMIR b m i r +BMI's b m i's +BMIT b m i t +BMJ b m j +BMK b m k +BMKG b m k g +BMMO b m m o +BMNA b m n a +BMNH b m n h +BMNP b m n p +BMO b m o +BMP b m p +BMPs b m p's +BMR b m r +BMRs b m r's +BMSA b m s a +B.M.S. b m s +BMS b m s +bmt b m t +B. M. T. b m t +B.M.T. b m t +BMT b m t +BMT's b m t's +BMU b m u +B.M.V. b m v +BMV b m v +B. M. W. b m w +BMW b m w +BMW M b m w +BMW's b m w's +BMX b m x +BNA b n a +B&N b and n +bnb b n b +BNB b n b +B. N. b n +B.N. 
b n +BNC b n c +BNCI b n c i +BNCM b n c m +BNCT b n c t +BND b n d +BNDY b n d y +BNET b n e t +BNF b n f +BNFL b n f l +BNI b n i +BNIC b n i c +BNL b n l +BNO b n o +BNP b n p +BNR b n r +BNRC b n r c +BNS b n s +BNSF b n s f +BNST b n s t +BNT b n t +BNU b n u +BNY b n y +BNZ b n z +Boac b o a c +BOAC b o a c +B&O b and o +B. O. b o +B.O. b o +Boc b o c +BoC b o c +BOC b o c +BOCs b o c's +BOCS b o c s +boj b o j +Boj b o j +BOJ b o j +B.O.M.B. b o m b +B&O's b and o's +B.O.S. b o s +BOYZZ b o y z z +BPA b p a +BPB b p b +bp b p +B. P. b p +B.P. b p +BP b p +BPC b p c +BPD b p d +BPER b p e r +B.P.H. b p h +BPI b p i +BPJ b p j +BPK b p k +bpl b p l +BPL b p l +B. P. M. b p m +BPM b p m +BPMN b p m n +B.P.N. b p n +BPN b p n +B.P.O. b p o +BPO b p o +BPPA b p p a +BPP b p p +BPR b p r +B.P.R.D. b p r d +BPRD b p r d +BPSA b p s a +BP's b p's +BPS b p s +BPSK b p s k +BPUP b p u p +BPV b p v +BPY b p y +B&Q b and q +BQB b q b +B. Q. b q +BRAC's b r a c's +BRAM b r a m +BRBDP b r b d p +Brbic b r b i c +B. R. b r +B.R. b r +BRBR b r b r +BRBs b r b's +BRCA b r c a +BRC b r c +BRCC b r c c +BRD b r d +BRF b r f +BRGF b r g f +BRGM b r g m +BRHS b r h s +B.R.I.C.K. b r i c k +BRK b r k +BRL b r l +BRM b r m +BRMS b r m s +BRN b r n +BRNC b r n c +Brne b r n e +Brno b r n o +BRNV b r n v +BRP b r p +BRPS b r p s +BRRA b r r a +BRS b r s +BRSCC's b r s c c's +BRT b r t +BRTs b r t's +BRTS b r t s +BRVM b r v m +BRW b r w +BRWK b r w k +Brza b r z a +BRZ b r z +brzu b r z u +brzy b r z y +Brzyk b r z y k +BSAA b s a a +B.S.A. b s a +BSA b s a +BSAC b s a c +BsaL b s a l +BSBA b s b a +BSB b s b +BSBI b s b i +B. s b's +B.'s b's +Bs b's +B. S. b s +B.S. b s +BS b s +B.S.C. 
b s c +BSC b s c +BSCL b s c l +bsd b s d +BSD b s d +BSDE b s d e +BSE b s e +BSET b s e t +BSFA b s f a +BSF b s f +BSG b s g +BSH b s h +BSIB b s i b +BSI b s i +BSK b s k +BSL b s l +BSM b s m +BSME b s m e +BSN b s n +BSNL b s n l +BSOG b s o g +BSPA b s p a +BSP b s p +BSPP b s p p +BSR b s r +BSRN b s r n +BSS b s s +BSU b s u +BSV b s v +BSX b s x +BTA b t a +BTAF b t a f +BTB b t b +BTBD b t b d +bt b t +B. T. b t +B.T. b t +BT b t +BTCA b t c a +BTC b t c +BTCC b t c c +BTD b t d +BTF b t f +BTG b t g +BTH b t h +BTK b t k +BTL b t l +BTM b t m +BTN b t n +BTNK b t n k +BTOB b t o b +BTO b t o +BTP b t p +BTRC b t r c +BTRDA b t r d a +Btry b t r y +btsan b t s a n +BT's b t's +BTS b t s +BTT b t t +BTU b t u +btus b t u s +BTUs b t u's +BTV b t v +B.U. b u +B.U.M. b u m +BUV b u v +BVA b v a +BVB b v b +BVB's b v b's +B. V. b v +B.V. b v +BVC b v c +BVFB b v f b +BVI b v i +BVM b v m +BVO b v o +BVR b v r +BVRC b v r c +BVT b v t +BVVL b v v l +Bwa b w a +BWAF b w a f +B&W b and w +BWB b w b +BWBR b w b r +B. W. b w +B.W. b w +B.W.C. b w c +BWC b w c +BWF b w f +BWFC b w f c +BWHBC b w h b c +B. W. I. b w i +BWI b w i +BWIR b w i r +BWO b w o +BWR b w r +BWS b w s +BWT b w t +BWTs b w t's +BWV b w v +BWW b w w +bwwtv b w w t v +Bxa b x a +BX b x +Bxe b x e +BXML b x m l +BYA's b y a's +BYB b y b +B. Y. b y +byc b y c +BYC b y c +BYD b y d +BYFC b y f c +BYG b y g +bygd b y g d +Byk b y k +byn b y n +BYOB b y o b +Byo b y o +BYO b y o +Byou b y o u +BYR b y r +Byrl b y r l +bySLC b y s l c +BYST b y s t +byt b y t +B.Y.U. b y u +BYU b y u +BYU's b y u's +BYX b y x +BYZ b y z +B.Z. b z +BZ b z +BZD b z d +bzhed b z h e d +Bzik b z i k +BZK b z k +BZP b z p +Bzyb b z y b +CAA c a a +CAAC c a a c +CAAC's c a a c's +CAAHEP c a a h e p +CAAM c a a m +CAAS c a a s +CAASE c a a s e +Caat c a a t +CAAT c a a t +C. A. c a +C.A. 
c a +CA c a +C&A c and a +CAC c a c +CA&CC c a and c c +CACC c a c c +CACM c a c m +Cadw c a d w +CADW c a d w +caeca c a e c a +Cae c a e +CAE c a e +C.A.F.B. c a f b +CAFTT c a f t t +C. A. G. c a g +CAG c a g +CAGM c a g m +C. A. I. c a i +C. A. J. c a j +CALUX c a l u x +CAMC c a m c +CAMLG c a m l g +C.A.M.'s c a m's +C. A. N. c a n +Capt. captain +C. A. R. c a r +cas c a s +Cas c a's +C.A.S. c a s +CAS c a s +Casc c a s c +CASC c a s c +CASD c a s d +CASF c a s f +CASG c a s g +CASQ c a s q +C. A. T. c a t +C.A.T. c a t +CATV c a t v +C.A.W. c a w +CBA c b a +CBBB c b b b +CBB c b b +CBBC c b b c +CBBS c b b s +CBCA c b c a +cb c b +C. B. c b +C.B. c b +CB c b +cbc c b c +CBC c b c +CB&CNS c b and c n s +CBC's c b c's +CBCS c b c s +CBDA c b d a +CBD c b d +CBDs c b d's +C.B.E. c b e +CBE c b e +CBEF c b e f +CBE's c b e's +CBF c b f +CBGB c b g b +CBGB's c b g b's +CBGBs c b g b's +CBG c b g +CBH c b h +CBHG c b h g +CBI c b i +CBI's c b i's +CbiXS c b i x s +CBKB c b k b +CBK c b k +CBKMT c b k m t +CBKRT c b k r t +CBL c b l +CBL's c b l's +CBM c b m +CBMS c b m s +CBN c b n +CBN's c b n's +CBNT c b n t +CBO c b o +CBOT c b o t +CBP c b p +CBPE c b p e +CB&Q c b and q +CBR c b r +CBRE c b r e +CBRN c b r n +CBRNE c b r n e +CBSA c b s a +CBs c b's +CBS c b s +C.B.S.E. c b s e +CBSE c b s e +CBS's c b s's +CBT c b t +CBTC c b t c +CBTU c b t u +CBU c b u +CBUT c b u t +CBV c b v +CBWT c b w t +CBX c b x +CBYT c b y t +CBZH c b z h +'c c +CcaA c c a a +CCAA c c a a +CCAAT c c a a t +cca c c a +Cca c c a +CCA c c a +CCAF c c a f +CCAP c c a p +CCAR c c a r +CCAS c c a s +CCB c c b +CCBCC c c b c c +CCBE c c b e +CCCA c c c a +C&C c and c +C. C. c c +C.C. c c +CCCE c c c e +CCCF c c c f +CCCP c c c p +CCC's c c c's +CCDB c c d b +CCDC c c d c +ccd c c d +CCD c c d +CCDev c c d e v +CCDI c c d i +CCDI's c c d i's +CCDM c c d m +CCDR c c d r +CCD's c c d's +CCDs c c d's +CCEd c c e d +C.C.E.D. c c e d +CCF c c f +CCFO c c f o +CCG c c g +C.C.G.S. 
c c g s +CCGS c c g s +CCHA c c h a +CCHC c c h c +C.C.H. c c h +CCH c c h +CCHD c c h d +CCHS c c h s +C.C.I.A.A. c c i a a +CCIAA c c i a a +CCi c c i +CCI c c i +CCID c c i d +CCIE c c i e +CCIH c c i h +CCITT c c i t t +CCJ c c j +CCK c c k +CCLC c c l c +CCL c c l +CCMA c c m a +ccm c c m +CCM c c m +CCMM c c m m +CCMP c c m p +CCMS c c m s +CCN c c n +CCNY c c n y +CCO c c o +CCOKC c c o k c +CCOO c c o o +CCOP c c o p +C.C.O.W.E. E.T.F. c c o w e e t f +CCP c c p +CCPD c c p d +CCPL c c p l +CCPN c c p n +CCP's c c p's +CCQ c c q +CCRC c c r c +CCR c c r +CCRCs c c r c's +CCRH c c r h +CCRS c c r s +CCSA c c s a +CCSC c c s c +CC's c c's +CCS c c s +CCSD c c s d +C.C.S.D.N.Y. c c s d n y +C.C.S.M. c c s m +CCSN c c s n +CCSR c c s r +CCS's c c s's +cct c c t +CCT c c t +CCTF c c t f +CCTRN c c t r n +CCTT c c t t +CCTV c c t v +CCUA c c u a +CCU c c u +ccus c c u s +CCV c c v +CCVG c c v g +C. C. W. c c w +CCWD c c w d +CDA c d a +CDAT c d a t +CDB c d b +CDBS c d b s +CDCA c d c a +C&D c and d +cdc c d c +CDC c d c +cd c d +C. D. c d +C.D. c d +CD c d +CDC's c d c's +CDDB c d d b +CD&DR c d and d r +CDE c d e +CDF c d f +CDFI c d f i +CDFW c d f w +CDH c d h +CDi c d i +CDI c d i +CDISC c d i s c +CDI's c d i's +CDK c d k +CDKN c d k n +CDL c d l +CDLI c d l i +CDLS c d l s +CDMA c d m a +CDM c d m +cDNA c d n a +cdnas c d n a s +cDNAs c d n a's +CDN c d n +CDN's c d n's +CDO c d o +CDOs c d o's +CDPC c d p c +CDP c d p +CDP's c d p's +CDPs c d p's +CD&R c d and r +CDR c d r +cds c d s +CD's c d's +CDs c d's +CDS c d s +CDSG c d s g +CDSP c d s p +CDSPCo c d s p c o +CdtA c d t a +CDT c d t +CdTe c d t e +CDTi c d t i +CDTI c d t i +CDT's c d t's +CDTV c d t v +CDTV's c d t v's +CDU c d u +CDU's c d u's +CD&V c d and v +CDV c d v +CDW c d w +CDWP c d w p +Cec c e c +CEC c e c +c'e c e +ce c e +C'e c e +Ce c e +C. E. c e +C.E. 
c e +CE c e +CECP c e c p +CEDA c e d a +CEDO c e d o +CEDR c e d r +CEDS c e d s +CEEBA c e e b a +CEEB c e e b +CEEH c e e h +Ceel c e e l +CEEOL c e e o l +CEESA c e e s a +CEFC c e f c +CEF c e f +Cefn c e f n +CEFP c e f p +CEFR c e f r +CEGB c e g b +C.E.G. c e g +C. E. H. c e h +CEI c e i +C&EI's c and e i's +C. E. M. c e m +C&EN c and e n +cen c e n +Cen c e n +CEN c e n +ceo c e o +C.E.O. c e o +CEO c e o +Ceol c e o l +CEOP c e o p +Ceorl c e o r l +Ceos c e o's +CEO's c e o's +CEOs' c e o's +CEOs c e o's +C.E.P. c e p +cer c e r +Cer c e r +CER c e r +CERP c e r p +CERPER c e r p e r +CERS c e r s +ces c e s +Ces c e's +C.E.S. c e s +CES c e s +CESL c e s l +CESO c e s o +CETB c e t b +C. E. T. c e t +CEV c e v +CEVCP c e v c p +CEVG c e v g +CEW c e w +Cex c e x +CeX c e x +CEX c e x +CEZ c e z +CEZMS c e z m s +CFAB c f a b +Cfa c f a +CFA c f a +CFAS c f a s +C. F. B. c f b +CFB c f b +CFBG c f b g +CFBISD c f b i s d +CFB's c f b's +C&F c and f +CFC c f c +cf c f +C. F. c f +C.F. c f +CF c f +CFC's c f c's +CFDA c f d a +CFD c f d +C.F.E. c f e +CFE c f e +CFFC c f f c +C. F. F. c f f +CFF c f f +C. F. H. c f h +CFH c f h +CFHS c f h s +CFI c f i +CFIT c f i t +CFJL c f j l +CFJR c f j r +CFL c f l +CFL's c f l's +CFM c f m +CFMEU c f m e u +CFMI c f m i +CFND c f n d +CFNY c f n y +C.F.O.A. c f o a +CFO c f o +CFOP c f o p +CFOs c f o's +CFPI c f p i +CFPL c f p l +CFQ c f q +CFRB c f r b +C.F.R. c f r +CFR c f r +CFRP c f r p +CFS c f s +CFSCI c f s c i +CFSP c f s p +CFTA c f t a +CFT c f t +CFTO c f t o +CFTR c f t r +CFTs c f t's +CFTXOP c f t x o p +CFU c f u +CFYN c f y n +CFZ c f z +CGA c g a +CGAP c g a p +CGB c g b +C&G c and g +CGCFAD c g c f a d +C. G. c g +C.G. 
c g +CGCRI c g c r i +CGCS c g c s +CGD c g d +CGDK c g d k +CGE c g e +CGF c g f +CGFP c g f p +CGG c g g +CGI c g i +CGIL c g i l +CGKD c g k d +cgl c g l +CGL c g l +CGP c g p +CGPM c g p m +CGPME c g p m e +CGPW c g p w +CGR c g r +CGRP c g r p +CGS c g s +CGSS c g s s +CGT c g t +CGTG c g t g +CGTIC c g t i c +CGU c g u +CGW c g w +CGW's c g w's +C.H.B. c h b +CHBM c h b m +CHBX c h b x +C&H c and h +C.H.C. c h c +C. H. c h +C.H. c h +CHCO c h c o +CHD c h d +CHDK c h d k +CHDS c h d s +CHF c h f +CHFI c h f i +C.H.G. c h g +CHGO c h g o +CHH c h h +CHHOTO c h h o t o +CHHS c h h s +Chhu c h h u +Chirs c h i r's +CHISMS c h i s m s +CHISZ c h i s z +CHK c h k +CHL c h l +CHLR c h l r +CHL's c h l's +C. H. M. c h m +CHM c h m +CHN c h n +CHP c h p +CHP'den c h p d e n +CHPs c h p's +CHPS c h p s +CHR c h r +CHRDI c h r d i +CHRGD c h r g d +C.H.S. c h s +CHS c h s +CHSE c h s e +CHT c h t +CHV c h v +CHX c h x +Chy c h y +CHYR c h y r +CIAC c i a c +Cia c i a +C.I.A. c i a +CIA c i a +CIC c i c +ci c i +Ci c i +C. I. c i +C.I. c i +CI c i +Cicic c i c i c +CICL c i c l +C.I.C.M. c i c m +CICO c i c o +CIC's c i c's +C.I.D. c i d +CIFF c i f f +CIFL c i f l +CIHE c i h e +CIHR c i h r +cii c i i +CII c i i +CIID c i i d +CIJ c i j +CIKM c i k m +C.I.L. c i l +CIL c i l +cim c i m +Cim c i m +CIM c i m +Cio c i o +C.I.O. c i o +CIO c i o +CIOs c i o's +CIOT c i o t +CIPCA c i p c a +Cip c i p +C.I.P. c i p +CIP c i p +CIPD c i p d +CIPFA c i p f a +CIPM c i p m +CISA c i s a +CISC c i s c +cis c i s +CIs c i's +CIS c i s +CITB c i t b +cit c i t +CIT c i t +CiTD c i t d +CITM c i t m +CITN c i t n +CITP c i t p +citS c i t s +CITV c i t v +CITWF c i t w f +CitX c i t x +Ciuc c i u c +Ciu c i u +CiU c i u +CIU c i u +CIUT c i u t +CIVD c i v d +CIW c i w +CIWS c i w s +CIX c i x +Cixi c i x i +CIY c i y +CIZN c i z n +CJA c j a +CJBHL c j b h l +CJCA c j c a +CJCB c j c b +CJC c j c +C. J. c j +C.J. c j +CJD c j d +C. J. E. 
c j e +CJFL c j f l +CJFT c j f t +CJFX c j f x +CJGC c j g c +CJGO c j g o +CJHL c j h l +CJK c j k +CJL c j l +CJLS c j l s +CJM c j m +CJNT c j n t +CJOC c j o c +CJP c j p +C. J. R. c j r +C.J.R. c j r +CJR c j r +CJSC c j s c +C. J. S. c j s +CJS c j s +CJT c j t +CJUF c j u f +CJWC c j w c +CKAC c k a c +CKC c k c +C. K. c k +C.K. c k +CK c k +CKD c k d +CKE c k e +CKLM c k l m +CKLQ c k l q +CKLW c k l w +CKS c k s +CKSO c k s o +CKVL c k v l +C. K. W. c k w +CKWK c k w k +ckx c k x +CKXX c k x x +CKY c k y +CLAC c l a c +CLA c l a +CLAS c l a s +CLBs c l b's +clc c l c +CLC c l c +cl c l +C. L. c l +C.L. c l +CL c l +CLCN c l c n +CLD c l d +Cle c l e +CLE c l e +CLF c l f +C.L.G. c l g +CLG c l g +CLHIA c l h i a +CLHS c l h s +CLI c l i +CLK c l k +cllr c l l r +CLM c l m +CLMD c l m d +CLMP c l m p +CL&N c l and n +Clo c l o +CL&P c l and p +CLP c l p +CLRC c l r c +C. L. R. c l r +C.L.R. c l r +CLR c l r +CLRK c l r k +CLSA c l s a +CLSC c l s c +CLs c l's +CLS c l s +CLT c l t +CLTD c l t d +CLTPA c l t p a +CLTs c l t's +Clu c l u +CLU c l u +CLUK c l u k +CLV c l v +Clwyd c l w y d +CLX c l x +C&MA c and m a +CMAC c m a c +C.M.A. c m a +CMA c m a +CMAL c m a l +CMAP's c m a p's +CMAS c m a s +CMAT c m a t +C.M.B. c m b +CMB c m b +CMCC c m c c +CMCCDI c m c c d i +C.M.C. c m c +CMC c m c +cm c m +C. M. c m +C.M. c m +CM c m +CMCs c m c's +CMCS c m c s +CMD c m d +Cmde c m d e +CMDISE c m d i s e +cmdr c m d r +CMEC c m e c +CME c m e +CMF c m f +CMFS c m f s +C. M. G. c m g +C.M.G. c m g +CMG c m g +CMHC c m h c +CMH c m h +CMHR c m h r +CMI c m i +CMJ c m j +CMLA c m l a +cml c m l +C.M.L. c m l +CML c m l +CMLDP c m l d p +CMLL c m l l +CMLL's c m l l's +CMM c m m +CMMI c m m i +CMMs c m m's +cmn c m n +CMO c m o +CMO's c m o's +CMPC c m p c +CMP c m p +CMPS c m p s +CMPSO c m p s o +CMPV c m p v +CMQ c m q +CMQT c m q t +C. M. R. c m r +CMR c m r +CMSAF c m s a f +C.M.S. c m s +CMS c m s +C.M.S.L. 
c m s l +CMT c m t +Cmte c m t e +CMT's c m t's +CMTU c m t u +CMUCL c m u c l +CMU c m u +CMU's c m u's +CMV c m v +CMVM c m v m +CMVSS c m v s s +CMWF c m w f +CMXXII c m x x i i +CMYK c m y k +CNA c n a +CNBC c n b c +CNB c n b +CNC c n c +cn c n +C. N. c n +C.N. c n +CN c n +CNC's c n c's +CND c n d +CNDL c n d l +CNEB c n e b +CNE c n e +CNF c n f +CNFE c n f e +CNFK c n f k +CNG c n g +CNI c n i +CNJ c n j +CNJF c n j f +CNK c n k +C. N. L. c n l +C.N.L. c n l +CNL c n l +CNMs c n m's +CNN c n n +CNO c n o +CNPC c n p c +CNP c n p +cnr c n r +CNR c n r +CNRMA c n r m a +CNRS c n r s +CNSA c n s a +CNs c n's +CNS c n s +CNSS c n s s +CNSW c n s w +CNTA c n t a +CNT c n t +CNTE c n t e +CNTI c n t i +CNTK c n t k +CNTN c n t n +CNTs c n t's +CNTS c n t s +CNTV c n t v +CNU c n u +Cnut's c n u t's +CNV c n v +CNVs c n v's +CNW c n w +CNWS c n w s +CNZ c n z +COA c o a +C&O c and o +C. O. c o +C.O. c o +Co. company +C.O.D. c o d +Col. colonel +C.O.M.L. c o m l +CPAC c p a c +CPA c p a +CPA's c p a's +CPB c p b +CPBL c p b l +C&P c and p +CPC c p c +cp c p +C. P. c p +C.P. c p +CP c p +CPC's c p c's +CPCs c p c's +CPCS c p c s +CPD c p d +CPDM c p d m +CPD's c p d's +CPEB c p e b +CPEC c p e c +C.P.E. c p e +CPE c p e +CPEO c p e o +CPF c p f +CPFO c p f o +CPG c p g +CPHC c p h c +CPH c p h +CPI c p i +CPIFL c p i f l +CPIM c p i m +CPI's c p i's +CPJ c p j +CPL c p l +CPLP c p l p +CPM c p m +CPN c p n +CPNI c p n i +cpo c p o +C.P.O. c p o +CPO c p o +CPOE c p o e +CPPA c p p a +CPPCC c p p c c +CPP c p p +CPPE c p p e +CPPIB c p p i b +CPPM c p p m +CPQ c p q +CPR c p r +CPRS c p r s +CPSA c p s a +CPSC c p s c +CPS c p s +CPSF c p s f +CPSK c p s k +CPSL c p s l +CPSU c p s u +CPTA c p t a +CPT c p t +CPTM c p t m +CPUC c p u c +CPU c p u +CPUID c p u i d +CPUSA c p u s a +CPUs c p u's +CPUT c p u t +CPVA c p v a +CPV c p v +CPW c p w +CQAC c q a c +CQBR c q b r +CQC c q c +cq c q +CQ c q +CQD c q d +CQDs c q d's +CQM c q m +CQO c q o +CQU c q u +C. R. A. 
c r a +CRA c r a +C.R.A.Z.Y. c r a z y +CRBC c r b c +CRC c r c +cr c r +C. R. c r +C.R. c r +CR c r +Cre c r e +CRE c r e +CRF c r f +CRG c r g +CRHK c r h k +CRH's c r h's +CRHS c r h s +CRJ c r j +crk c r k +Crkva c r k v a +crkve c r k v e +CRL c r l +CRLs c r l's +crm c r m +CRM c r m +CRNA c r n a +CRN c r n +Crne c r n e +Crni c r n i +Crno c r n o +CRP c r p +CRPF c r p f +CRRF c r r f +CRs c r's +CRS c r s +CRSP c r s p +CRST c r s t +CRTC c r t c +CRT c r t +CRTC's c r t c's +CRUA c r u a +CRWRC c r w r c +CRW's c r w's +CRX c r x +CSAC c s a c +csa c s a +Csa c s a +C.S.A. c s a +CSA c s a +CSAH c s a h +Csak c s a k +Csaky c s a k y +CSAR c s a r +CSAT c s a t +CSAV c s a v +CSB c s b +C&S c and s +C.S.C. c s c +CSC c s c +CSCD c s c d +CSCE c s c e +CSCR c s c r +cs c s +C.s c's +Cs c's +C. S. c s +C.S. c s +CS c s +CSC's c s c's +CSDA c s d a +CSDB c s d b +CSD c s d +CSDL c s d l +CSDN c s d n +CSDP c s d p +CSD's c s d's +CSDS c s d s +CSE c s e +C&SF c and s f +CSF c s f +CSFL c s f l +CSFSO c s f s o +CSGA c s g a +csg c s g +CSG c s g +CSG's c s g's +CSH c s h +CSHL c s h l +CSIC c s i c +CsI c s i +CSI c s i +CSIDC c s i d c +CSILUS c s i l u s +CSIO c s i o +CSIR c s i r +CSIRO c s i r o +CSis c s i's +CSIs c s i's +CSIS c s i s +CSIU c s i u +CSKA c s k a +CSKA's c s k a's +CSK c s k +CSK's c s k's +CSL c s l +CSLI c s l i +CSM c s m +CSNB c s n b +CSNBX c s n b x +CSN c s n +CSN's c s n's +CSOB c s o b +CSO c s o +CSO's c s o's +CSOs c s o's +cspA c s p a +csp c s p +CSP c s p +CSPH c s p h +CSPI c s p i +CSPRA c s p r a +CSPs c s p's +CSRA c s r a +CSR c s r +CSRS c s r s +CSRT c s r t +CSRTT c s r t t +CSSA c s s a +CSS c s s +CSSD c s s d +CSSE c s s e +CSSL c s s l +CSSNCs c s s n c's +CSSR c s s r +CSSS c s s s +CSST c s s t +CSTB c s t b +CST c s t +CSTF c s t f +CSTI c s t i +CSTS c s t s +CSUB c s u b +CSU c s u +CSU's c s u's +Csuz c s u z +CSV c s v +CSW c s w +CSWF c s w f +CSWIP c s w i p +CSWS c s w s +CSX c s x +CSX's c s x's 
+CSXT c s x t +C.T.A. c t a +CTA c t a +CTAF c t a f +CTARL c t a r l +CTA's c t a's +CTAs c t a's +CTBT c t b t +CTBTO c t b t o +C&T c and t +CTC c t c +ct c t +C. T. c t +C.T. c t +CT c t +CTD c t d +cte c t e +CTE c t e +CTEQ c t e q +CTF c t f +CTGF c t g f +CTI c t i +CTLA c t l a +CTL c t l +CTLLS c t l l s +CTLs c t l's +CTM c t m +CTN c t n +CTO c t o +CTP c t p +ctr c t r +CTRG c t r g +Ctrip c t r i p +CTRL c t r l +CTRP c t r p +CTR's c t r's +CTS c t s +CTSD c t s d +CTSF c t s f +CTSG c t s g +CTUC c t u c +CTU c t u +CTUG c t u g +CTU's c t u's +CTVA c t v a +ctv c t v +CTV c t v +CTV's c t v's +CTX c t x +CTZ c t z +cu c u +C. U. c u +CU c u +C.U.L. c u l +C. U. S. c u s +C. U. T. c u t +CUW c u w +Cuyp c u y p +CVA c v a +CVAK c v a k +CVARG c v a r g +CVB c v b +C&V c and v +CVC c v c +cvcp c v c p +'cv c v +cv c v +C. V. c v +C.V. c v +CV c v +CVD c v d +CVEs c v e's +CVG c v g +CVI c v i +CVJM c v j m +CVM c v m +C.V.N. c v n +CVN c v n +C.V.O. c v o +CVO c v o +CVODE c v o d e +CVOs c v o's +CVP c v p +CVR c v r +CVs c v's +CVS c v s +CVSNT c v s n t +CVT c v t +CVTs c v t's +CVV c v v +CVVHDF c v v h d f +CVW c v w +CVZ c v z +CWA c w a +CWBI c w b i +CWBL c w b l +CWC c w c +C. W. c w +C.W. c w +CWD c w d +CWDS c w d s +C. W. E. c w e +CWE c w e +Cwej c w e j +Cwele c w e l e +CWF c w f +CWGC c w g c +CWG c w g +CWI c w i +C. W. K. c w k +cwm c w m +CWM c w m +CWO c w o +CWPA c w p a +CWR c w r +cwrt c w r t +CWRU c w r u +CWSAC c w s a c +CW's c w's +CWS c w s +CWTS c w t s +CWUR c w u r +C. W. Y. c w y +CWYFL c w y f l +CXC c x c +CXCL c x c l +CXCR c x c r +C.X. c x +C. Y. c y +C.Y. c y +cyl c y l +Cyn c y n +CYOG c y o g +CyP c y p +CYP c y p +Cyrl c y r l +Cys c y's +CYS c y s +CYSK c y s k +CYSY c y s y +czci c z c i +cz c z +C.Z. c z +CZ c z +CZE c z e +CZM c z m +CZW c z w +czy c z y +DAAD d a a d +Daai d a a i +D. A. d a +D.A. d a +D&AD d and a d +D. A. H. d a h +D.A.M. d a m +D.A.N.C.E. dance +D. A. T. d a t +Dav d a v +D.A.V. 
d a v +DAV d a v +DAV's d a v's +dBa d b a +DBA d b a +DBC d b c +DBCS d b c s +db d b +D. B. d b +D.B. d b +DB d b +DBE d b e +DBI d b i +DBL d b l +DBLE d b l e +DBLP d b l p +DBM d b m +DBMS d b m s +DBR d b r +DB's d b's +D. B. S. d b s +DBS d b s +DBU d b u +DBYC d b y c +DBZ d b z +DCA d c a +DCAF d c a f +DCAM d c a m +DCB d c b +DCCC d c c c +DCCCD d c c c d +DCC d c c +DCCG d c c g +DCCs d c c's +D&C d and c +dc d c +D. C. d c +D.C. d c +DC d c +DCE d c e +DCFC d c f c +DCF d c f +DCG d c g +DCH d c h +DCI d c i +DCK d c k +D.C.L. d c l +DCL d c l +DCMA d c m a +DCM d c m +DCMF d c m f +DCMI d c m i +DCMS d c m s +DCO d c o +DCom d c o m +DCOM d c o m +DCP d c p +DCR d c r +D.C.'s d c's +DC's d c's +DCs d c's +DCS d c s +DCSD d c s d +DCSO d c s o +DCSPER d c s p e r +DCSS d c s s +DCTA d c t a +DCT d c t +DCU d c u +DCUO d c u o +DCUs d c u's +DDA d d a +DDB d d b +D.D.C. d d c +DDC d d c +D&D d and d +D. D. d d +D.D. d d +DDF d d f +DDG d d g +DDHH d d h h +DDi d d i +DDI d d i +DDK d d k +DDLJ d d l j +DDM d d m +DDMS d d m s +DDN d d n +DDNOS d d n o s +Ddoc d d o c +DDO d d o +DDP d d p +DDR d d r +DDRMAX d d r m a x +DDSB d d s b +D.D.S. d d s +DDS d d s +DDT d d t +DDTV d d t v +Ddu d d u +DDWFTTW d d w f t t w +DDX d d x +D.E.A.F. d e a f +Deah d e a h +DEA's d e a's +DECC d e c c +DEC d e c +DECS d e c s +DEDD d e d d +D. E. d e +D.E. d e +deg d e g +D. E. G. d e g +DEG d e g +DEH d e h +DEHP d e h p +dei d e i +Dei d e i +DeI d e i +DEI d e i +Deijm d e i j m +Deip d e i p +D. E. J. d e j +D.E.R. d e r +dez d e z +Dez d e z +Dfa d f a +DFA d f a +DFB d f b +D.F.C. d f c +DFC d f c +DFCs d f c's +D. F. d f +D.F. d f +DFD's d f d's +DfE d f e +D.F.E. 
d f e +DFE d f e +DfES d f e s +DFF d f f +DFG d f g +DFJ d f j +DFL d f l +DFM d f m +DFOA d f o a +DFO d f o +DFP d f p +DFPs d f p's +DFRC d f r c +DFR d f r +DFs d f's +DFS d f s +DFT d f t +DFW d f w +dfx d f x +DFX d f x +DGAC d g a c +DGA d g a +DGALS d g a l s +DGAP d g a p +DGB d g b +DGCA d g c a +DGC d g c +DGCR d g c r +D&G d and g +D. G. d g +D.G. d g +D. G. E. d g e +DGE d g e +DGERT d g e r t +DGF d g f +DGFI d g f i +DGH d g h +DGK d g k +D. G. L. R. d g l r +DGM d g m +DGMO d g m o +DGP d g p +DGPS d g p s +DGR d g r +Dha d h a +DHA d h a +DHB d h b +DHBs d h b's +DHCP d h c p +D&H d and h +dh d h +D. H. d h +D.H. d h +DH d h +DHEA d h e a +d'HEC d h e c +dhe d h e +DHE d h e +Dheku d h e k u +DHFS d h f s +DHHC d h h c +DHH d h h +DHL d h l +DHMO d h m o +DHNC d h n c +D.H.N. d h n +DHNS d h n s +Dhod d h o d +dhol d h o l +Dhol d h o l +Dhoo d h o o +Dhor d h o r +Dhou d h o u +DHP d h p +DHPH d h p h +DHQ d h q +dhr d h r +D.H.R. d h r +DHRS d h r s +DHS d h s +DHSs d h s's +DHSS d h s s +DHT d h t +DHV d h v +DHW d h w +DHX d h x +D.I. d i +DIFC d i f c +DISD d i s d +DIW d i w +DIX d i x +diy d i y +DiY d i y +D.I.Y. d i y +DIY d i y +Diyn d i y n +DIYs d i y's +D. J. A. d j a +Djam d j a m +dj d j +D. J. d j +D.J. d j +DJ d j +DJ'ed d j e d +Djem d j e m +D.J.H. d j h +Dji d j i +DJI d j i +D.J.N. d j n +Djo d j o +D'Jok d j o k +djr d j r +DJR d j r +D.J.s d j's +DJ's d j's +DJs d j's +D. J. S. d j s +D.J.T. d j t +DjVu d j v u +DJVU d j v u +DjVus d j v u's +D. J. Y. d j y +DKC d k c +D. K. d k +D.K. d k +DKI d k i +DKIM d k i m +DKK d k k +DKNY d k n y +DKPP d k p p +D. K. R. d k r +DKW d k w +dla d l a +DLA d l a +DLC d l c +DLD d l d +dl d l +D. L. d l +D.L. d l +DL d l +DLE d l e +D.L.F. d l f +DLF d l f +DLFN d l f n +DLG d l g +D. L. H. 
d l h +dlia d l i a +DLI d l i +DLJ d l j +DLJ's d l j's +DLK d l k +DLL d l l +DL&LR d l and l r +DLL's d l l's +DLLs d l l's +DLM d l m +DLMPST d l m p s t +dlo d l o +DLP d l p +DLPFC d l p f c +DLP's d l p's +DLR d l r +DLS d l s +DLSS d l s s +DLSU d l s u +DLSU's d l s u's +DLT d l t +DL&W d l and w +DLZ d l z +DMAA d m a a +DMA d m a +DMB d m b +DMBT d m b t +DMCA d m c a +DMCC d m c c +DMC d m c +DMCM d m c m +D.M.C.'s d m c's +D&M d and m +DMD d m d +DMDK d m d k +D. M. d m +D.M. d m +DM d m +DMDS d m d s +DMed d m e d +DM&E d m and e +DME d m e +DMF d m f +DMG d m g +DMGT d m g t +DMHS d m h s +DMI d m i +DMK d m k +DML d m l +DMN d m n +DMO d m o +DMOFD d m o f d +DMOZ d m o z +DMP d m p +DMRC d m r c +DMR d m r +DMRG d m r g +DMs d m's +DMS d m s +DMSII d m s i i +DMT d m t +DMTF d m t f +DMU d m u +DMUs d m u's +DMV d m v +DMX d m x +DMZ d m z +Dna d n a +DnA d n a +DNA d n a +DNADTC d n a d t c +DNAITV d n a i t v +DNAJA d n a j a +DNAJC d n a j c +DnaJ d n a j +DNAN d n a n +DNB d n b +DNBY d n b y +DNC d n c +DNCG d n c g +DND d n d +D. N. d n +D.N. d n +Dnepr d n e p r +dnes d n e s +Dnes d n e's +DNES d n e s +DNF d n f +DNF's d n f's +DNFs d n f's +DNG d n g +DNJO d n j o +DNKA d n k a +DNK d n k +DNM d n m +DNMT d n m t +DNP d n p +DNQ d n q +DNR d n r +DNSBLs d n s b l's +dns d n s +D.N.S. d n s +DNS d n s +DNSS d n s s +DNT d n t +DNU d n u +DNVP d n v p +D.O.A. d o a +D.O.A.'s d o a's +DOAS d o a s +D.O.B. d o b +D.O.C. d o c +DOCG d o c g +dod d o d +Dod d o d +DoD d o d +DOD d o d +D. O. d o +D.O. d o +DOD's d o d's +D.O.E. d o e +DOGNY d o g n y +DOHC d o h c +D'oh d o h +DOJ d o j +DOJ's d o j's +dok d o k +Dok d o k +domt d o m t +Domt d o m t +Dop d o p +DOP d o p +DOPR d o p r +DOSAAF d o s a a f +DOTC d o t c +dotCMS d o t c m s +DOTD d o t d +DOTP d o t p +DotSVN d o t s v n +Douw d o u w +DOW d o w +DOXO d o x o +dozd d o z d +d'OZ d o z +Dozhd d o z h d +DPAA's d p a a's +DPA d p a +DPB d p b +DPD d p d +dp d p +D. P. d p +D.P. 
d p +DP d p +DPG d p g +DPJ d p j +DPMA d p m a +DPM d p m +DPN d p n +DPNM d p n m +DPO d p o +dpon d p o n +DPP d p p +DPP's d p p's +DPPX d p p x +DPR d p r +DPRK d p r k +DPRK's d p r k's +DProf d p r o f +DPRP d p r p +DPRT d p r t +DPs d p's +DPS d p s +DPSS d p s s +DPSSL d p s s l +DPT d p t +DPW d p w +DPW's d p w's +DPX d p x +DPYS d p y s +D.Q. d q +DQOY d q o y +DQV d q v +DRB d r b +DRBG d r b g +DRB's d r b's +DRCA d r c a +drc d r c +DRC d r c +DRDA d r d a +DRDC d r d c +DRD d r d +Dr. doctor +D. R. d r +D.R. d r +DRG d r g +dri d r i +Dri d r i +DRI d r i +DRIs d r i's +DRITTES d r i t t e s +Driu d r i u +Driv d r i v +DRJTBC d r j t b c +DRK d r k +DRL d r l +DRLR d r l r +DRMC d r m c +D.R.M. d r m +DRM d r m +Drnis d r n i's +Dro d r o +DRO d r o +DRP d r p +Drs. doctors +DR's d r's +DRS d r s +Drska d r s k a +DRT d r t +druj d r u j +DSA d s a +DsbA d s b a +DS&BB d s and b b +DSB d s b +DSBL d s b l +D.S.C. d s c +DSC d s c +D.S.C.H. d s c h +DSCM d s c m +DSCR d s c r +DSCS d s c s +DSDB d s d b +DSD d s d +DSDM d s d m +dsDNA d s d n a +DSDP d s d p +ds d s +Ds d's +D. S. d s +D.S. d s +DS d s +DSDS d s d s +DSG d s g +DSGi d s g i +DSIC d s i c +dsi d s i +DSi d s i +DSI d s i +DSIF d s i f +DSIR d s i r +DSJ d s j +dsl d s l +D. S. L. d s l +DSL d s l +DSLR d s l r +DSMB d s m b +D.S.M. d s m +DSM d s m +DSMs d s m's +dsn d s n +D&SNGRR d and s n g r r +DSNP d s n p +D.S.O. d s o +DSO d s o +DSPA d s p a +DSP d s p +DSR d s r +DS's d s's +DSS d s s +DST d s t +dsu' d s u +DSU d s u +DSV d s v +DSW d s w +DSX d s x +DSZG d s z g +D.T.A. d t a +DTA d t a +DTC d t c +DTC's d t c's +DTCs d t c's +DTDC d t d c +DTD d t d +DTDs d t d's +D. T. d t +D.T. 
d t +DT&E d t and e +DTE d t e +DTEK d t e k +DTES d t e s +DTG d t g +DTH d t h +DTIC d t i c +DT&I d t and i +DTI d t i +DTLA d t l a +dtl d t l +DTL d t l +DTLLS d t l l s +DTLS d t l s +DTM d t m +DTO d t o +DTPA d t p a +DTPC d t p c +DTRA d t r a +DTR d t r +DTRS d t r s +DTs d t's +DTS d t s +DTT d t t +D.T.U. d t u +DTU d t u +DTV d t v +dty d t y +D.U. d u +dva d v a +Dva d v a +DVA d v a +DVB d v b +DVBE d v b e +DVBIC d v b i c +DVC d v c +DVCs d v c's +DVCS d v c s +dvd d v d +DVD d v d +DVD's d v d's +DVDs d v d's +dv d v +D. V. d v +D.V. d v +DV d v +dve d v e +dvfb d v f b +DVFS d v f s +DVGS d v g s +DVHS d v h s +dvi d v i +DVI d v i +dvije d v i j e +Dvin d v i n +Dvir d v i r +DVLA d v l a +DVLA's d v l a's +DVL d v l +DVM d v m +DVP d v p +DVR d v r +DVRJRBC d v r j r b c +DVR's d v r's +DVSC d v s c +DVS d v s +DVT d v t +DVV d v v +DVVT d v v t +DVWK d v w k +Dwa d w a +DWA d w a +D&W d and w +D. W. d w +D.W. d w +Dwedw d w e d w +DWET d w e t +DWF d w f +DWG d w g +D.W.H. d w h +Dwi d w i +DWI d w i +DWM d w m +DWN d w n +DWNU d w n u +DWP d w p +dwr d w r +DWR d w r +DWRFC d w r f c +DWS d w s +DWT d w t +DWTS d w t s +dwur d w u r +dxa d x a +DXA d x a +dx d x +D.X. d x +DX d x +DXED d x e d +DXers d x e r's +DXII d x i i +DXK d x k +DXR d x r +DXZ d x z +Dydd d y d d +dy d y +Dy d y +D. Y. d y +DY d y +DYFI d y f i +DYIS d y i s +DYNC d y n c +dypl d y p l +dyr d y r +DYSP d y s p +DZBB d z b b +DZB d z b +dz d z +DZEC d z e c +DZEM d z e m +dzis d z i s +DZNE d z n e +Dzog d z o g +Dzor d z o r +DZRH d z r h +DZRH's d z r h's +DZS d z s +Dzus d z u's +DZXL d z x l +DZZ d z z +EAAB e a a b +EAA e a a +EAAP e a a p +E. A. C. e a c +EAC e a c +EACEF e a c e f +EACS e a c s +EAD e a d +E. A. e a +E.A. e a +EAF e a f +EAGE e a g e +E.A.H. e a h +EAH e a h +EAI e a i +EAIE e a i e +E. A. J. e a j +EAMs e a m's +EAP e a p +E.A.R.L. e a r l +E.A.R.T.H. 
e a r t h +EASA e a s a +EASB e a s b +eas e a s +Eas e a's +EA's e a's +EAS e a s +Eav e a v +EAX e a x +E. B. e b +E.B. e b +EBL e b l +Ebn e b n +EBNF e b n f +EBOV e b o v +E&BR e and b r +EBR e b r +Ebru e b r u +EBS e b s +EBSL e b s l +EBT e b t +EBU e b u +EBV e b v +E.B.W. e b w +EBWs e b w's +EBZR e b z r +ECAC e c a c +ECA e c a +ECAR e c a r +ECB e c b +ECBJ e c b j +ECBL e c b l +ECCA e c c a +ECCC e c c c +ecc e c c +ECC e c c +ECCI e c c i +Eccl e c c l +ECCRFA e c c r f a +ECCW e c c w +ECD e c d +ec e c +Ec e c +E. C. e c +E.C. e c +EC e c +ece e c e +Ece e c e +ECE e c e +Ecem e c e m +ECFA e c f a +ECF e c f +ECFR e c f r +ECFU e c f u +ecg e c g +ECG e c g +ECH e c h +ECHL e c h l +ECHR e c h r +ECI e c i +ECJ e c j +ECLAC e c l a c +ECLA e c l a +ECLC e c l c +ECL e c l +ECLI e c l i +ECLSS e c l s s +ecma e c m a +ECMA e c m a +ECMC e c m c +ECMDB e c m d b +E.C.M. e c m +ECM e c m +ECMI e c m i +ECML e c m l +ECMLRUS e c m l r u s +ECMWF e c m w f +ECN e c n +ECNP e c n p +ECNs e c n's +ECNS e c n s +ECNZ e c n z +ECP&DA's e c p and d a's +ECP e c p +ECPR e c p r +ECQ e c q +ECR e c r +eCRM e c r m +ECRR e c r r +ECR's e c r's +ECSA e c s a +EC's e c's +E. C. S. e c s +E.C.S. e c s +ECS e c s +ect e c t +E. C. T. e c t +ECT e c t +ECTLO e c t l o +ECTN e c t n +ECTS e c t s +ECV e c v +ECVs e c v's +ECWA e c w a +ECW e c w +edb e d b +EDB e d b +EDC e d c +E. D. e d +E.D. e d +EDF e d f +Edh e d h +EDH e d h +edhe e d h e +EDHF e d h f +EDIB e d i b +EDI e d i +EDK e d k +EDL e d l +EDMA e d m a +EDM e d m +EDMs e d m's +EDMW e d m w +Edmx e d m x +edn e d n +Edn e d n +EDN e d n +EDP e d p +EDQH e d q h +E.D.R. e d r +EDRP e d r p +EDSA e d s a +EDSC e d s c +eds e d s +Eds e d's +E. D. S. e d s +EDS e d s +EDSP e d s p +EDTA e d t a +EDT e d t +EDVAC e d v a c +EDWDO e d w d o +Edw e d w +EDXRD e d x r d +EEA e e a +EEAF e e a f +EEAS e e a s +EEBC e e b c +Eeb e e b +EEC e e c +EECS e e c s +E. E. e e +E.E. 
e e +Eef e e f +EEF e e f +Eega e e g a +EEI e e i +EEI's e e i's +EEJF e e j f +EELC e e l c +eene e e n e +EEOC e e o c +EEPCO e e p c o +eep e e p +EEP e e p +EERC e e r c +EERI e e r i +Ees e e's +EES e e s +EEST e e s t +EEUU e e u u +eeuw e e u w +Eeuw e e u w +EEZs e e z's +EFA e f a +EFAS e f a s +EFCC e f c c +efc e f c +EFC e f c +EFCJ e f c j +EFD e f d +ef e f +Ef e f +E. F. e f +E.F. e f +EF e f +E. F. J. e f j +E. F. K. e f k +EFL e f l +EFM e f m +EFNB e f n b +EFPA e f p a +EFP e f p +EFQUEL e f q u e l +EFR e f r +EFRU e f r u +EFSA e f s a +eFS e f s +Efs e f's +EFSET e f s e t +EFTPOS e f t p o s +EFUA e f u a +Efu e f u +EFV e f v +EFVR e f v r +EFVS e f v s +E. F. W. e f w +EFX e f x +EFY e f y +E. G. A. e g a +EGA e g a +Egba e g b a +EGBA e g b a +EGBT e g b t +EGC e g c +EGD e g d +EGF e g f +egfl e g f l +EGFL e g f l +e.g. for example +e. g. for example +EGFR e g f r +EG&G e g and g +E. G. H. e g h +Egi e g i +EGL e g l +EGM e g m +E&GR e and g r +EGR e g r +EGREM e g r e m +EGSC e g s c +EGX e g x +egy e g y +EGY e g y +Egyl e g y l +EHAAT e h a a t +EHAs e h a's +EHC e h c +EHCI e h c i +EHD e h d +E. H. e h +E.H. e h +ehf e h f +EHF e h f +E. H. M. e h m +EHP e h p +EHRC e h r c +EHR e h r +EHS e h s +EHSH e h s h +Ehttp e h t t p +EIA e i a +EIC e i c +EiCs e i c's +EIC's e i c's +E. I. e i +E.I. e i +EIES e i e s +eIF e i f +EIF e i f +EIFF e i f f +EIF's e i f's +EIT e i t +EITI e i t i +Eitr e i t r +EITs e i t's +EIU e i u +E. J. A. e j a +EJB e j b +EJBs e j b's +EJC e j c +ej e j +E. J. e j +E.J. e j +EJ e j +E. J. H. e j h +E.J.H. e j h +EJH e j h +EJHL e j h l +EJMA e j m a +E. J. M. e j m +EJML e j m l +E. J. R. e j r +E.J.S. e j s +EJSM e j s m +EJV e j v +EKB e k b +EKBO e k b o +EKCO's e k c o's +EKD e k d +E. K. e k +E.K. e k +EKF e k f +EKG e k g +eki e k i +EKIZ e k i z +EKL e k l +EKLR e k l r +Ekma e k m a +EKNU e k n u +Ekow e k o w +eks e k s +Eks e k's +EKS e k s +E. K. U. e k u +EKU e k u +E. L. C. e l c +E. L. 
e l +E.L. e l +ELH e l h +ELTs e l t's +ELUs e l u's +ELV e l v +EMA e m a +EMAS e m a s +Embd e m b d +EMB e m b +eMC e m c +EMC e m c +EMD e m d +EMD's e m d's +EMDs e m d's +EMDS e m d s +Emea e m e a +EMEA e m e a +E&M e and m +E. M. e m +E.M. e m +EMF e m f +Emge e m g e +EMG e m g +EMLL e m l l +E.M.M. e m m +EMNLP e m n l p +EMP e m p +emra e m r a +EMR e m r +EMSC e m s c +E.M.S. e m s +EMTE e m t e +EMT e m t +EMV e m v +E&NA e and n a +Enas e n a's +ENAS e n a s +EnBW e n b w +ENC e n c +EncFS e n c f s +Encyc e n c y c +E.N.D. e n d +ENEC e n e c +E. N. e n +E.N. e n +Enes e n e's +engl e n g l +Engl e n g l +ENGM e n g m +Engr e n g r +Eni e n i +ENI e n i +enn e n n +Enn e n n +ENP e n p +ENR e n r +ens e n s +Ens e n's +ENS e n s +Enso e n s o +ENSO e n s o +ENTJs e n t j's +ENTP e n t p +ENTV e n t v +EOC e o c +EOC's e o c's +EoD e o d +EOD e o d +E. O. e o +E.O. e o +EOFFTV e o f f t v +EOG e o g +Eois e o i's +EOKA e o k a +eok e o k +Eole e o l e +Eol e o l +EOL e o l +Eolss e o l s's +Eom e o m +EOOW e o o w +eop e o p +eoptta e o p t t a +Eora e o r a +EOR e o r +Eorl e o r l +Eors e o r's +EOSHD e o s h d +EOTC e o t c +EOTDC e o t d c +EOU e o u +E.O.W. e o w +E&P e and p +Epe e p e +E. P. E. e p e +EPEG e p e g +ep e p +Ep e p +E. P. e p +E.P. e p +EP e p +EPF e p f +EPFL e p f l +EPG e p g +EPHAR e p h a r +EPHB e p h b +Eph e p h +EPH e p h +Ephs e p h's +EPIA e p i a +epi e p i +Epi e p i +EPI e p i +E.P.J. e p j +EPL e p l +EPLF e p l f +EPM e p m +EPOC e p o c +ePO e p o +EPO e p o +epos' e p o's +epos e p o s +Epos e p o's +EPRDF e p r d f +EPR e p r +EPRI e p r i +EPRO e p r o +EPSC e p s c +E.P.s e p's +EPT e p t +EPV e p v +EPZA e p z a +EPZ e p z +EQAO e q a o +eq e q +Eq e q +EQ e q +EQG e q g +EQN e q n +EQP e q p +EQS e q s +EQT e q t +E.R.A. e r a +ERAF e r a f +ERAU e r a u +ERCB e r c b +ERC e r c +ERCES e r c e s +ERCIM e r c i m +ERCS e r c s +E. R. D. e r d +E. R. e r +E.R. 
e r +ERF e r f +ERH e r h +ERJs e r j's +ERK e r k +ERP e r p +ERPM e r p m +ERPMRP e r p m r p +ERRC e r r c +E. R. R. e r r +E.R.R. e r r +ERSAR e r s a r +ERT e r t +Eru e r u +ERU e r u +ERUUF e r u u f +Eruv e r u v +Erv e r v +ERW e r w +ES3 e s three +Esa e s a +E.S.A. e s a +ESA e s a +Esam e s a m +Esa's e s a's +ESA's e s a's +Esat e s a t +ESB e s b +ESC e s c +ESCHL e s c h l +ESCs e s c's +Esd e s d +ESD e s d +ESDI e s d i +E.S.E. e s e +E. S. e s +E.S. e s +ESF e s f +ESFG e s f g +E.S.G. e s g +ESG e s g +ESGR e s g r +esi e s i +ESI e s i +ESL e s l +ESM e s m +ESMLC e s m l c +ESMT e s m t +ESNAGI e s n a g i +ESN e s n +ESPA e s p a +ESPC e s p c +ESPD e s p d +ESP e s p +espn e s p n +ESPN e s p n +ESPNhttp e s p n h t t p +ESPN's e s p n's +ESPNU e s p n u +ESPNW's e s p n w's +esq e s q +Esq e s q +Esq. esquire +Esraa e s r a a +Esra e s r a +ESRB e s r b +ESRC e s r c +Esref e s r e f +ESR e s r +ESRF e s r f +Esri e s r i +ESRI e s r i +ESSR e s s r +estd e s t d +Estd e s t d +EST e s t +ESTP e s t p +esu e s u +ESU e s u +ESU's e s u's +Esva e s v a +ESV e s v +ESWAT e s w a t +ESW e s w +Etad e t a d +ETA e t a +ETAF e t a f +ETA's e t a's +ETASU e t a s u +ETB e t b +ETBs e t b's +E.T.C. e t c +ETCO's e t c o's +ETCS e t c s +ETCSL e t c s l +ETD e t d +E. T. e t +E.T. e t +ETF e t f +ETFs e t f's +ETFS e t f s +Et'hem e t h e m +ETH e t h +Eti e t i +ETI e t i +ETIM e t i m +ETIM's e t i m's +etj e t j +ETJ e t j +ETL e t l +ETNZ e t n z +Eto e t o +ETO e t o +ETRM e t r m +ETRS e t r s +ETSEIB e t s e i b +ets e t s +Ets e t's +ET's e t's +ETs e t's +ETS e t s +ett e t t +Ett e t t +ETT e t t +Ettre e t t r e +Etts e t t's +ETU e t u +ETV e t v +Etz e t z +EUA e u a +EUB e u b +EUBs e u b's +EUBS e u b s +EUCAP e u c a p +EUCE e u c e +EUC e u c +EUCOM's e u c o m's +EUDEC e u d e c +EUD e u d +E. U. e u +E.U. 
e u +EU e u +eup e u p +EUP e u p +Eurwg e u r w g +EUSA e u s a +Euse e u s e +EU's e u's +EUS e u s +EUSEW e u s e w +Eusi e u s i +EUV e u v +EUX e u x +EVAAP e v a a p +EvaGT e v a g t +EVDC e v d c +EVD e v d +EVDO e v d o +E. V. e v +E.V. e v +EVEX e v e x +EVIT e v i t +evl e v l +EVL e v l +EVM e v m +Evna e v n a +EVN e v n +EVNS e v n s +evnt e v n t +E.V.O. e v o +E.V.O.L. e v o l +EVP e v p +EVR e v r +EVs e v's +EVS e v s +evv e v v +EVV e v v +Ewa's e w a's +EWD e w d +EWEB e w e b +E. W. e w +E.W. e w +EWF e w f +EWF's e w f's +Ewha e w h a +EWH e w h +E.W.K. e w k +EWLA e w l a +EWL e w l +EWO e w o +EWOs e w o's +EWP e w p +EW's e w's +EWS e w s +EWTN e w t n +Ewu e w u +EWU e w u +excl e x c l +exd e x d +E.X. e x +exhb e x h b +exh e x h +EXI e x i +EXIF e x i f +EXIN e x i n +EXL e x l +EXLM e x l m +E. X. M. e x m +Exptl e x p t l +EXR e x r +exsul e x s u l +EXT e x t +EYA e y a +EYCN e y c n +EYD e y d +E&Y e and y +E. Y. e y +EY e y +Eyk e y k +eyu e y u +Ezaa e z a a +ez e z +Ez e z +EZ e z +Ezh e z h +EZH e z h +EZO e z o +EZR e z r +EZTV e z t v +Ezu e z u +EZW e z w +Ezy e z y +Faaa f a a a +faa f a a +Faa f a a +FAA f a a +faama f a a m a +FAANP f a a n p +FabH f a b h +FABM f a b m +FabR f a b r +F.A.B.'s f a b's +F.A.C.E. f a c e +FACR f a c r +F.A.C.S. f a c s +FADD f a d d +Fadl f a d l +FADS f a d s +FAEM f a e m +fa f a +Fa f a +F. A. f a +F.A. f a +FA f a +Faf f a f +FAFG f a f g +Fafhrd f a f h r d +FAFL f a f l +F. A. G. f a g +F.A.H. f a h +fai f a i +Fai f a i +FAI f a i +FAIM f a i m +FAIP f a i p +FAIPs f a i p's +Faiq f a i q +fajn f a j n +FAK f a k +Fal f a l +FAL f a l +F.A.M.E. f a m e +F.A.M. f a m +FAQ f a q +FARC's f a r c's +FARDC f a r d c +F.A.R. f a r +farw f a r w +FASA f a s a +FASB f a s b +fasc f a s c +Fasc f a s c +fas f a s +Fas f a's +FAs f a's +F.A.S. f a s +FAS f a s +FASG f a s g +FasL f a s l +FASRE f a s r e +FATF f a t f +FAUP f a u p +FAVC f a v c +FBA f b a +FBD f b d +FBFA f b f a +F. B. 
f b +F.B. f b +FB f b +F.B.G.s f b g's +F.B.I. f b i +FBI f b i +FBI's f b i's +FBK f b k +FBL f b l +FBMW f b m w +FBN f b n +FBO f b o +FBOs f b o's +FBPA f b p a +FBR f b r +FBS f b s +FBSN f b s n +FBSU f b s u +FBT f b t +FCA f c a +fcb f c b +FCB f c b +FCBF f c b f +F.C.B.I. f c b i +FCBL f c b l +FCBs f c b's +F.C.C. f c c +FCC f c c +FCD f c d +F&C f and c +fc f c +F. C. f c +F.C. f c +FCF f c f +FCGR f c g r +F. C. H. f c h +F.C.H.L. f c h l +FCIAC f c i a c +FCIC f c i c +FCI f c i +FCIP f c i p +FCK f c k +FCL f c l +FCNM f c n m +FCNZ f c n z +FCO f c o +FCP f c p +F.C.P.R.E.M.I.X. f c p r e m i x +FCPS f c p s +FCRLA f c r l a +F.C.'s f c's +FC's f c's +FCS f c s +FCT f c t +FCU f c u +FCUL f c u l +FCV f c v +F. C. W. f c w +FCW f c w +FCW's f c w's +FDA f d a +FDCC f d c c +FD&C f d and c +FDCPA f d c p a +FDCPAs f d c p a's +FDD f d d +FDDI f d d i +FDE f d e +F. D. f d +F.D. f d +FDFL f d f l +FDGB f d g b +FDG f d g +FDICs f d i c's +FdI f d i +FDI f d i +FDIO f d i o +FDJ f d j +FDL f d l +FDLT f d l t +FDM f d m +FDMNES f d m n e s +FDNY f d n y +FDP f d p +FDP's f d p's +FDR f d r +FDU f d u +F. E. f e +F.E. f e +FEFK f e f k +FEFSI f e f s i +FEFY f e f y +FEG f e g +FEGs f e g's +FEP f e p +F.E.R.S. f e r s +Fes f e's +FES f e s +FFA f f a +FFAK f f a k +ffc f f c +FFC f f c +FFC's f f c's +FFDO f f d o +ff f f +F. F. f f +F.F. f f +FF f f +FFF f f f +FFG f f g +FFH f f h +FFI f f i +FFK f f k +FFL f f l +FFL's f f l's +FFME f f m e +ffm f f m +FFOM f f o m +FFP f f p +FFPIR f f p i r +FFR f f r +FFSA f f s a +FF's f f's +FFs f f's +FFS f f s +FFU f f u +FFV f f v +FFWD f f w d +FGCU f g c u +FGFA f g f a +FGF f g f +F. G. f g +F.G. f g +FGFR f g f r +FGID f g i d +fgk f g k +FGL f g l +FGM f g m +FGR f g r +FGs f g's +FGS f g s +FGU f g u +FGV f g v +F. H. A. f h a +FHA f h a +FHAR f h a r +F. H. C. f h c +FHFA f h f a +F. H. f h +F.H. 
f h +FHI's f h i's +FHL f h l +fhm f h m +FHM f h m +FHM's f h m's +FHQ f h q +FHSAA f h s a a +FHS f h s +FHWA f h w a +F. H. W. f h w +FIAC f i a c +fiadh f i a d h +fia f i a +FIA f i a +FIAPO f i a p o +FIA's f i a's +FIAS f i a s +FIBCO f i b c o +F. I. B. f i b +FiBL f i b l +FIBT f i b t +FIC f i c +Fi'd f i d +FIDH f i d h +F.I f +F&I f and i +F. I. f i +F.I. f i +FIFTA f i f t a +FIGC f i g c +Figl f i g l +FIH f i h +FIH's f i h's +fija f i j a +FIJL f i j l +FIK f i k +F.I.M. f i m +FIM f i m +FIMI f i m i +Fio f i o +FiO f i o +FIO f i o +F.I.R. f i r +F.I.T.T. f i t t +FIU f i u +FIVB f i v b +F. J. A. f j a +FJE f j e +F. J. f j +F.J. f j +F.J.G. f j g +FJK f j k +F. J. M. f j m +FKA f k a +FKBP f k b p +F. K. f k +F.K. f k +FKK f k k +FKL's f k l's +FKS f k s +FKY f k y +Fla f l a +FLB f l b +FLCL f l c l +FLCS f l c s +FLD f l d +FLDS f l d s +FLEC f l e c +FLE f l e +Fles f l e's +FLEST f l e s t +FLETC f l e t c +fl f l +F. L. f l +F.L. f l +FL f l +FLH f l h +fli f l i +FLI f l i +FLI's f l i's +FLL f l l +F.L.M. f l m +FLM f l m +FLNB f l n b +FLN f l n +FLP f l p +FLPP f l p p +FLQ f l q +FLRA f l r a +FLR f l r +FLRL f l r l +FLS f l s +FLSW f l s w +FLT f l t +FLTK f l t k +FLTR f l t r +FLTS f l t s +FLV f l v +FLW f l w +flyr f l y r +FMA f m a +FMARS f m a r s +FMC f m c +FMCG f m c g +FMCSA f m c s a +FMCS f m c s +FMF f m f +fm f m +F. M. f m +F.M. f m +FM f m +FMH f m h +FMHS f m h s +FMI f m i +FMJD f m j d +FMLN f m l n +FMM f m m +FMN f m n +FMNJ f m n j +FMNP f m n p +FMPA's f m p a's +FMP f m p +FMQB f m q b +FMR f m r +fMRI f m r i +FMRP f m r p +FMRU f m r u +FM's f m's +FMs f m's +FMS f m s +FMSF's f m s f's +FMSO f m s o +FMV f m v +FMW f m w +FMX f m x +FMY f m y +FNAC f n a c +F. N. A. f n a +FNA f n a +FNB f n b +FNC f n c +F. N. D. f n d +FNE f n e +F. N. f n +F.N. 
f n +FNIC f n i c +FNI f n i +FNLA f n l a +FNLC f n l c +FNLC's f n l c's +FNL f n l +FNO f n o +FNP f n p +FNPI f n p i +FNSEA f n s e a +FNU f n u d s a +FOF f o f +F. O. f o +FPA f p a +FPB f p b +FPC f p c +FPCP f p c p +F.P.D. f p d +FPDs f p d's +fp f p +F. P. f p +F.P. f p +FP f p +FPGA f p g a +FPGAs f p g a's +FPI f p i +FPJ f p j +FPK f p k +F. P. L. f p l +FPL f p l +FPMR f p m r +FPM&SA f p m and s a +FPO f p o +FPP f p p +FPPS f p p s +F.P.R. f p r +FPR f p r +FPSC f p s c +fps f p s +FPS f p s +FPSO f p s o +FPSs f p s's +FPTP f p t p +FPU f p u +FQAD f q a d +F.Q. f q +FRA f r a +FRA's f r a's +FRAXA f r a x a +FRB f r b +FRBNF f r b n f +FRCC f r c c +F. R. C. f r c +FRC f r c +FRCNA f r c n a +FRCP f r c p +FRCS f r c s +FRDF f r d f +F.R.D. f r d +fr f r +F. R. f r +F.R. f r +FR f r +FRFS f r f s +FRFT f r f t +FRG f r g +F.R.G.S. f r g s +F.R.H.S. f r h s +F.R.I.B.A. f r i b a +FRMN f r m n +FRMR f r m r +FRMS f r m s +F. R. P. f r p +FRP f r p +FRR f r r +F.R.S.A. f r s a +FRSA f r s a +FRSC f r s c +FRSE f r s e +frs f r s +F. R. S. f r s +F.R.S. f r s +FRS f r s +FRSL f r s l +Fru f r u +FRU f r u +FRV f r v +FRWT f r w t +Frwydr f r w y d r +FSA f s a +FSB f s b +FSC f s c +FSCS f s c s +FSE f s e +fsf f s f +FSF f s f +FSFH f s f h +FSFLA f s f l a +fs f s +F. s f's +Fs f's +F. S. f s +F.S. f s +FS f s +FSFS f s f s +FSG f s g +FSGS f s g s +FSH f s h +FSIC f s i c +FSI f s i +FSIL f s i l +FSK f s k +FSKN f s k n +FSLF f s l f +FSL f s l +FSLN f s l n +FSM f s m +FSM's f s m's +fsn f s n +FSN f s n +FSO f s o +FSP f s p +FSPS f s p s +FSR f s r +FSRU f s r u +FSSB f s s b +FSS f s s +FSSM f s s m +FSSRU f s s r u +FSSs f s s's +FST f s t +FSU f s u +FSU's f s u's +FSV f s v +FTA f t a +FTAs f t a's +F.T.B. f t b +FTC f t c +FTD f t d +FTDI f t d i +FTE f t e +FTF f t f +Ft. Fort +F. T. f t +F.T. 
f t +FTI f t i +FTII f t i i +FTJ f t j +FTK f t k +FTL f t l +FTM f t m +FTN f t n +FTO f t o +ftp f t p +FTP f t p +FTPI f t p i +FTPL f t p l +FTRA f t r a +FTR f t r +FtsA f t s a +FT's f t's +FTS f t s +FTT f t t +FTTH f t t h +FTUC f t u c +Ftuh f t u h +FTU's f t u's +FTV f t v +F. V. B. f v b +F. V. f v +F.V. f v +FVGM f v g m +FVHS f v h s +FVJC f v j c +FVL f v l +FVP f v p +FVW f v w +FVWM f v w m +FWAA f w a a +FWA f w a +FWCD f w c d +FWD f w d +FWE f w e +F&W f and w +F. W. f w +F.W. f w +FWH f w h +FWI f w i +FWMW f w m w +FWO f w o +fwr f w r +F.W.R. f w r +F. W. S. f w s +F.W.S. f w s +FWS f w s +FWX f w x +FXCM's f x c m's +FXE f x e +FXFL f x f l +fx f x +FX f x +FXI f x i +F.X.R. f x r +FX's f x's +FXS f x s +FXX f x x +FXX's f x x's +FXXT f x x t +Fyb f y b +FYCO f y c o +F.Y. f y +F.Y.I. f y i +FYI f y i +Fyn f y n +FYN f y n +FYP f y p +fyr f y r +F.Y.R. f y r +FYR f y r +FYT f y t +FYU f y u +Fyw f y w +FZCI f z c i +FZD f z d +F.Z. f z +FZG f z g +FZJD f z j d +FZKA f z k a +Gaac g a a c +GAAF g a a f +G.A.A. g a a +G. A. g a +G.A. g a +G.A.R. g a r +GASL g a s l +GATA g a t a +GATV g a t v +GATX g a t x +G. A. W. g a w +GBA g b a +GBAG g b a g +GBASE g b a s e +GBA's g b a's +GBB g b b +GBBI g b b i +GBCA g b c a +GBC g b c +GBCM g b c m +GBD g b d +Gbe g b e +GbE g b e +GBE g b e +G. B. g b +G.B. g b +GBGC g b g c +GBGM g b g m +GBH g b h +GBIB g b i b +GBIF g b i f +GBI g b i +GBK g b k +GBL g b l +GBMA g b m a +G. B. M. g b m +GB&NDR g b and n d r +GBP g b p +GBR g b r +GB's g b's +GBS g b s +GBV g b v +GBX g b x +GBYLA g b y l a +GCAC g c a c +GCAF g c a f +GCA g c a +G. C. B. g c b +GCB g c b +GCC g c c +GCCN g c c n +GCC's g c c's +GCCS g c c s +GCD g c d +G.C.E. g c e +GCEP g c e p +GCF g c f +G&C g and c +G. C. g c +G.C. g c +GCHQ g c h q +GCI g c i +GCIP g c i p +GCIT's g c i t's +GCIV g c i v +GCJ g c j +GCL g c l +GCMC g c m c +G.C.M. 
g c m +GCM g c m +GCMG g c m g +GCN g c n +GCNT g c n t +GCOEC g c o e c +GCOS g c o s +GCPD g c p d +GCPH g c p h +GCR g c r +GCRIO g c r i o +GCSE g c s e +GCSEs g c s e's +GC&SF g c and s f +GCS g c s +GCSI g c s i +GCU g c u +GCV g c v +GCVO g c v o +GCVS g c v s +GDA g d a +Gdal g d a l +GDAL g d a l +GDAP g d a p +GDB g d b +GDC g d c +GDD g d d +GDDM g d d m +GDDR g d d r +Gde g d e +G. D. F. g d f +GDF g d f +gd g d +G. D. g d +G.D. g d +GD g d +GDI g d i +G.D.L. g d l +GDN g d n +G.D.O.D. g d o d +Gdow g d o w +G.D.P. g d p +GDP g d p +GDPs g d p's +gdr g d r +GDR g d r +GDR's g d r's +GDS g d s +GDST g d s t +GDT g d t +GDV g d v +Gebr g e b r +Gebt g e b t +GEC g e c +Ged g e d +GED g e d +GEF g e f +GEGB g e g b +ge g e +G. E. g e +G.E. g e +GE g e +GEHA g e h a +GEHX g e h x +Gek g e k +G.E.M. g e m +Gen. general +G.E.O. g e o +G. E. P. g e p +geq g e q +Geu g e u +GEU g e u +Gev g e v +GeV g e v +GEV g e v +Gfa g f a +GFA g f a +GF&A g f and a +GFC g f c +GFD g f d +GFDL g f d l +GFE g f e +GFF g f f +G. F. g f +G.F. g f +GFM g f m +GFP g f p +G. F. R. g f r +GFR g f r +GFS g f s +GFSN g f s n +GFSR g f s r +GFT g f t +GFWC g f w c +GFW g f w +GFY g f y +G. G. g g +G.G. g g +G. G. J. g g j +GGL g g l +GGM g g m +GGMRC g g m r c +GGS g g s +G.H.A. g h a +GHB g h b +GHC g h c +GHD g h d +Ghe g h e +G. H. g h +G.H. g h +GHGs g h g's +Ghir g h i r +GHK g h k +GHMC g h m c +G.H.N. g h n +G.H.Q. g h q +GHQ g h q +ghra g h r a +GHR g h r +GH&SA g h and s a +GHSA g h s a +GHSFHA g h s f h a +GHS g h s +GHSR g h s r +GHSU g h s u +GHU g h u +G.H.W. g h w +GICL g i c l +Giei g i e i +GIFV g i f v +G. I. g i +G.I. g i +G.J.B. g j b +G. J. g j +G.J. g j +G. K. g k +G.K. g k +Gla g l a +GLA g l a +GLB g l b +GLBT g l b t +GLBTQ g l b t q +GLC g l c +G&L g and l +G. L. g l +G.L. g l +gli g l i +Gli g l i +GLI g l i +Glis g l i's +GLK g l k +GLL g l l +GLLI g l l i +GLM g l m +glnA g l n a +G.L.O.R.Y. 
g l o r y +Glos g l o's +GLRS g l r s +GLS g l s +GLT g l t +GLVC g l v c +GLX g l x +GMAC g m a c +gma g m a +GMA g m a +GMAM g m a m +GMA's g m a's +GMAT g m a t +GMB g m b +GmbH g m b h +GMBH g m b h +GMB's g m b's +GMCC g m c c +GMC g m c +GMCL g m c l +GMC's g m c's +GMDSS g m d s s +GME g m e +Gmel g m e l +GMES g m e s +GMFF g m f f +GMF g m f +GMG g m g +gm g m +G. M. g m +G.M. g m +GM g m +GMG's g m g's +gmin g m i n +GMM g m m +GMO g m o +GMOs g m o's +GMPC g m p c +GMPE g m p e +GMP g m p +GMPTE g m p t e +GMR g m r +GMRL g m r l +GM's g m's +GMs g m's +GMS g m s +gmt g m t +GMT g m t +GMTV g m t v +Gmul g m u l +GNAA g n a a +GNA g n a +gnb g n b +GNB g n b +GNCC g n c c +G. N. C. g n c +GNC g n c +G. N. g n +G.N. g n +GNL g n l +GNP&BR g n p and b r +GNPDA g n p d a +gnp g n p +GNP g n p +GNPs g n p's +GNRD g n r d +GNR g n r +GNR's g n r's +GNS g n s +GNSS g n s s +GNT g n t +G.O. g o +G.O.O.D. g o o d +GOPB g o p b +G.O.P. g o p +GOP g o p +GOPIO g o p i o +GOP's g o p's +GOQ g o q +GPA g p a +GPB g p b +G.P.C. g p c +GPC g p c +GPCR g p c r +GPCRs g p c r's +GPD g p d +GPdI g p d i +GPDSC g p d s c +GPF g p f +GPFS g p f s +G&P g and p +GPG g p g +gp g p +G. P. g p +G.P. g p +GP g p +GPGPU g p g p u +GPI g p i +GpIIb g p i i b +GPIO g p i o +GPL g p l +GPMGs g p m g's +G.P.O. g p o +GPO g p o +GPP g p p +GPRA g p r a +GPRF g p r f +GPR g p r +GPRs g p r's +GPRS g p r s +GPSC g p s c +GPS g p s +GPT g p t +GPU g p u +GPUs g p u's +GPX g p x +G. Q. g q +GQ g q +GQNYGK g q n y g k +GQ's g q's +GQT's g q t's +G.R.A.B. g r a b +grac g r a c +gra g r a +Gra g r a +GRA g r a +Grbac g r b a c +GRB g r b +GRBs g r b's +grc g r c +GRC g r c +GRCs g r c's +GRDA g r d a +GRDC g r d c +G.R.D. g r d +GRD g r d +gre g r e +Gre g r e +GRE g r e +GRF g r f +GRG g r g +gr g r +G. R. g r +G.R. g r +GR g r +GRK g r k +G.R.L. g r l +GRL g r l +GRMG g r m g +GRM g r m +GRO g r o +GRP g r p +grrl g r r l +G. R. S. 
g r s +GRS g r s +GRT g r t +GRU g r u +GRX g r x +Gryf g r y f +gry g r y +Gry g r y +GRY g r y +grz g r z +GSA g s a +GSAPP g s a p p +GSAS g s a s +GSB g s b +GSC g s c +GSDF g s d f +GSD g s d +G.S.E. g s e +GSE g s e +GSFC g s f c +GSF g s f +G&S g and s +GSG g s g +Gs g's +G. S. g s +G.S. g s +GS g s +GSHD g s h d +GSH g s h +GSI g s i +GSIs g s i's +GSIS g s i s +GSK g s k +GSL g s l +GSLV g s l v +GSMA g s m a +GSM g s m +GSNAS g s n a s +G. S. N. g s n +GSN g s n +GSNM g s n m +GSN's g s n's +GSO g s o +GSOM's g s o m's +GSOp g s o p +GSPC g s p c +G. S. P. g s p +GSP g s p +GSR g s r +GSSE g s s e +GSS g s s +GSSP g s s p +GSTA g s t a +G.S.T. g s t +GST g s t +GSU g s u +GSUSA g s u s a +GSVD g s v d +GSVS g s v s +GSX g s x +GTAA g t a a +GTA g t a +GTAs g t a's +GTB g t b +GTBK g t b k +GTBP g t b p +GTC g t c +GTCR g t c r +GTD g t d +GTEC g t e c +GTE g t e +GTF g t f +GTFT g t f t +GTFU g t f u +gt g t +G. T. g t +G.T. g t +GT g t +G. T. H. g t h +GTi g t i +GTI g t i +GTK g t k +GTL g t l +GTMO g t m o +G. T. O. g t o +GTO g t o +GTPBP g t p b p +GTP g t p +GTPR g t p r +GTRA g t r a +GTR g t r +GTRI g t r i +GTs g t's +GTS g t s +GTSV g t s v +GTT g t t +GTTP g t t p +GTU g t u +GTV g t v +GTW g t w +GTW's g t w's +GTX g t x +G. U. g u +G.U. g u +GUI's g u i's +GUIs g u i's +G.U.Y. g u y +GVA g v a +GVAV g v a v +GVBA g v b a +GVC g v c +G. V. g v +G.V. g v +GVHSS g v h s s +GVJZ g v j z +GVK g v k +GvpA g v p a +GVP g v p +GVRD g v r d +GVSU g v s u +GVT g v t +GVU g v u +GVVV g v v v +GVW g v w +GVWR g v w r +GWA g w a +GWB g w b +G. W. C. g w c +GWe g w e +Gwet g w e t +G.W.F. g w f +GWF g w f +gw g w +G. W. g w +G.W. g w +GW g w +G. W. H. g w h +GWHS g w h s +Gwi g w i +GWK g w k +G.W.L. g w l +GWO g w o +GWOT g w o t +gwr g w r +GWR g w r +GWR's g w r's +GW's g w's +G. W. S. g w s +GWS g w s +GWU g w u +GXG g x g +GX g x +GXP g x p +GYAN's g y a n's +GYC g y c +GYF g y f +Gy g y +G. Y. 
g y +GY g y +Gyn g y n +GYN g y n +Gyps g y p's +gyu g y u +Gyu g y u +GZA g z a +GZ g z +GZR g z r +HAFV h a f v +H. A. h a +H.A. h a +H&A h and a +HBA h b a +HBA's h b a's +HBBO h b b o +HBC h b c +HBCo h b c o +HBC's h b c's +HBCU h b c u +HBES h b e s +HBF h b f +HBGAs h b g a's +H. B. h b +H.B. h b +HB h b +HBI h b i +HBK h b k +HBLX h b l x +HBMP h b m p +HBO h b o +HBO's h b o's +HBPU h b p u +H&BR h and b r +HBR h b r +HBRN h b r n +HBRY h b r y +HB's h b's +HBS h b s +HBSS h b s s +HBU's h b u's +HBV h b v +HBW h b w +HCA h c a +HCAW h c a w +HCBE h c b e +HCB h c b +HCBS h c b s +HCCA h c c a +HCCB h c c b +HCCC h c c c +HCC h c c +HCDCH h c d c h +HCE h c e +HCEs h c e's +HCF h c f +H. C. G. h c g +HCGVN h c g v n +H. C. h c +H.C. h c +HC h c +HCH h c h +HCHS h c h s +HCI h c i +HCJB h c j b +hcl h c l +HCL h c l +HClO h c l o +HCMC's h c m c's +HCME h c m e +HCM h c m +HCN h c n +HCO h c o +HCoV h c o v +HCPA h c p a +H. C. P. h c p +HCP h c p +HCPs h c p's +HCR h c r +HCS h c s +HCT h c t +HCTV's h c t v's +HCUP h c u p +HCV h c v +Hcy h c y +HDAC h d a c +Hdad h d a d +hDAF h d a f +HDA h d a +HDB h d b +HDCAM h d c a m +HDC h d c +HDCP h d c p +HDD h d d +HDDs h d d's +HDFC h d f c +H.D.F. h d f +HDFM h d f m +H.D.G. h d g +HDG h d g +hd h d +H. D. h d +H.D. h d +HD h d +hdh h d h +HDi h d i +HDI h d i +HDLC h d l c +HDL h d l +HDLS h d l s +HDM h d m +HDMI h d m i +HDPE h d p e +HDP h d p +HDR h d r +HDRO h d r o +HDRR h d r r +H. E. h e +H.E. h e +hezb h e z b +Hezb h e z b +HFA h f a +HFB h f b +HFC h f c +HFCS h f c s +HFDF h f d f +HFF h f f +HFF's h f f's +H&F h and f +H. F. h f +H.F. h f +HFI h f i +H. F. J. h f j +HFNB h f n b +HFPA h f p a +HFP h f p +HFPPV h f p p v +HFRS h f r s +HFS h f s +HFTP h f t p +H.G.B. h g b +hgcA h g c a +HGC h g c +HGF h g f +hg h g +H. G. h g +H.G. h g +HG h g +HGH h g h +HGNC h g n c +HgO h g o +H. G. O. 
h g o +HGPIN h g p i n +HGPs h g p's +HGTV h g t v +HGTV's h g t v's +HgU h g u +HGVs h g v's +HGY h g y +HHA h h a +H. H. B. h h b +HHC h h c +HHD h h d +HHDP h h d p +HHG h h g +H&H h and h +HHHDB h h h d b +H. H. h h +H.H. h h +HHPC h h p c +HHSAA h h s a a +HHS h h s +H.H.T. h h t +H. I. h i +H.I. h i +H.I.V.E. h i v e +HIV h i v +HIY h i y +Hizb h i z b +hja h j a +HJC h j c +H. J. h j +H.J. h j +HJ h j +HJK h j k +H.J.M. h j m +HJN h j n +H. J. R. h j r +H.J.R. h j r +H. J. W. h j w +HJW h j w +HKAA h k a a +Hkam h k a m +HKB h k b +HKCEC h k c e c +HKCEE h k c e e +HKDF h k d f +HKD h k d +HKDSE h k d s e +HKFC h k f c +H&K h and k +H. K. h k +H.K. h k +HK h k +H. K. J. h k j +HKK h k k +HKMA h k m a +HKN h k n +H.K.P. h k p +HKP h k p +HKSAR h k s a r +HK's h k's +HKT h k t +HKTWU h k t w u +HKU h k u +Hla h l a +HLA h l a +HLBN h l b n +HLE h l e +HLH h l h +H. L. h l +H.L. h l +HLL h l l +HLLS h l l s +HLN h l n +HLN's h l n's +HLS h l s +HLT h l t +HLTV h l t v +Hluk h l u k +hlutr h l u t r +HLV h l v +Hly h l y +HMAC h m a c +HMA h m a +Hman h m a n +H.M.A.S. h m a s +HMAS h m a s +HMB h m b +HMC h m c +HMCS h m c s +HMDA h m d a +HMD h m d +HMDI h m d i +HMDP h m d p +HME h m e +HMEL h m e l +HMF h m f +HMGA h m g a +HMG h m g +HMGIC h m g i c +HMGN h m g n +H&M h and m +HMH h m h +hm h m +H. M. h m +H.M. h m +HM h m +HMI h m i +HMIS h m i s +HMMC h m m c +HMM h m m +HMMWV h m m w v +HMN h m n +HMNZS h m n z s +HMNZT h m n z t +HMO h m o +HMOs h m o's +HMPC h m p c +HMP h m p +HMRC h m r c +hMRE h m r e +H&M's h and m's +H. M. S. h m s +H.M.S. h m s +HMS h m s +HMSN h m s n +H.M.S.O. h m s o +HMSO h m s o +H. N. h n +H.N. h n +HNI h n i +HNK h n k +HNL h n l +HNLMS h n l m s +HNMT h n m t +HNN h n n +HNO h n o +HNS h n s +H. O. h o +H.O. h o +HPA h p a +H.P.B. h p b +hpc h p c +HPC h p c +HPCT h p c t +H. P. D. h p d +HPD h p d +HPE h p e +HPH h p h +hp h p +H. P. h p +H.P. 
h p +HP h p +HPI h p i +HPLC h p l c +HPL h p l +HPP h p p +HPRA h p r a +HPR h p r +HPRT h p r t +HP's h p's +HPS h p s +H. P. T. h p t +HPV h p v +HQAA h q a a +HQDA h q d a +H.Q. h q +HQ h q +HRG h r g +H&R h and r +H.R.H. h r h +HRH h r h +H. R. h r +H.R. h r +HR h r +HRK h r k +HRL h r l +HRM h r m +hroa h r o a +HRo h r o +HRO h r o +HRP h r p +HRSA h r s a +HRSA's h r s a's +HRSG h r s g +hrs h r s +HRs h r's +HRST h r s t +HRT h r t +HRV h r v +HRW h r w +HRX h r x +HSA h s a +HSAN h s a n +HSBC h s b c +HSB h s b +HSCA h s c a +HSC h s c +hscy h s c y +HSD h s d +HSDPA h s d p a +Hsee h s e e +HSE h s e +HSE's h s e's +HSF h s f +HSG h s g +HSHB h s h b +H.S.H. h s h +hs h s +Hs h's +H. S. h s +H.S. h s +HS h s +HSHS h s h s +hsi h s i +HSJ h s j +HSK h s k +HSL h s l +hSlo h s l o +H.S.M. h s m +HSM h s m +HSMR h s m r +HSN h s n +HSNY's h s n y's +HSPA h s p a +HSPA's h s p a's +HSPG h s p g +HSPH h s p h +HSP h s p +HSP's h s p's +HSPs h s p's +HSRC h s r c +HSR h s r +HSSCCT h s s c c t +HSSF h s s f +HSS h s s +HST h s t +HST's h s t's +HSTs h s t's +HSTS h s t s +hsv h s v +HSV h s v +HSX h s x +HSY h s y +HSYK's h s y k's +HSZ h s z +HTA h t a +hTAS h t a s +HTAs h t a's +htc h t c +HTC h t c +HTC's h t c's +HTGL h t g l +HTHC h t h c +ht h t +H. T. h t +H.T. h t +HT h t +HTIB h t i b +hti h t i +Htin h t i n +HTK h t k +HTLA h t l a +HTLV h t l v +HTML h t m l +HTM's h t m's +Htoo h t o o +HTPB h t p b +HTPC h t p c +HTP h t p +HtrA h t r a +HTR h t r +HTRs h t r's +HTS h t s +http h t t p +HTTP h t t p +https h t t p s +HTTPS h t t p s +HTV h t v +HTV's h t v's +Htwa h t w a +HTW h t w +HTX h t x +H. U. h u +HVC h v c +HVCs h v c's +hvcv h v c v +HVDC h v d c +hvem h v e m +Hvem h v e m +HVF h v f +HVHS h v h s +H. V. h v +H.V. h v +H.W.A. h w a +H. W. F. h w f +H. W. h w +H.W. h w +H.W.L. h w l +HWL h w l +HWM h w m +HWRT h w r t +HWT h w t +HWV h w v +hwy h w y +Hwy h w y +hxt h x t +H. Y. h y +H.Y. h y +H. Z. 
h z +HZ&PC h z and p c +iaaf i a a f +IAAF i a a f +IAAF's i a a f's +IAA i a a +IAAPA i a a p a +IAAP i a a p +IaaS i a a s +IAAS i a a s +IABSE i a b s e +Iaca i a c a +IACC i a c c +IACD i a c d +IAC i a c +Iacob i a c o b +IACR i a c r +IAC's i a c's +IACTs i a c t's +Iacub i a c u b +IACUC i a c u c +IACW i a c w +IAD i a d +IAD's i a d's +IAEA i a e a +IAEG i a e g +I.A.E. i a e +IAE i a e +IAF i a f +IAF's i a f's +IAG i a g +IAG's i a g's +iagt i a g t +IAHP i a h p +IAHRC i a h r c +I. A. i a +I.A. i a +I.A.M. i a m +IAO i a o +Iapa i a p a +IAPHC i a p h c +IAP i a p +iar i a r +Iar i a r +IAR i a r +IAST i a s t +IATA i a t a +I.A.T. i a t +IAT i a t +IATR i a t r +IAUC i a u c +IAUCs i a u c's +IAU i a u +IAV i a v +IAWTV i a w t v +Iax i a x +IAX i a x +Ibac i b a c +Ibaes i b a e's +IBA's i b a's +IBB i b b +IBC i b c +IBCs i b c's +IBCT i b c t +IBDA'A i b d a a +Ibda i b d a +ibdal i b d a l +IBDB i b d b +IBDE i b d e +IBD i b d +IBE i b e +IBF i b f +IBGE i b g e +IBH i b h +I. B. i b +I.B. i b +IBJJF i b j j f +IBL i b l +IBLT i b l t +IBMA's i b m a's +IBMC i b m c +Ibm i b m +I.B.M. i b m +IBM i b m +IBM's i b m's +IBMXF i b m x f +Ibne i b n e +ibn i b n +IBO i b o +IBPA i b p a +IBP i b p +Ibra i b r a +IBRA i b r a +IBRC i b r c +IBR i b r +IBRO i b r o +Ibsa i b s a +IBSA i b s a +IBSF i b s f +Ibs i b's +IBS i b s +IBSL i b s l +IBTC i b t c +iBT i b t +IBTS i b t s +Ibu i b u +IBU i b u +IBV i b v +IBWFF i b w f f +iby i b y +ICBA i c b a +ICBC i c b c +ICB i c b +ICBL i c b l +ICBM i c b m +ICBMs i c b m's +ICBP i c b p +ICBS i c b s +ICBTT i c b t t +ICCC i c c c +ICCC's i c c c's +ICCEC i c c e c +ICCF i c c f +Icche i c c h e +I.C.C. i c c +ICC i c c +ICCID i c c i d +ICCI i c c i +ICCR i c c r +ICC's i c c's +ICCS i c c s +ICCW i c c w +ICDC i c d c +ICD i c d +ICDS i c d s +I.C.E. i c e +ICFF i c f f +icf i c f +ICF i c f +ICFR i c f r +ICG i c g +ICHC i c h c +I&C i and c +ic i c +Ic i c +I. C. i c +I.C. 
i c +IC i c +Ici i c i +ICI i c i +ICJ i c j +ICL i c l +ICL's i c l's +ICMA i c m a +ICME i c m e +I.C.M. i c m +ICM i c m +iCN i c n +ICN i c n +Ico i c o +ICO i c o +ICOP i c o p +ICPC i c p c +ICPD i c p d +ICP i c p +ICP's i c p's +ICQ i c q +ICRC i c r c +ICRF i c r f +ICR i c r +IcRn i c r n +ICRP i c r p +ICRS i c r s +ICRW i c r w +ICSA i c s a +ICSB i c s b +ICSC i c s c +ICSE i c s e +ICSG i c s g +ics i c s +ICs i c's +ICS i c s +ICSS i c s s +ICSSR i c s s r +ICTA i c t a +ICT i c t +ICTM i c t m +ICTP i c t p +ICTSF i c t s f +ICTs i c t's +ICTS i c t s +ICTU i c t u +ICTUR i c t u r +ICTVdB i c t v d b +ICTV's i c t v's +ICTY i c t y +ICU i c u +ICUN i c u n +ICU's i c u's +ICv i c v +ICV i c v +ICVS i c v s +ICWA i c w a +ICWC i c w c +ICW i c w +ICW's i c w's +iCyt i c y t +ICZ i c z +ICZN i c z n +IDAF i d a f +IDBF i d b f +IDB i d b +IDBI i d b i +IDC i d c +Iddaa i d d a a +Idd i d d +ID'd i d d +IDEF i d e f +IDE i d e +IDFA i d f a +IDFG i d f g +IDF i d f +IDFL i d f l +IDF's i d f's +IDG i d g +I. D. H. i d h +IDH i d h +id i d +I. D. i d +I.D. i d +ID i d +Idi i d i +Idir i d i r +IDIS i d i s +IDLHs i d l h's +IDL i d l +Idm i d m +IDM i d m +IDMR i d m r +IDMS i d m s +IDNA i d n a +IDN i d n +IDNR i d n r +IDNR's i d n r's +IDPs i d p's +IDRC i d r c +IDRO i d r o +IDSA i d s a +IDSA's i d s a's +IDSF i d s f +ids i d s +ID's i d's +IDs i d's +IDV i d v +IDW i d w +IDWO i d w o +IDW's i d w's +IDx i d x +IDX i d x +IEA i e a +I.E.M. i e m +IEM i e m +I.E.S. i e s +IES i e s +i.e. that is +Ifa i f a +IFA i f a +IFBB i f b b +IFBC i f b c +I.F.B. i f b +IFCC i f c c +ifc i f c +IFC i f c +IFC's i f c's +IfF i f f +I. F. F. 
i f f +IFF i f f +IFFI i f f i +IFFK i f f k +Ifft i f f t +IFHA i f h a +IFHP i f h p +ifi i f i +Ifi i f i +IFI i f i +IFIP i f i p +IFIs i f i's +IFK i f k +IFLA i f l a +IFL i f l +IfM i f m +IFM i f m +IFN i f n +Ifni i f n i +IFNs i f n's +IFO i f o +Ifop i f o p +IFP i f p +IFPI i f p i +IFPRI i f p r i +IFRC i f r c +IFR i f r +IFRS i f r s +IFRSs i f r s's +IFSB i f s b +IFSC i f s c +IFSTA i f s t a +IFTA i f t a +IFTF i f t f +IFT i f t +Iga i g a +IgA i g a +IGA i g a +IGA's i g a's +IGBT i g b t +IGC i g c +IGCP i g c p +IGCSE i g c s e +IGD i g d +IGF i g f +IGFI i g f i +IGFs i g f's +IGIB i g i b +'ig i g +Ig i g +I.G. i g +IG i g +igi i g i +IGI i g i +IgM i g m +IGM i g m +IGMP i g m p +ign i g n +Ign i g n +IGN i g n +IHF i h f +IHGS i h g s +IHH i h h +Ih i h +I. H. i h +IH i h +ihi i h i +IHI i h i +IHL i h l +IHL's i h l's +IHMC i h m c +IHME i h m e +ihm i h m +ihn i h n +Ihn i h n +IHOM i h o m +Ihor i h o r +IHPK i h p k +IHRC i h r c +IHSAA i h s a a +IHSA i h s a +IHS i h s +Iht i h t +IIA i i a +IIb i i b +IIB i i b +IIc i i c +IIDC i i d c +IID i i d +IIDI i i d i +IIDS i i d s +IIDX i i d x +IIED i i e d +IIFA i i f a +IIF i i f +IIfx i i f x +Iiga i i g a +IIga i i g a +IIG i i g +IIgs i i g's +IIGs i i g's +IIGS i i g s +IIGS's i i g s's +IIHF i i h f +IIHR i i h r +IIHS i i h s +IIIA i i i a +IIIb i i i b +IIIB i i i b +IIIBy i i i b y +IIIc i i i c +IIId i i i d +IIIF i i i f +I. I. i i +I.I. i i +ija i j a +Ija i j a +IJA i j a +IJF i j f +IJGL i j g l +IJHCD i j h c d +IJHL i j h l +ij i j +I. J. i j +IJ i j +IJN i j n +Ijok i j o k +IJSB i j s b +IJSE i j s e +IJSEM i j s e m +IJS i j s +Ijui i j u i +IJVM i j v m +IKBN i k b n +IKCO i k c o +IKF i k f +Ikh i k h +I.K. i k +IKK i k k +IKL i k l +IKr i k r +Iksa i k s a +IKTA i k t a +Iku i k u +Ilbe i l b e +ILB i l b +ILC i l c +ILCS i l c s +ILD i l d +ILDIS i l d i s +ILFC i l f c +ILF i l f +IL&FS i l and f s +ili i l i +Ili i l i +ILI i l i +I. L. i l +I.L. 
i l +Ilm i l m +ILM i l m +ILM's i l m's +ILN i l n +ILP i l p +ILR i l r +ILRS i l r s +ilu i l u +Ilu i l u +ILU i l u +IMbd i m b d +IMBD i m b d +IMB i m b +IMCB i m c b +IMCI i m c i +imc i m c +IMC i m c +imdb i m d b +IMDb. i m d b +IMDb i m d b +IMDB i m d b +IMD i m d +IMD's i m d's +ime i m e +IME i m e +IMF i m f +IMG i m g +I&M i and m +IMI i m i +I. M. i m +I.M. i m +IMIS i m i s +IMK i m k +Iml i m l +IMMEX i m m e x +Imm i m m +Imms' i m m's +imoa i m o a +IMO's i m o's +IMPA i m p a +IMPDH i m p d h +impr i m p r +Impr i m p r +IMPSA i m p s a +IMR i m r +IMSLP i m s l p +iMSNs i m s n's +Imst i m s t +IMTA i m t a +IMT i m t +IMTV i m t v +IMUK i m u k +IMU's i m u's +IMWF i m w f +INAC i n a c +Inba i n b a +INBA i n b a +INBF i n b f +inb i n b +incl i n c l +INDC i n d c +ind i n d +Ind i n d +IND i n d +INFJ i n f j +I.N.F.O. i n f o +Infs i n f's +ING i n g +INGR i n g r +inHg i n h g +INH i n h +INI i n i +I. N. i n +I.N. i n +InlB i n l b +INL i n l +INM i n m +INPD i n p d +INPE i n p e +InP i n p +INP i n p +INPP i n p p +Inre i n r e +I.N.R.I. i n r i +INR i n r +INSB i n s b +INSDC i n s d c +INSD i n s d +Inspx i n s p x +INSS i n s s +INTF i n t f +Ints i n t's +INTs i n t's +Intu i n t u +INVA i n v a +inv i n v +INXS i n x s +INXS's i n x s's +Ioba i o b a +IOCG i o c g +IOC i o c +IOCP i o c p +IODP i o d p +IODs i o d's +IOE i o e +IOFB i o f b +IOF i o f +io. i o +io i o +.I.o. i o +Io. i o +Io i o +.IO i o +I/O i o +IO i o +Ioka i o k a +Iok i o k +IOL i o l +IoM i o m +IOM i o m +IOPI i o p i +IOP i o p +IOPs i o p's +IORG i o r g +IOR i o r +IORM i o r m +IOSA i o s a +IOSB i o s b +iOS i o s +Io's i o's +I/Os i o's +I.O.'s i o's +IOS i o s +IOSR i o s r +Iosu i o s u +IoT i o t +IOT i o t +I.O.U. 
i o u +IOU i o u +IoW i o w +IOW i o w +IPA i p a +IPA's i p a's +IPB i p b +IPCC i p c c +IPCC's i p c c's +IPC i p c +IPCRI i p c r i +IPCs i p c's +IPDA i p d a +IPDI i p d i +IPD i p d +IPE i p e +IPFA i p f a +IPFI i p f i +IPF i p f +IPFP i p f p +IPFW i p f w +IPG i p g +IPH i p h +ip i p +Ip i p +I.P. i p +IP i p +IPL i p l +IPMG i p m g +IPM i p m +IPNI i p n i +IPN i p n +Ipo i p o +I.P.O. i p o +IPO i p o +IPP i p p +IPPL i p p l +IPPs i p p's +IPPT i p p t +Ippu i p p u +iPPV i p p v +IPR i p r +iPSC i p s c +IPSC i p s c +iPSCs i p s c's +ipse i p s e +IPSF i p s f +iPS i p s +IPs i p's +I.P.S. i p s +IPS i p s +IPTA i p t a +IPTC i p t c +IPTG i p t g +IPT i p t +IPTL i p t l +IPTV i p t v +IPU i p u +IPUM i p u m +IPv i p v +IPWA i p w a +IPW i p w +IPX i p x +iQ i q +I.Q. i q +IQ i q +IRAC i r a c +I.R.A. i r a +IRB i r b +IRBM i r b m +IRCA i r c a +IRC i r c +IRCM i r c m +IRCTC i r c t c +IRCT i r c t +IRD i r d +IRDS i r d s +IREC i r e c +I.R.E. i r e +IRE i r e +IRFCA i r f c a +IRFC i r f c +IRF i r f +IRFU i r f u +IRGC i r g c +Irgm i r g m +IRHG i r h g +IRHT i r h t +IRIB i r i b +Irig i r i g +IRI i r i +Iril i r i l +IRIN i r i n +i'r i r +ir i r +Ir i r +I. R. i r +I.R. i r +IR i r +IRIX i r i x +Irla i r l a +IRL i r l +IRM i r m +Irmis i r m i's +IRM's i r m's +IRNE i r n e +IRN i r n +iro i r o +Iro i r o +IRO i r o +IRP i r p +IRP's i r p's +IRPS i r p s +IRQ i r q +Irra i r r a +IRRI i r r i +IRR i r r +IRs i r's +I.R.S. i r s +IRS i r s +IRSN i r s n +I.R.T. 
i r t +IRT i r t +iru i r u +Iru i r u +ISA i s a +ISAv i s a v +Isba i s b a +ISBA i s b a +ISBER i s b e r +ISBHF i s b h f +ISB i s b +isbn i s b n +ISBN i s b n +ISBNs i s b n's +ISBO i s b o +ISCB i s c b +ISCE i s c e +isCf i s c f +ISCI i s c i +iSC i s c +Isc i s c +ISC i s c +ISCM i s c m +ISCM's i s c m's +ISCP i s c p +ISDA i s d a +Isd i s d +ISD i s d +isdn i s d n +ISDN i s d n +ISDS i s d s +ISDT i s d t +ISFI i s f i +ISF i s f +Isgec i s g e c +ISG i s g +ISHC i s h c +ishq i s h q +Ishq i s h q +ISHS i s h s +I. S. i s +I.S. i s +ISKB i s k b +ISK i s k +Isl i s l +ISL i s l +ISMF i s m f +ISMIR i s m i r +ISMNs i s m n's +ISN i s n +ISNTUC i s n t u c +ISOGG i s o g g +ISO i s o +ISO's i s o's +I.S.P.C.K. i s p c k +ISPF i s p f +ISPI i s p i +ISP i s p +ISPL i s p l +ISPR i s p r +ISPs i s p's +Isra'il i s r a i l +Isra i s r a +ISRA i s r a +ISR i s r +ISRN i s r n +ISRO i s r o +ISRP i s r p +ISSCC i s s c c +ISSF i s s f +iss i s s +Iss i s's +ISS i s s +ISSN i s s n +ISSNs i s s n's +ISSSSP i s s s s p +ISSUU i s s u u +ISTAF i s t a f +ISTD i s t d +Isu i s u +ISU i s u +ISU's i s u's +ISWC i s w c +ISWI i s w i +ISW i s w +I.T.A. i t a +ITA i t a +ITB i t b +ITC i t c +Ite i t e +ITE i t e +ITF i t f +ITG i t g +ITHF i t h f +I.T.H.M. i t h m +ITHs i t h's +iti i t i +Iti i t i +ITI i t i +I.T. i t +itk i t k +Itk i t k +ITK i t k +ITM i t m +ITMO i t m o +ITN i t n +ITNNs i t n n's +ITN's i t n's +Itoi i t o i +ITPF i t p f +iTP i t p +ITP i t p +ITRD i t r d +Itse i t s e +ITSF i t s f +ITTF i t t f +itt i t t +ITT i t t +ITUC i t u c +Itu i t u +ITU i t u +ITV i t v +ITV's i t v's +ITWF i t w f +ITW i t w +Itz i t z +IUB i u b +I.U.C.N.B. i u c n b +Iucn i u c n +IUCN i u c n +IUCN's i u c n's +IUD i u d +IUDs i u d's +IU i u +IUT i u t +IUU i u u +Iuz i u z +IVAA i v a a +IVAW i v a w +IVB i v b +I. V. i v +I.V. 
i v +IVL i v l +IVLP i v l p +IVM's i v m's +IVP i v p +IVPP i v p p +IVPTC i v p t c +IVRCL i v r c l +IVs i v's +Ivu i v u +IVWA i v w a +IWA i w a +Iwas i w a's +Iwaz i w a z +IWBs i w b's +IWC i w c +IWD i w d +IWF i w f +IWFL i w f l +IWF's i w f's +IWFS i w f s +IWG i w g +IWGP i w g p +iwi i w i +iw i w +I. W. i w +IW i w +IWL i w l +IWMF i w m f +IWM i w m +IWPR i w p r +iwrg i w r g +IWRG i w r g +IWR i w r +IWRS i w r s +IWSA i w s a +iwspy i w s p y +Iwuh i w u h +IWU's i w u's +I.W.W. i w w +IWW i w w +IXA i x a +IXb i x b +IXBs i x b's +IXC i x c +IXCs i x c's +IXe i x e +IX i x +iXL i x l +IXO i x o +ixtle i x t l e +IXV i x v +Iya i y a +IYA i y a +Iyar i y a r +IYC i y c +iyem i y e m +IYHF i y h f +Iyi i y i +iy i y +I. Y. i y +I.Y. i y +IY i y +J.A.A. j a a +J. A. B. j a b +Jaf j a f +J.A.G. j a g +JAG j a g +JAIR j a i r +J. A. j a +J.A.K.E. j a k e +J. A. N. j a n +J.A.P. j a p +J.A.S. j a s +JBA j b a +JBBA j b b a +JBB j b b +JBC j b c +JBG j b g +J&B j and b +jb j b +J. B. j b +J.B. j b +JB j b +J.B.L.D. j b l d +JBL j b l +J.B.M. j b m +JBM j b m +JB's j b's +J. B. S. j b s +J.B.S. j b s +JBT j b t +JBZ j b z +JCA j c a +JCB j c b +JCC j c c +JCF j c f +JCFS j c f s +J.C.G. j c g +JCG j c g +JCGM j c g m +JCIB j c i b +JCI j c i +jcis j c i s +jc j c +J. C. j c +J.C. j c +JC j c +JCJC j c j c +J. C. L. j c l +JCPA j c p a +J. C. P. j c p +JCP j c p +JCPV j c p v +JCRB j c r b +JCR j c r +JCRs j c r's +J.C.S. j c s +JCS j c s +JCSR j c s r +JCVI j c v i +J. C. W. j c w +JCW j c w +J. D. A. j d a +JDA j d a +JDBC j d b c +JDC j d c +JdeBP j d e b p +J.D.E. j d e +JDE j d e +JDENET j d e n e t +J.D.F. j d f +JDF j d f +jdi j d i +JDI j d i +jd j d +J. D. j d +J.D. j d +JD j d +jdk j d k +JDK j d k +J.D.M. j d m +JDM j d m +JDO j d o +JDRF j d r f +J&D's j and d's +J.D.s j d's +JDT j d t +JDU j d u +JDV j d v +J. E. A. j e a +J.E.A.L. j e a l +J.E.B. j e b +J. E. C. j e c +J. E. E. j e e +J. E. j e +J.E. j e +J.F.G. j f g +JFH j f h +jf j f +J. 
F. j f +J.F. j f +JF j f +JFK j f k +JFN j f n +JFQA j f q a +JFS j f s +J. G. A. j g a +J.G.B. j g b +JGI j g i +J. G. j g +J.G. j g +J. G. L. j g l +JGLS j g l s +J. G. M. j g m +J.G.M. j g m +JGO j g o +JGP j g p +JGR j g r +JGR's j g r's +JGSDF j g s d f +JGTC j g t c +JGU j g u +J.H.B. j h b +JHB j h b +J. H. C. j h c +JHC j h c +JHED j h e d +J.H.F. j h f +J.H.H. j h h +jh j h +J. H. j h +J.H. j h +JH j h +JHL j h l +J. H. M. j h m +J.H.R. j h r +JHs j h's +JHS j h s +JHSVs j h s v's +JHud j h u d +JHU j h u +J. I. j i +J.I. j i +J.J.A. j j a +JJB j j b +J. J. C. j j c +J.J.G. j j g +J&J j and j +J. J. j j +J.J. j j +JJ j j +JJK j j k +J. J. L. j j l +J.J.M. j j m +J.J.N. j j n +JJN j j n +Jka j k a +JKA j k a +JKC j k c +jkd j k d +JKH j k h +J&K j and k +J.K. J. j k j +J. K. j k +J.K. j k +JK j k +JKNPP j k n p p +J. K. S. j k s +JKT j k t +jkx j k x +JKX j k x +JLA j l a +J.L.B. j l b +JLB j l b +JLC j l c +J. L. E. j l e +J.L.E. j l e +JLG j l g +JLI j l i +J. L. j l +J.L. j l +J.L.L. j l l +JLM j l m +JLO's j l o's +JLPGA j l p g a +JLP j l p +JLS j l s +JLT j l t +JMA j m a +JMBG j m b g +J.M.C. j m c +JMC j m c +J. M. D. j m d +JMD j m d +J. M. E. j m e +JMI j m i +J. M. J. j m j +J.M.J. j m j +J. M. j m +J.M. j m +JMKR j m k r +J. M. L. j m l +JML j m l +JMM j m m +JMP j m p +JMS j m s +JMU j m u +JMV j m v +J. M. W. j m w +J.M.W. j m w +JMX j m x +JNA j n a +jnb j n b +JNF j n f +JNG j n g +JNI j n i +J. N. j n +J.N. j n +JN j n +JNK j n k +J. N. L. j n l +Jno j n o +JNPT j n p t +jnr j n r +JNR j n r +JNS's j n s's +JOJ j o j +J. O. j o +J.O. j o +J.P.D.B. j p d b +J. P. F. j p f +JPF j p f +jpg j p g +JPG j p g +JPH j p h +J&P j and p +jp j p +J.P. j p +JP j p +J.P.R. j p r +J.Q. j q +JQ j q +J.R.A. j r a +JRA j r a +J.R.C. j r c +JRC j r c +J.R.E. j r e +JRG j r g +JRHU j r h u +J. R. I. j r i +J&R j and r +J. R. j r +J.R. j r +JR j r +JRJ's j r j's +Jr. junior +jr. junior +Jr junior +jr junior +JRPG j r p g +J. R. P. j r p +J. R. R. j r r +J.R.R. 
j r r +J.R.'s j r's +JRSP j r s p +J. R. T. j r t +JRU j r u +JRX j r x +JSA j s a +JSAP j s a p +JSA's j s a's +jsb j s b +JSCA j s c a +JSC j s c +J.S.D. j s d +JSD j s d +JSE j s e +JSF j s f +JSH j s h +Js j's +J. S. j s +J.S. j s +JS j s +J.S.K. j s k +J.S.L. j s l +JSL j s l +J. S. M. j s m +JSM j s m +JSNN j s n n +JSOC j s o c +JSO j s o +JSP j s p +JSPS j s p s +JSR j s r +JSS j s s +JSW j s w +JSW's j s w's +JSX j s x +JTA j t a +J.T.B. j t b +JTB j t b +JTC j t c +JTF j t f +JTG j t g +JTI j t i +J&T j and t +J. T. j t +J.T. j t +JT j t +JT&KW j t and k w +JTL j t l +JTO j t o +JTR j t r +JTRO j t r o +JTS j t s +JTTF j t t f +JTT j t t +JTV j t v +J. U. j u +J.U.L.I.A. j u l i a +J.V.B. j v b +JVB j v b +JVC j v c +J.V.E. j v e +JVG j v g +JVH j v h +J&V j and v +J. V. j v +J.V. j v +JV j v +JVM j v m +JVNW j v n w +JVP j v p +JVRA j v r a +JVS j v s +Jwa j w a +J. W. A. j w a +J. W. B. j w b +JWB j w b +J.W.C. U.S. j w c u s +JWHA j w h a +J.W.J. j w j +J. W. j w +J.W. j w +JWM j w m +J. W. P. j w p +JWP j w p +JWRC j w r c +J. W. S. j w s +J.W.S. j w s +JWS j w s +JWST j w s t +JWT j w t +jx j x +JX j x +J.X.W.P. j x w p +JYJ j y j +JYJ's j y j's +J. Y. j y +J.Y. j y +JY j y +Jym j y m +Jymn j y m n +JYP j y p +J. Y. S. j y s +J. Z. j z +J.Z. j z +JZ j z +K.A.A. k a a +K. A. k a +K.A. k a +K.A.N. k a n +K. A. R. k a r +K.A.S. k a s +KBBI k b b i +KBB k b b +KBCI k b c i +KBC k b c +KBCO k b c o +KBD k b d +KBE k b e +KBFC k b f c +KBFL k b f l +KBFR k b f r +KBGD k b g d +KBG k b g +KBIG k b i g +kbi k b i +K&B k and b +K. B. k b +K.B. k b +KBKR k b k r +KBKS k b k s +KBKW k b k w +KBL k b l +KBMT k b m t +KBND k b n d +KBNZ k b n z +KBO k b o +KBRC k b r c +KBR k b r +KBS k b s +KBSN k b s n +KCB k c b +KCBS k c b s +KCCC k c c c +KCCI k c c i +KCC k c c +KCCO k c c o +KCDC k c d c +K. C. D. k c d +KCD k c d +KCED k c e d +KCET k c e t +KCG k c g +KCHS k c h s +K.C.I.E. k c i e +K. C. k c +K.C. k c +KCK k c k +KCKM k c k m +KCLU k c l u +K.C.M.G. 
k c m g +KCMG k c m g +KCNA k c n a +KCNH k c n h +KCNJ k c n j +KCN k c n +KCNN k c n n +KCNQ k c n q +KCNV k c n v +KCPD k c p d +KCPI k c p i +KCPM k c p m +KCPQ k c p q +KCPQ's k c p q's +KCQL k c q l +KCRA k c r a +KCRC k c r c +KCRH k c r h +KCR k c r +KCRW k c r w +KCSD k c s d +KCSG k c s g +K.C.S.I. k c s i +KCSI k c s i +K.C.'s k c's +KCS k c s +KCSP k c s p +KCTD k c t d +KCT k c t +KCTL k c t l +KCTV k c t v +KCUB k c u b +KCU k c u +KCVO k c v o +KCYX k c y x +kDa k d a +KDA k d a +kdal k d a l +KDAL k d a l +KDB k d b +KDCE k d c e +KDCO k d c o +KDDG k d d g +KDDI k d d i +KDD k d d +KDE k d e +KDFW k d f w +KDFX k d f x +K. D. G. k d g +KDGS k d g s +KDHS k d h s +KDKA k d k a +K&D k and d +kd k d +K. D. k d +K.D. k d +KD k d +KDP k d p +KDPM k d p m +KDR k d r +K. D.'s k d's +K.D.'s k d's +KDS k d s +ke k e +Ke k e +K. E. k e +K.E. k e +KE k e +KFAC k f a c +KFAED k f a e d +KFAI k f a i +KFA k f a +KFBK k f b k +KFBT k f b t +KFCC k f c c +KFCD k f c d +kfc k f c +K.F.C. k f c +KFC k f c +KFC's k f c's +KFD k f d +KFDM k f d m +KFEM k f e m +KFFL k f f l +KFH k f h +KFI k f i +KFJB k f j b +K.F. k f +KFLX k f l x +KFMB's k f m b's +KFMJ k f m j +K. F. M. k f m +K.F.M. k f m +KFOR k f o r +KFQC k f q c +KFQ k f q +KFRC k f r c +KFRC's k f r c's +KFRD k f r d +KFRG k f r g +KFRH k f r h +KFRO k f r o +KFSD k f s d +KFSN k f s n +KFST k f s t +KFT k f t +KFTL k f t l +KFUM's k f u m's +KFVD k f v d +KFV k f v +KFWB k f w b +KFXO k f x o +KFYO's k f y o's +KGA k g a +K.G.B. k g b +KGB k g b +KGC k g c +KGPH k g p h +KGP k g p +KGRI k g r i +kgr k g r +KGRK k g r k +KGS k g s +kgt k g t +KGT k g t +KGTV k g t v +KgU k g u +KGU k g u +KGUN k g u n +kgv k g v +K.G.V. k g v +KGWB k g w b +KGW k g w +KHHZ k h h z +KHI k h i +Khizr k h i z r +KHJ k h j +K. H. k h +K.H. 
k h +KH k h +KHK k h k +KHKV k h k v +Khlav k h l a v +KHL k h l +KHL's k h l's +Khmu k h m u +KHMY k h m y +KHOJ k h o j +KHOW k h o w +KHQA k h q a +KHQ k h q +Khri k h r i +Khru k h r u +KHRW k h r w +KHSAA k h s a a +KHS k h s +KHTL k h t l +KHTZ k h t z +KHUI k h u i +KHUT k h u t +KHWI k h w i +K.I.D. k i d +K. I. k i +K.I. k i +K.I.M. k i m +KIP k i p +K.I.S.S. k i s s +KJCE k j c e +KJCT k j c t +KJEF k j e f +Kjer k j e r +KJHK k j h k +KJI k j i +KJIN k j i n +kj k j +K. J. k j +K.J. k j +KJ k j +KJKP k j k p +KJMM k j m m +KJNE k j n e +KJNW k j n w +Kjop k j o p +KJo's k j o's +KJQY k j q y +KJV k j v +KJZI k j z i +KKBL k k b l +KKBQ k k b q +KKCR k k c r +KKDA k k d a +K.K.E. k k e +KKE k k e +K. K. k k +K.K. k k +KKMC k k m c +KKMI k k m i +KKM k k m +KKMX k k m x +KKRG k k r g +KKR k k r +KKSF k k s f +KK's k k's +KKSY k k s y +KKT k k t +KLA k l a +KLBK k l b k +KLBM k l b m +KLCI k l c i +KLC k l c +KLDS k l d s +Kle k l e +KLE k l e +KLF k l f +KLGH k l g h +KLH k l h +KLK k l k +kl k l +K. L. k l +K.L. k l +KL k l +KLKS k l k s +KLLP k l l p +KLLV k l l v +KLM k l m +KLN's k l n's +KLQQ k l q q +KLQT k l q t +KLRY k l r y +KLSC k l s c +KLSD k l s d +KLS k l s +KLSX k l s x +KLT k l t +KLX k l x +KMB k m b +KMC k m c +KMD k m d +KMGO k m g o +KMGT k m g t +KMIH k m i h +KMIR k m i r +KMITL k m i t l +KMJ k m j +K&M k and m +KMK k m k +km k m +K. M. k m +K.M. k m +KM k m +KMLA k m l a +KML k m l +KMME k m m e +KMML k m m l +KMRL k m r l +KMS k m s +KMSP k m s p +KMT k m t +KMTP k m t p +KMTV k m t v +KMVA k m v a +KMXW k m x w +KMZ k m z +KNFM k n f m +KNHP k n h p +K&N k and n +kn k n +K. N. k n +K.N. k n +KN k n +KNKT k n k t +KNLA k n l a +KNMI k n m i +KNNV k n n v +KNPN k n p n +KNPR k n p r +KNPU k n p u +KNRE k n r e +KNRS k n r s +KNS k n s +KNSM k n s m +KNST k n s t +KNTH k n t h +K.N.T. k n t +KNTN k n t n +KNTU k n t u +KNVA's k n v a's +KNVB k n v b +KNVN k n v n +KNX k n x +KNXT k n x t +K.O.D. k o d +k'o k o +K. O. k o +K.O. 
k o +KOTC k o t c +KOTD k o t d +K.O.T. k o t +KOWL k o w l +K. P. A. C. k p a c +KPAC k p a c +KPA k p a +KPAN k p a n +KPCC k p c c +KPC k p c +KPD k p d +KPE k p e +KPIF k p i f +KPI k p i +KPIs k p i's +K&P k and p +K. P. k p +K.P. k p +KP k p +KPLC k p l c +KPL k p l +KPLZ k p l z +KPMG k p m g +KPM k p m +KPNA k p n a +KPNB k p n b +KPO k p o +Kppen k p p e n +KPPV k p p v +KPS k p s +kptm k p t m +KPTV k p t v +KPU k p u +KPVD k p v d +KPVX k p v x +KPWR k p w r +KPXE k p x e +KQA k q a +KQED k q e d +KQHN k q h n +KQKK k q k k +KQKS k q k s +KQLT k q l t +KQLZ k q l z +KQMO k q m o +KQV k q v +KQXR k q x r +KRBM k r b m +krc k r c +KRCR k r c r +KRCs k r c's +KRF k r f +KRH k r h +K.R.I.T. k r i t +K.R.I.T.'s k r i t's +Krka k r k a +KRKC k r k c +KRK k r k +kr k r +K. R. k r +K.R. k r +KR k r +KRMT k r m t +Krne k r n e +KRNS k r n s +KRNV k r n v +KROQ k r o q +kroz k r o z +Kroz k r o z +KRPS k r p s +KRRT k r r t +K. R. S. k r s +KRS k r s +KRT k r t +Kru k r u +KRU k r u +KRWC k r w c +KRX k r x +K.R.Y. k r y +KRZA k r z a +ksa k s a +KSA k s a +K. S. B. k s b +KSBW k s b w +KSBY k s b y +KSCB k s c b +KSC k s c +KSCO k s c o +KSC's k s c's +KSCS k s c s +KSDB k s d b +KSDO k s d o +KSEB k s e b +KSEG k s e g +KSE k s e +KSEQ k s e q +kset k s e t +KSET k s e t +Kseur k s e u r +Ksevt k s e v t +KSFO k s f o +KSG k s g +KSHB k s h b +KSI's k s i's +KSJR k s j r +KSKJ k s k j +KSKK k s k k +K.S.K. k s k +KSK k s k +ks k s +K. S. k s +K.S. k s +KS k s +KSLC k s l c +KSL k s l +KSLM k s l m +KSLU k s l u +KSMB k s m b +KSNR k s n r +KSNV k s n v +KSP k s p +KSPS k s p s +KSTE k s t e +KSTH k s t h +KST k s t +KSTP k s t p +KSU k s u +K.S.V. k s v +KSWD k s w d +KSWT k s w t +KSWW k s w w +KSYL k s y l +KSZR k s z r +Ktav k t a v +KTAV k t a v +KTBS k t b s +KTCC k t c c +KTC k t c +KTCZ k t c z +KTDA k t d a +KTDD k t d d +KTE k t e +KTH k t h +KT&K k t and k +kt k t +K. T. k t +K.T. k t +KT k t +KTKZ k t k z +KTLA k t l a +K. T. M. 
k t m +KTM k t m +KTN k t n +KTOK k t o k +KTO k t o +KTRE k t r e +KTRH k t r h +KTRK k t r k +KTR k t r +KT's k t's +KTSM k t s m +KTT k t t +KTTN k t t n +KTTV k t t v +KTU k t u +KTUM k t u m +KTVA k t v a +KTVK k t v k +KTVS k t v s +KTVT k t v t +KTVU k t v u +KTVZ k t v z +KTWD's k t w d's +KTWO k t w o +KTX k t x +KTXL k t x l +Kuaa k u a a +Kud k u d +KUFM k u f m +KUGB k u g b +kuih k u i h +K. U. k u +K.U. k u +Kutb k u t b +KUTV k u t v +KUUU k u u u +KUWL k u w l +KUYI k u y i +KVAB k v a b +KVBC k v b c +KVB k v b +K.V.G.K. k v g k +KVHV k v h v +Kvik k v i k +KVI k v i +Kvit k v i t +KVIT k v i t +KVK k v k +K. V. k v +K.V. k v +KVLO k v l o +KVLY k v l y +KVM k v m +KVMRT k v m r t +KVMX k v m x +KVNG k v n g +KVN k v n +KVP k v p +KVTV k v t v +KVVS's k v v s's +KVVV k v v v +Kvyat k v y a t +KVZ k v z +kwa k w a +KWBM k w b m +KWC k w c +KWD k w d +KWEI k w e i +kwe k w e +KWG k w g +KWJJ k w j j +KWKH k w k h +KWKW k w k w +KWMR k w m r +KWMT k w m t +KWNA k w n a +KWNK k w n k +KWOA k w o a +KWP k w p +KWPT k w p t +KWQC k w q c +KWRE k w r e +KWRU k w r u +KWSB k w s b +KWSX k w s x +KWU k w u +KWWL's k w w l's +KXAN k x a n +KXAS k x a s +KXI k x i +KXJB k x j b +KXKS k x k s +KX k x +KXLX k x l x +KXLY k x l y +KXMC k x m c +KXNA k x n a +KXOL's k x o l's +KXTA k x t a +KXTN k x t n +KXTX k x t x +KYAY k y a y +KYBE k y b e +KYEZ k y e z +KYNM k y n m +Kyse k y s e +KYSL k y s l +KYTC k y t c +kyt k y t +Kyt k y t +KYW k y w +kz k z +K. Z. k z +KZ k z +KZLZ k z l z +KZMP k z m p +KZMU k z m u +KZQX k z q x +KZZQ k z z q +L. A. G. l a g +L. A. l a +L.A.'s l a's +Lay's l a y's +L.B.A. l b a +LBA l b a +LBCC l b c c +LBC l b c +LBC's l b c's +LBE l b e +LBF l b f +LBi l b i +LBJ l b j +LBJ's l b j's +L. B. l b +L.B. l b +LB l b +LBL l b l +LBM l b m +LBN l b n +LBNL l b n l +LBP l b p +LBPs l b p's +LBR l b r +LBS l b s +LBV l b v +LBW l b w +LCAC l c a c +LCA l c a +LCAs l c a's +LCAS l c a s +L.C.B. 
l c b +LCBL l c b l +LCBM l c b m +lcc l c c +LCC l c c +LCCN l c c n +LCCs l c c's +LCCS l c c s +LCDB l c d b +LCD l c d +LCDP l c d p +LC&DR l c and d r +LCDR l c d r +LCDs l c d's +L.C.E. l c e +LCFC l c f c +LCF l c f +LCH l c h +LCI l c i +LCIs l c i's +L. C. l c +L.C. l c +LC l c +LCL l c l +LCMC l c m c +LCM l c m +LCMR l c m r +LCMV l c m v +lcn l c n +LCO l c o +LCP l c p +LCQ l c q +LCR l c r +LCSB l c s b +LCSC l c s c +LC's l c's +LCS l c s +LCSs l c s's +LCST l c s t +LCT l c t +LCTs l c t's +LCU l c u +LCV l c v +LCVP l c v p +LCVPs l c v p's +LCZ l c z +Lda l d a +LDA l d a +LDAP l d a p +LDB l d b +LDBV l d b v +L.D.C. l d c +LDC l d c +LDCM l d c m +LDCs l d c's +LDD l d d +LDDP l d d p +LDF l d f +LDH l d h +L.D.I. l d i +LDK l d k +L. D. l d +L.D. l d +LDL l d l +LDLR l d l r +LDM l d m +LDMR l d m r +LDN l d n +LDP l d p +LDS l d s +LDU l d u +LDV l d v +LEB l e b +L. E. l e +L.E. l e +LFA l f a +L.F.C. l f c +LFC l f c +L.F.D. l f d +LFE l f e +LFFCs l f f c's +LFF l f f +LFG l f g +LFHC l f h c +LFH l f h +L. F. l f +L.F. l f +LFL l f l +LFM l f m +LFOC l f o c +LFO l f o +LFOM l f o m +LFPB l f p b +LFP l f p +LFQD l f q d +LFRJ l f r j +LFR l f r +LFs l f's +LFS l f s +LFSRs l f s r's +LFTR l f t r +LFXA l f x a +LGA l g a +LGB l g b +LGBTI l g b t i +LGBT l g b t +LGBTQ l g b t q +LGC l g c +LGD l g d +LGI l g i +L. G. l g +L.G. l g +LG l g +LGMB l g m b +LGMs l g m's +LGN l g n +LGPL l g p l +LGPZ l g p z +LG&RDD l g and r d d +LGR l g r +LGs l g's +LGS l g s +LGTB l g t b +LGT's l g t's +LGU l g u +LGUs l g u's +LGV l g v +LGVs l g v's +Lha l h a +LHB l h b +L. H. C. l h c +LHC l h c +LHD l h d +LHFP l h f p +LHICE l h i c e +LHI l h i +L. H. l h +L.H. l h +LHMC l h m c +L.H.M. l h m +LHO l h o +L. H. P. l h p +LHP l h p +LHS l h s +LHW l h w +LHX l h x +LHY l h y +L. I. l i +L.I. l i +LJBL l j b l +Lje l j e +LJJ l j j +L. J. K. l j k +L. J. l j +L.J. l j +L.J.V. l j v +LKB l k b +LKG l k g +LKK l k k +L. K. 
l k +LK l k +LKL l k l +LKML l k m l +LKPR l k p r +LKRN l k r n +lks l k s +LKS l k s +LKTI l k t i +Llapi l l a p i +LLAW l l a w +L.L.B. l l b +LLB l l b +LLCC l l c c +L.L.C. l l c +LLC l l c +LLD l l d +LL.D l l d +lle l l e +LLE l l e +LLHS l l h s +lli l l i +LLI l l i +L&L l and l +LLLE l l l e +L. L. l l +L.L. l l +LL l l +LLL l l l +LLLP l l l p +LLM l l m +LLMNR l l m n r +LLPFX l l p f x +llp l l p +LLP l l p +LLPX l l p x +LLRW l l r w +LLT l l t +LLTV l l t v +llu l l u +LLVM l l v m +LLWS l l w s +LMA l m a +LMC l m c +LMCT l m c t +LMDB l m d b +LMDC l m d c +LMD l m d +LME l m e +LMFAO's l m f a o's +LMFF l m f f +LMG l m g +LMGs l m g's +LMGTE l m g t e +LMH l m h +LMHS l m h s +lm l m +L. M. l m +L.M. l m +LM l m +LMM l m m +LMP l m p +LMPs l m p's +LMQ l m q +LMQs l m q's +L.M.S. l m s +LMS l m s +LMT l m t +LMTP l m t p +LMU l m u +LMWH l m w h +LMX l m x +LnAIB l n a i b +LNA l n a +LNAV l n a v +LNBF l n b f +LNB l n b +L.N.C. l n c +LNCS l n c s +L.N.E.R. l n e r +LNER l n e r +LNFS l n f s +LNG l n g +LNH l n h +L&N l and n +L. N. l n +L.N. l n +L.O.C.'s l o c's +Lokk l o k k +l'OL l o l +L.O.L. l o l +L. O. l o +L.O. l o +L.O.V.E. l o v e +L.O.V. l o v +LOXL l o x l +LPA l p a +L.P.A.M. l p a m +LPAM l p a m +LPARs l p a r's +LPAVS l p a v s +LPBG l p b g +L.P.B. l p b +LPB l p b +LPC l p c +LPCM l p c m +LPDA l p d a +LPDR l p d r +LPE l p e +LPF l p f +LPFM l p f m +LPGA l p g a +LPG l p g +L.P.H. l p h +LPH l p h +LPI l p i +LPLA l p l a +L&P l and p +LPL l p l +lp l p +L. P. l p +L.P. l p +LP l p +L&PM l and p m +LPMN l p m n +LPMud l p m u d +LPMUD l p m u d +LPN l p n +LPO l p o +LPRP l p r p +LPSC l p s c +LP's l p's +LPs l p's +LPS l p s +LPSN l p s n +LPTB l p t b +LPThe l p t h e +LPTV l p t v +LQ l q +LRAD l r a d +LRA l r a +LRCD l r c d +LRC l r c +L.R.C.P. l r c p +LRCP l r c p +LRDG l r d g +LRG l r g +lr l r +L. R. l r +L.R. l r +LR l r +LRO l r o +L.R.P.C. 
l r p c +LRP l r p +LRPPRC l r p p r c +LRRI l r r i +LRRP l r r p +LRS l r s +LRTA l r t a +LRT l r t +LRTR l r t r +LRTs l r t's +LRVs l r v's +LRY l r y +LSAC l s a c +LSA l s a +LSBC l s b c +LSB l s b +LSC l s c +LSCS l s c s +LSDHH l s d h h +L.S.D. l s d +LSD l s d +LSDP l s d p +LSDs l s d's +LSE l s e +LSF l s f +LSG l s g +LSH l s h +LSi l s i +LSI l s i +LSJ l s j +LSK l s k +L.S.L. l s l +ls l s +Ls l's +L. S. l s +L.S. l s +LSM l s m +LSO l s o +LSP l s p +LSPN l s p n +LSQC l s q c +LSRI l s r i +L. S. R. l s r +LSS l s s +LSSP l s s p +LSSR l s s r +LST l s t +LSTM l s t m +LSTs l s t's +LSUA l s u a +LSU l s u +LSU's l s u's +LSVCCs l s v c c's +LSV l s v +LSWR l s w r +LTAF l t a f +L. T. B. l t b +LTB l t b +LTCF l t c f +LTCI l t c i +L. T. C. l t c +L.T.C. l t c +LTC l t c +Ltda l t d a +LTDA l t d a +Ltd. limited +LTE l t e +LTFA l t f a +LTHS l t h s +LTI l t i +LTK l t k +L&T l and t +Lt. lieutenant +LTL l t l +lt l t +L. T. l t +L.T. l t +L.T.M. l t m +LTM l t m +LTMPS l t m p s +LTN l t n +ltoh l t o h +LTP l t p +LTRPC l t r p c +LTTE l t t e +LTTR l t t r +LTU l t u +LTV l t v +L. U. l u +lv l v +L. V. l v +L.V. l v +LV l v +LVMH l v m h +LWDB l w d b +LWD l w d +LWDS l w d s +LWE l w e +LWF l w f +LWH l w h +L. W. l w +L.W. l w +L.Y. l y +L&YR l and y r +M.A.D.E. m a d e +M.A.D. m a d +M.A.J. m a j +M.A.K. m a k +M. A. m a +M.A. m a +M&A m and a +M. A. O. m a o +M.A.O. m a o +M.A.R. m a r +M.A.S.K. m a s k +M.A.S. m a s +M.A.S.S. m a s s +M.B.A. m b a +M.B.B.S. m b b s +MBBS m b b s +MBC m b c +MBC's m b c's +MBDA m b d a +MBD m b d +M.B. D.P.M. m b d p m +M.B.E. m b e +MBE m b e +Mbewu m b e w u +MBF m b f +MBFW m b f w +MBGN m b g n +MBH m b h +mbi m b i +MBI m b i +M. B. J. m b j +MBJ m b j +MBK m b k +MBL m b l +M. B. m b +M.B. 
m b +MBM m b m +MBNA m b n a +mBo m b o +MBO m b o +MBPJ m b p j +MBP m b p +Mbre m b r e +mbr m b r +MBR m b r +MBSE m b s e +MBSI m b s i +MBS m b s +MBTI m b t i +MBT m b t +MBU's m b u's +MBX m b x +MCAF m c a f +MCAL m c a l +M.C.A. m c a +MCA m c a +MCA's m c a's +MCAS m c a s +MCB m c b +M. C. C. m c c +M.C.C. m c c +MCC m c c +MCC's m c c's +MCCs m c c's +MCCU m c c u +MCDA m c d a +MCDC m c d c +MCD m c d +MCDM m c d m +MCE m c e +MCFA m c f a +M. C. F. m c f +MCF m c f +MCFM m c f m +MCGJCW m c g j c w +MCG m c g +MCI m c i +MCJ m c j +MCLA m c l a +MCL m c l +M. C. m c +M.C. m c +MCMC m c m c +M. C. M. m c m +MCM m c m +MCN m c n +MCO m c o +MCOT's m c o t's +MCPC m c p c +MCPI m c p i +MCP m c p +MCPON m c p o n +MCPP m c p p +MCQ m c q +MCRD m c r d +MCR m c r +MCRP m c r p +MCSBA m c s b a +MCs m c's +M.C. S. m c s +MCS m c s +MCST m c s t +MCTFS m c t f s +MCT m c t +MCTU m c t u +MCTV m c t v +MCU m c u +MCV m c v +MCVTS m c v t s +MCW m c w +mcyG m c y g +MCYO m c y o +MCZ m c z +MDAA m d a a +MDAH m d a h +MDA m d a +mdb m d b +MDB m d b +MDC m d c +MDC's m d c's +MDCs m d c's +MDCT m d c t +mdDA m d d a +MD&DI m d and d i +Mde m d e +MDE m d e +MDF m d f +MDG m d g +MDGs m d g's +MDH m d h +MDHUs m d h u's +mDia m d i a +MDIB m d i b +MDic m d i c +MDI m d i +MDJT's m d j t's +MDK m d k +mdla m d l a +MDL m d l +MDMA m d m a +md m d +M. D. m d +M.D. m d +MD m d +MDM m d m +MDNA m d n a +M.D.N. m d n +MDPD's m d p d's +MDP m d p +MDPPP m d p p p +MDPS m d p s +MDPV m d p v +MDQ m d q +MDRC m d r c +mdr m d r +MDR m d r +MDT m d t +MDV m d v +MDX m d x +M. E. m e +M.E. m e +M.E.N. m e n +M. E. P. m e p +M.F.A. m f a +MFA m f a +MFB m f b +MFCC m f c c +MFC m f c +MFCS m f c s +MFD m f d +MFDs m f d's +MFe m f e +MFF m f f +MFG m f g +MFI m f i +MFJ m f j +MFK m f k +M. F. m f +M.F. 
m f +MFMF m f m f +MFM m f m +MFN m f n +MFNW m f n w +MFP m f p +MFR m f r +MFS m f s +MFS's m f s's +MFTBC m f t b c +MFT m f t +MFTs m f t's +Mgadla m g a d l a +Mgal m g a l +mga m g a +Mga m g a +MGA m g a +MGB m g b +Mgbo m g b o +MGCCC m g c c c +MGC m g c +MGE m g e +MGen m g e n +MGG m g g +MGH m g h +MGIMO m g i m o +MGIT m g i t +MGJH m g j h +MGK m g k +M.G.L. m g l +M&G m and g +M. G. m g +M.G. m g +M. G. M. m g m +MGM m g m +MGM's m g m's +MGMT m g m t +M&GN m and g n +MGN m g n +MGO m g o +MGP m g p +M&GR's m and g r's +Mha m h a +M.H.A. m h a +MHA m h a +MHB's m h b's +MHC m h c +MHCs m h c's +MHD m h d +MHI m h i +MHK m h k +MH&L m h and l +MHL m h l +M&H m and h +M. H. m h +M.H. m h +Mhor m h o r +Mhow m h o w +MHPL m h p l +M.H.P. m h p +MHP m h p +MHP's m h p's +MHRA m h r a +M. H. R. m h r +MHSA m h s a +MHSC m h s c +MHS m h s +MHT m h t +M.I.A. m i a +M.I.A.'s m i a's +M.I.C.M. m i c m +M.I.H. m i h +M&I m and i +M. I. m i +M.I. m i +M.I.N. m i n +M.I.R.V. m i r v +M.I.S. m i s +M.I.T. m i t +MJAHL's m j a h l's +M.J.A. m j a +MJA m j a +MJBHA m j b h a +MJB m j b +MJC m j c +M.J.F. m j f +MJF m j f +MJG m j g +MJHL m j h l +MJHL's m j h l's +M&J m and j +M. J. m j +M.J. m j +M. J. Y. m j y +MKBHD m k b h d +MKB m k b +MKDE m k d e +MKE m k e +MKFM m k f m +MKG m k g +MKK m k k +MKMF m k m f +M. K. m k +M.K. m k +MKNG m k n g +MKP m k p +MKRN m k r n +MKs m k's +MKS m k s +MKT m k t +MKTO m k t o +MKTV m k t v +MKZ's m k z's +M.L.A. m l a +MLA m l a +MLANA m l a n a +MLAs m l a's +MLB m l b +MLBPA m l b p a +MLCA m l c a +mlc m l c +M.L.C. m l c +MLC m l c +MLCs m l c's +MLD m l d +MLE m l e +MLF m l f +mlg m l g +MLG m l g +MLH m l h +MLIA m l i a +mli m l i +MLI m l i +MLIM m l i m +MLK m l k +MLK's m l k's +Mlle m l l e +MLL m l l +MLLT m l l t +M&L m and l +M. L. m l +M.L. m l +MLM m l m +MLND m l n d +M.L.N. m l n +MLN m l n +MLPH m l p h +MLP m l p +MLPs m l p's +M. L. R. m l r +MLR m l r +MLSE m l s e +M. L. S. m l s +M.L.S. 
m l s +MLS m l s +MLT m l t +MLW m l w +MLWS m l w s +MM&A m m and a +M. M. J. m m j +MMK m m k +MML m m l +M&M m and m +M. M. m m +M.M. m m +MMSA m m s a +M&M's m and m's +M. M. S. m m s +MMS m m s +MMST m m s t +MMTB's m m t b's +MMTS m m t s +MMTV m m t v +MMWR m m w r +MMX m m x +MNA m n a +MNBA m n b a +MNC m n c +MNCPPC m n c p p c +MNCs m n c's +MNDM m n d m +MNDNR m n d n r +MNDO m n d o +MNDR m n d r +M&NF m and n f +MNF m n f +MNG m n g +MNI m n i +MNLA m n l a +MNLA's m n l a's +MNLF m n l f +MNM m n m +M. N. m n +M.N. m n +MNNA m n n a +MNN m n n +MNP m n p +MNPP m n p p +MNRG m n r g +MNSD m n s d +MNS m n s +MNTC m n t c +MNT m n t +MNZM m n z m +MNZ m n z +MOBKL m o b k l +MOBK m o b k +M.O.B. m o b +M.O.D. m o d +M.O.D.O.K. m o d o k +M.O.G.U.E.R.A.'s m o g u e r a's +M. O. H. m o h +M. O. m o +M.O. m o +M.O.P. m o p +M.O.R. m o r +M.O.T. m o t +MPAA m p a a +MpA m p a +MPA m p a +MPAs m p a's +MPB m p b +MPBN m p b n +MPBu m p b u +MPCA m p c a +MPCI m p c i +MPC m p c +MPD m p d +MPE m p e +MPF m p f +MPG m p g +M.P.H. m p h +MPH m p h +MP&I m p and i +MPi m p i +MPI m p i +MPIO m p i o +MPKAB m p k a b +MPLAD m p l a d +MPLA m p l a +MPL m p l +MPLMs m p l m's +MPLR m p l r +MPLS m p l s +M&P m and p +MPM m p m +M. P. m p +M.P. m p +MPO m p o +MPPJ m p p j +M.P.P. m p p +MPP m p p +MPPSC m p p s c +MPPs m p p's +MPQC m p q c +MPR m p r +MPRO m p r o +MPRP m p r p +MPRS m p r s +MPSE m p s e +MPSF m p s f +M.P.'s m p's +MP's m p's +MPs m p's +MPS m p s +MPThe m p t h e +MPT m p t +Mpu m p u +MPU m p u +MPV m p v +MPW m p w +MPZ m p z +MQM m q m +M. Q. m q +Mra m r a +MRA m r a +M. R. B. m r b +MRB m r b +MRBs m r b's +MRCB m r c b +MRCK m r c k +MRCM m r c m +MRC m r c +M.R.C.S. m r c s +MRCS m r c s +MRDC m r d c +MRD m r d +mre m r e +MRF m r f +MR&LE m r and l e +MRL m r l +Mr. mister +MRM m r m +M. R. m r +M.R. m r +mRNA m r n a +mRNAs m r n a's +MRPGM m r p g m +MRP m r p +MRPS m r p s +MRR m r r +MRSA m r s a +M.R.S.C. m r s c +Mrs. 
misses +MRSM m r s m +MRT m r t +MRTs m r t's +MRTS m r t s +MRTT m r t t +MRU m r u +MSAA m s a a +MSAC m s a c +MSAD m s a d +MSA m s a +M.S.A.S. m s a s +MSAW m s a w +MSB m s b +MSBO m s b o +MSBSD m s b s d +MSCC m s c c +M.S.C. m s c +MSC m s c +msd m s d +MSD m s d +MSDN m s d n +MSDS m s d s +Mse m s e +MSFC m s f c +M.S.F. m s f +MSF m s f +MSFT m s f t +MSG m s g +MSI m s i +MSJ m s j +MSK m s k +MSL m s l +M&S m and s +Ms. miss +MSML m s m l +M.S.M. m s m +MSM m s m +M. S. m s +M.S. m s +MSNBC m s n b c +MSNBC's m s n b c's +MSN m s n +MSOB m s o b +MSOM m s o m +M.S.P. m s p +MSP m s p +MSPs m s p's +MSR m s r +MSRP m s r p +MSRTC m s r t c +MSSH m s s h +MSSK m s s k +MSSMLP m s s m l p +MSSM m s s m +MSS m s s +MSTA m s t a +mst m s t +MST m s t +MSTS m s t s +MSU's m s u's +MSVCC m s v c c +MSV m s v +MSVU m s v u +MSX m s x +MSY m s y +MSZP m s z p +MTAC m t a c +Mta m t a +M.T.A. m t a +MTA m t a +MTA's m t a's +MTAs m t a's +MTBE m t b e +MTBI m t b i +MTB m t b +MTC m t c +MTDB m t d b +M.T.D. m t d +MTD m t d +MTD's m t d's +MTG m t g +MTHFR m t h f r +MTHL m t h l +MTIC m t i c +MTI m t i +MTJ m t j +MTK m t k +MTKO m t k o +M&T m and t +MTM m t m +MTMR m t m r +M. T. m t +M.T. m t +Mtor m t o r +MTOSI m t o s i +M.T.O.W. m t o w +MTPA m t p a +MTPC m t p c +MTP m t p +MTQ m t q +MTRCB m t r c b +MTRC m t r c +MTRJ m t r j +MTR m t r +M. T. S. m t s +MTS m t s +MTSU m t s u +MTTM m t t m +MTT m t t +MTU m t u +MTV m t v +MTVR m t v r +MTV's m t v's +mtvU m t v u +MTVu m t v u +MTVU m t v u +MTY m t y +MTZ m t z +MUKW m u k w +M. U. m u +MUSL m u s l +M.U.s m u's +M. U.S. m u s +MVA m v a +M. V. C. m v c +M.V.C. m v c +MVC m v c +M. V. D. m v d +MVD m v d +MVH m v h +MVK m v k +M.V.M. m v m +MVM m v m +MVMs m v m's +M. V. m v +M.V. m v +M.V.O. m v o +MVO m v o +MVP m v p +MVPs m v p's +MVPS m v p s +M.V. P.T. m v p t +MVs m v's +MVS m v s +MVSN m v s n +MVSR m v s r +MVT m v t +MVV m v v +M.W.A.M. m w a m +MWA m w a +MWC m w c +MWe m w e +M. W. 
E. m w e +MWF m w f +MWHL m w h l +MWIFF m w i f f +MWI m w i +MWJCHL m w j c h l +MWJHL m w j h l +M. W. J. m w j +M. W. m w +M.W. m w +M. X. m x +myb m y b +myc m y c +Myc m y c +MYC m y c +MYCN m y c n +MyDD m y d d +M. Y. m y +M.Y. m y +MySQL m y s q l +MYSQL m y s q l +MZC m z c +MZH m z h +MZM m z m +M. Z. m z +MZP m z p +MZT m z t +NAACCR n a a c c r +NAACL n a a c l +NAAC n a a c +N.A.A.C.P. n a a c p +NAACP n a a c p +NAACP's n a a c p's +N.A.C.L. n a c l +NAFBL n a f b l +NAFC n a f c +NAFC's n a f c's +NAFH n a f h +NAFI n a f i +N. A. n a +N.A. n a +NBADL n b a d l +N.B.A. n b a +NBA n b a +N.B.A.'s n b a's +NBA's n b a's +NBAs n b a's +NBCC n b c c +NBC n b c +NBC's n b c's +NBCSN n b c s n +NBDL n b d l +NBD n b d +NBFA's n b f a's +NBF n b f +NBG n b g +NBI n b i +NBK n b k +NBL n b l +N. B. n b +N.B. n b +N.B.N. n b n +NBN n b n +NBP n b p +NBQ n b q +NBR n b r +NBR's n b r's +NBSK n b s k +nbs n b s +NBS n b s +NBSP n b s p +NBTE n b t e +NBT n b t +NBTwo n b t w o +NBTY n b t y +NBW n b w +NCAAs n c a a's +NCAM n c a m +NCA n c a +NCAP n c a p +NCBC n c b c +NCBI n c b i +NCCC n c c c +NCCF n c c f +NCCMH n c c m h +NCC n c c +NCCP n c c p +NCCs n c c's +NCCS n c c s +NCCU n c c u +NCDC n c d c +NCDD's n c d d's +NCD n c d +NCES n c e s +NCFA n c f a +N.C.F. n c f +NCF n c f +NCG n c g +NCGS n c g s +NCHC n c h c +NCH n c h +NCHU n c h u +NCID n c i d +NCI n c i +NCIS n c i s +NCIS's n c i s's +NCKU n c k u +NCLB n c l b +NCLC n c l c +NCL n c l +NCLR n c l r +NCLT n c l t +NCMA n c m a +NCMC n c m c +NCMM n c m m +NCM n c m +NCMP n c m p +NCNB n c n b +N. C. n c +N.C. n c +N.C.O. n c o +NCO n c o +NCO's n c o's +NCOs n c o's +NCPA n c p a +NCPC n c p c +NCP n c p +NCPO n c p o +NCRC n c r c +NCR n c r +NCSU n c s u +NCSY n c s y +NCTA n c t a +NCTC n c t c +NCTE n c t e +NCT n c t +NCUA n c u a +NCVO n c v o +NCVS n c v s +NCVT n c v t +NCWM n c w m +NCW n c w +NDA n d a +NDE n d e +NDEP n d e p +NDFB n d f b +NDFT n d f t +N.D.G. n d g +N. D. 
n d +N.D. n d +N.D.N.Y. n d n y +NDPH n d p h +NDPK n d p k +N.D.P. n d p +NDP n d p +NDP's n d p's +NDRC n d r c +NDRE n d r e +NDRF n d r f +N.E.A.R. n e a r +NECW n e c w +N. E. n e +N.E. n e +N.E.R.D. n e r d +NERFU n e r f u +ner n e r +N.E.R. n e r +NER n e r +N.E.W.S.T. n e w s t +NFA n f a +NFATc n f a t c +NFAT n f a t +NFB n f b +NFB's n f b's +NFC n f c +NFC's n f c's +NFD n f d +NFEA n f e a +nfed n f e d +NFFC n f f c +NFF n f f +NFH n f h +NFHS n f h s +NFIB n f i b +NFI n f i +NFISD n f i s d +N.F.L. n f l +NFL n f l +NFLPA n f l p a +NFL's n f l's +N. F. n f +N.F. n f +NFO n f o +NFPA n f p a +NFPF n f p f +NFP n f p +NFPW n f p w +NFRS n f r s +NFS n f s +NGC n g c +Nge n g e +NGF n g f +N.G.L. n g l +N. G. n g +N.G. n g +NGO n g o +NGO's n g o's +NGOs n g o's +NGRC n g r c +N.G.R. n g r +NGR n g r +NGRR n g r r +NGS n g s +NGSS n g s s +NGST n g s t +NGTC n g t c +Nha n h a +NHA n h a +NHCEs n h c e's +NHC n h c +NHCP n h c p +NHCs n h c's +NHD n h d +NHHD n h h d +NHHI n h h i +nhi n h i +NHI n h i +NHK n h k +NHK's n h k's +N.H.L. n h l +NHM&W n h m and w +N. H. n h +N.H. n h +NHP n h p +NHPs n h p's +NHPS n h p s +NHRA n h r a +NHRC n h r c +NHSCT n h s c t +NHS n h s +NHST n h s t +NHW n h w +N.I.B.B.L.E. n i b b l e +NIBC n i b c +NIBR n i b r +N. I. n i +N.I. n i +N'I n i +N.J.A.C. n j a c +N. J. A. n j a +NJA n j a +N. J. C. n j c +NJC n j c +NJIT n j i t +N. J. M. n j m +NJM n j m +N. J. n j +N.J. n j +NJN n j n +Njoo n j o o +NJPS n j p s +NJPW n j p w +N.J.S.A. n j s a +N.J.'s n j's +NKL n k l +N. K. n k +N.K. n k +N. K. N. n k n +NKP n k p +NKP's n k p's +NKR n k r +NKT n k t +NKU n k u +nkvd n k v d +NKVD n k v d +NKVM n k v m +nkv n k v +NKX n k x +NLAES n l a e s +NLAI n l a i +NLA n l a +NLB n l b +NLCF n l c f +NLC n l c +NLCS n l c s +NLDC n l d c +NLD n l d +NLD's n l d's +NLDS n l d s +N. L. n l +N.L. 
n l +NLP n l p +NLRB n l r b +NLRP n l r p +NLRs n l r's +NLW n l w +NLX n l x +NMAA n m a a +NMA n m a +NMBS n m b s +NMBU n m b u +NMCA n m c a +NMCB n m c b +NMC n m c +N. M. n m +N.M. n m +NMP n m p +NMR n m r +NMRW n m r w +NMT n m t +NMU n m u +NMV n m v +NMW n m w +NNCL n n c l +NNFL n n f l +NNF n n f +N. N. n n +N.N. n n +NNSA n n s a +NNSS n n s s +NNSU n n s u +NNSW n n s w +N.O.R. n o r +NPA n p a +NPBD n p b d +NPBL n p b l +NPB n p b +NPC n p c +NPCs n p c's +NPDC n p d c +NPD n p d +NPF n p f +NPF's n p f's +NPGL n p g l +NPG n p g +NPGS n p g s +NPH n p h +NPHS n p h s +NPIM n p i m +NPI n p i +NPJ n p j +NPK n p k +NPL n p l +NPMA n p m a +NPM n p m +NPMs n p m's +N. P. n p +N.P. n p +NP n p +NPO n p o +NPP n p p +NPP's n p p's +NPRL n p r l +NPRN n p r n +NPR n p r +NPRR n p r r +NPR's n p r's +NPRSO n p r s o +NPSC n p s c +NPSG n p s g +NPSL n p s l +NP's n p's +NPs n p's +NPS n p s +NPTI n p t i +NPT n p t +NPV n p v +NPWE n p w e +NPWS n p w s +NPY n p y +NQAI n q a i +NQEA n q e a +NQHS n q h s +N. Q. n q +nri n r i +Nri n r i +NRI n r i +NRIs n r i's +NRJ n r j +NRJs n r j's +NRK n r k +NRLA n r l a +NRL n r l +NRL's n r l's +NRMA n r m a +NRM n r m +NRN n r n +nr n r +N. R. n r +N.R. n r +N.R.A. n r a +NSA n s a +NSAP n s a p +NSA's n s a's +NSB n s b +NSC n s c +NSD n s d +NSE n s e +NSFE n s f e +NSF n s f +NSFW n s f w +NSG n s g +NSI n s i +NSL n s l +NSM n s m +NSN n s n +N. S. n s +N.S. n s +NSO n s o +NSOs n s o's +NSPCC n s p c c +NSPC n s p c +NSP n s p +NSRI n s r i +NSRL n s r l +NSR n s r +NSSCD n s s c d +NSSDC n s s d c +NSS n s s +NSTA n s t a +NST n s t +NSU n s u +NSV n s v +NSVT n s v t +NSWBLF n s w b l f +NSWC n s w c +NSWCV n s w c v +NSWEC n s w e c +NSWGR n s w g r +N.S.W. n s w +NSW n s w +Nta n t a +NTA n t a +NTA's n t a's +ntb n t b +NTB n t b +NTC n t c +NTD n t d +NTFL n t f l +NTF n t f +NTI n t i +NTKF n t k f +NTL n t l +N. T. n t +N.T. 
n t +NTR n t r +NTR's n t r's +NTSB n t s b +NTSB's n t s b's +NTSC n t s c +NTS n t s +NTTF n t t f +NTT n t t +NTTR n t t r +NTUC n t u c +NTU n t u +NTVB n t v b +NTV n t v +ntw n t w +N.U. n u +NVA n v a +NVAO n v a o +NVC n v c +NVDA n v d a +NvDA's n v d a's +NVFC n v f c +NVGOP n v g o p +NVHJ's n v h j's +NVH n v h +NVI n v i +NVL n v l +N. V. n v +N.V. n v +NWAFU n w a f u +nwa n w a +N.W.A. n w a +NWA n w a +NWA's n w a's +N.W.F.P. n w f p +NWFP n w f p +NWMP n w m p +N&W n and w +NWN n w n +N. W. n w +N.W. n w +NWPD n w p d +N.W.P. n w p +NWP n w p +NWS n w s +N.W.T. n w t +NWT n w t +nyc n y c +N.Y.C. n y c +NYC n y c +NYC's n y c's +N.Y. G.O.P. n y g o p +N.Y.L. n y l +N. Y. n y +N.Y. n y +N.Y.P.D. n y p d +NYPD n y p d +NYPD's n y p d's +NYPL n y p l +N.Y.P. n y p +NY's n y's +N.Y.S. n y s +N.Y.S.V. n y s v +N.Y.U. n y u +NYU n y u +NYU's n y u's +NYYC n y y c +NZAID n z a i d +NZAOD n z a o d +NZCA n z c a +NZDT n z d t +NZEF n z e f +NZETC n z e t c +NZFC n z f c +NZF n z f +N'Zif n z i f +NZiK n z i k +N'Zi n z i +NZIV n z i v +NZL n z l +NZLP n z l p +N.Z. n z +NZPA n z p a +NZSAS n z s a s +NZ's n z's +NZS n z s +NZTA n z t a +N.Z.W.P.W. n z w p w +O.A.C. o a c +O. A. o a +O.A. o a +O.A.R. o a r +O.A.S. o a s +O.B.E. o b e +obl o b l +Oblt o b l t +O.B. o b +O. C. o c +O.C. o c +Octl o c t l +O.C.T. o c t +O. D. o d +O.E. o e +O.F.M. o f m +O.F. o f +ofr o f r +O.F.R. o f r +OFR o f r +O.F.T.B. o f t b +OGL o g l +O&G o and g +O. G. o g +O.G. o g +O.G.S. o g s +OGS o g s +O.H.A. o h a +O. H. o h +O.H. o h +OHV o h v +O. I. o i +O.I. o i +O. J. o j +O.J. o j +OJ o j +OJSC o j s c +OKBM o k b m +OKB o k b +OKC o k c +OK'd o k d +OKd o k d +OKD o k d +oke o k e +Oke o k e +OKH o k h +O.K.I. o k i +OKK o k k +OKM o k m +O&K o and k +O. K. o k +O.K. o k +O.L.F.A.L. o l f a l +O. L. K. o l k +O. L. o l +O.L. 
o l +Olo o l o +Olov o l o v +OLPC o l p c +OLPH o l p h +ols o l s +OLs o l's +OLS o l s +OLSR o l s r +OLTL o l t l +OLVT o l v t +olvwm o l v w m +olwm o l w m +OMB o m b +OMC o m c +OMCS o m c s +OMD o m d +O&MFL o and m f l +OMF o m f +omg o m g +OMG o m g +OMGs o m g's +O. M. o m +O.M. o m +OmOm o m o m +Om's o m's +OMTP o m t p +OMVG o m v g +OMW o m w +OMX o m x +ONCHR o n c h r +ONC o n c +oncu o n c u +ONDH o n d h +ond o n d +onf o n f +ONF o n f +ONGC o n g c +ONIR o n i r +ONJSC o n j s c +ONM o n m +ONMR o n m r +O. N. o n +O.N. o n +O&O o and o +OOOA o o o a +O. O. o o +O.O. o o +OO o o +OOO's o o o's +Oop o o p +OOP o o p +OOPSLA o o p s l a +oor o o r +oose o o s e +Oo's o o's +OOTP o o t p +Ootw o o t w +OOUR o o u r +OPAC o p a c +OPAG o p a g +O.P.A. o p a +OPBF o p b f +OPB o p b +OPC o p c +OPCS o p c s +OPCW o p c w +OPD o p d +O.P.I. o p i +OPIRG o p i r g +OPJHL o p j h l +OPLC o p l c +OPL o p l +OPMB o p m b +OPM o p m +OPMs o p m's +OPN o p n +O. P. o p +O.P. o p +opr o p r +Opr o p r +OPR o p r +O.R.C. o r c +ORECA o r e c a +ORFs o r f's +ORMO o r m o +ORNL o r n l +OR&N o r and n +O. R. o r +osaa o s a a +OSAA o s a a +osa o s a +Osa o s a +O.S.A. o s a +OSA o s a +O.S.B. o s b +OSB o s b +OSBs o s b's +O.S.N. o s n +OSN o s n +O. S. o s +O.S. o s +OSP o s p +OSR o s r +OSTM o s t m +O.S.T. o s t +OSTP o s t p +OSTs o s t's +osv o s v +OSV o s v +OTB o t b +OTC o t c +OTJ o t j +OTL o t l +OTMH o t m h +OTO o t o +O.T. o t +OTP o t p +OTR o t r +OTs o t's +OTS o t s +Otu o t u +OTU o t u +otv o t v +O.U. o u +OU o u +OUP o u p +OUSA o u s a +OVA's o v a's +ovca o v c a +Ovca o v c a +OVC o v c +OVC's o v c's +Ovda o v d a +OVF o v f +OVM o v m +Ovo o v o +OVO o v o +ov o v +Ov o v +O. V. o v +O.V. o v +OV o v +OVP o v p +OVS o v s +OVT o v t +OVW o v w +O.W.A. o w a +OWBT o w b t +OWCs o w c's +OWGR o w g r +OWHA's o w h a's +OWIU o w i u +OWM o w m +O. W. o w +O.W. o w +P. A. C. p a c +P. A. J. p a j +P.A.J. p a j +P. A. M. 
p a m +P.A.M. p a m +pa p a +P. A. p a +P.A. p a +PA p a +PAQ p a q +P.A.R. p a r +P. A. S. p a s +PBAA p b a a +PBA p b a +PBA's p b a's +PBB p b b +PBCC p b c c +pBCE p b c e +pbc p b c +P.B.C. p b c +PBF p b f +PBG p b g +PBIL p b i l +PBI p b i +PBK p b k +PBL p b l +PBM p b m +PBMR p b m r +PBO p b o +pb p b +P. B. p b +P.B. p b +PB p b +PBP p b p +PBR p b r +PBS p b s +PBT p b t +PBX p b x +PBY p b y +P. C. A. p c a +PCA p c a +PCB p c b +PCB's p c b's +PCBs p c b's +PCBS p c b s +PCC p c c +PCCS p c c s +PCCW p c c w +PCCW's p c c w's +PCDDs p c d d's +PCDHB p c d h b +PCDH p c d h +PCD p c d +PCE p c e +PCeU p c e u +PCFCL p c f c l +PCFL p c f l +PCF p c f +PCFS p c f s +PCG p c g +PCGS p c g s +PCHA p c h a +PCHA's p c h a's +P.C.H. p c h +PCH p c h +PCHR p c h r +PCIe p c i e +pci p c i +PCI p c i +PCJHL p c j h l +P. C. J. p c j +PCJ p c j +PCK p c k +PCLM p c l m +pcl p c l +PCL p c l +PCMCIA p c m c i a +PCMNO p c m n o +PCM p c m +PCMS p c m s +PCNA p c n a +PCN p c n +PcoA p c o a +PCOE p c o e +PCO p c o +pc p c +P. C. p c +P.C. p c +PC p c +PCPFL p c p f l +PCP p c p +PCPV p c p v +PCRC p c r c +PCRE p c r e +PCRev p c r e v +PCRM p c r m +PCR p c r +PCSK p c s k +PCSOM p c s o m +pcs p c s +P.C.s p c's +PC's p c's +PCTL p c t l +PCT p c t +PCTV p c t v +PCU p c u +PCU's p c u's +PCW p c w +PCX p c x +pDAB p d a b +PDAB p d a b +PDA p d a +PDAs p d a's +PDBML p d b m l +PDB p d b +pdbp p d b p +PDBsum p d b s u m +PDCI p d c i +PDC p d c +PDCPD p d c p d +PDC's p d c's +PDCs p d c's +PD&D p d and d +PDD p d d +PDE p d e +PDEs p d e's +PDES p d e s +pdf p d f +PDF p d f +PDFs p d f's +PDFT p d f t +PDGF p d g f +PDG p d g +PDHJ p d h j +PDH p d h +PDI p d i +PDJ p d j +PDK p d k +PDL p d l +PDN p d n +PDO p d o +P&D p and d +P. D. p d +P.D. p d +PD p d +PDP p d p +PDP's p d p's +PDPs p d p's +P. D. Q. p d q +PDR p d r +PDSA p d s a +PDSI p d s i +PD's p d's +PDs p d's +P.D.S. p d s +PDS p d s +PDSP p d s p +PDT p d t +P.E.I. 
p e i +P.E.I.'s p e i's +P.E.N. p e n +P. E. p e +P.E. p e +P.F.L. p f l +PF's p f's +PFs p f's +PFS p f s +PFU p f u +PFV p f v +PFW&C p f w and c +P.G.A. p g a +PGA p g a +PGM p g m +PGMs p g m's +PG&N p g and n +P&G p and g +P. G. p g +P.G. p g +P.G.T. p g t +PHD p h d +P. H. G. p h g +PHH p h h +PHHS p h h s +PHLF p h l f +Phlo p h l o +PHL p h l +PHN p h n +P&H p and h +P. H. p h +P.H. p h +php p h p +PHP p h p +PHPs p h p's +P.I.D.E. p i d e +P&I p and i +P. I. p i +P.I. p i +P. I. W. p i w +Piz p i z +PJB p j b +PJC p j c +PJD p j d +P. J. F. p j f +P.J.K. p j k +pj p j +P. J. p j +P.J. p j +PJ p j +PJ's p j's +PJs p j's +P.J.T. p j t +P. K. p k +P.K. p k +PK p k +PLCC p l c c +plc p l c +PLC p l c +PLC's p l c's +PLCs p l c's +P.L.O. p l o +P. L. p l +P.L. p l +PLP p l p +PLX p l x +P. M. A. p m a +PMC p m c +PMG p m g +PML p m l +PMLP p m l p +PMMA p m m a +PMMoV p m m o v +PMM p m m +PMMS p m m s +PMMT p m m t +PMOI p m o i +PMO p m o +PMPC p m p c +PMPF p m p f +p.m. p m +p.m p m +P. M. p m +P.M. p m +PM p m +PMP p m p +PMP's p m p's +PMRN p m r n +PMRO p m r o +PMR p m r +PMRR p m r r +PMSE p m s e +PMSL p m s l +PMSM p m s m +PM's p m's +PMs p m's +P. M. S. p m s +PMS p m s +PMSSY p m s s y +PMT p m t +PMVY p m v y +PMWA p m w a +PNaCl p n a c l +pna p n a +PNA p n a +Pnau p n a u +pnb p n b +PNB p n b +PNC p n c +PNE p n e +PNE's p n e's +PNEs p n e's +PNETs p n e t's +pneus p n e u s +PNFA p n f a +PNF p n f +PNGIA p n g i a +png p n g +PNG p n g +PNH p n h +PNI p n i +PNL p n l +PNL's p n l's +PNMT p n m t +PNNL p n n l +PNNs p n n's +PNoy p n o y +pn p n +P. N. p n +P.N. p n +PN p n +PNP p n p +P.N.R.A. p n r a +P.O.D. p o d +P.O.D.'s p o d's +P&O p and o +P. O. p o +P.O. p o +P.O.S. p o s +P.O.V. p o v +P.O.W. p o w +P.O.W.'s p o w's +PPACA p p a c a +PPA p p a +PPBS p p b s +PPC p p c +PPD p p d +PPE p p e +PPF p p f +PPG p p g +P&PH p and p h +PPH p p h +PPi p p i +P.P.I. 
p p i +PPI p p i +PPJ p p j +PPKM p p k m +PPK p p k +PPL p p l +PPM p p m +PPNB p p n b +PPN p p n +PPO p p o +P. P. p p +P.P. p p +PPRP p p r p +PPR p p r +PP's p p's +PPS p p s +PPT p p t +PPTV p p t v +PPV p p v +PPVs p p v's +PPy p p y +PQDT p q d t +PQ p q +PQQ p q q +PQ's p q's +PQS p q s +PRB p r b +PRCA p r c a +PRC p r c +PRCS p r c s +PRDM p r d m +PRD p r d +PRD's p r d's +P.R.I.M.E. p r i m e +pr p r +P. R. p r +P.R. p r +PR p r +P.S.A. p s a +P.S.C. p s c +P.S.K. p s k +PSP p s p +P. s p's +P.'s p's +Ps p's +P. S. p s +P.S. p s +PSQL p s q l +PSR p s r +PSRU p s r u +PSSA p s s a +PSSA's p s s a's +PSSAs p s s a's +PSSI p s s i +PSS p s s +PSS's p s s's +PST p s t +PSU p s u +PSV p s v +PSW p s w +pTA p t a +PTA p t a +PTA's p t a's +PTAs p t a's +PTB p t b +PTBT p t b t +PTCH p t c h +PTC p t c +Pte p t e +PTEs p t e's +PTFE p t f e +ptf p t f +PTF p t f +PTH p t h +Ptie p t i e +PTI p t i +PTI's p t i's +PTK p t k +PTLLS p t l l s +PTL p t l +PTLs p t l's +PTNNT p t n n t +Ptol p t o l +PTO p t o +PTPN p t p n +PTP p t p +pt p t +P. T. p t +P.T. p t +PT p t +PTSD p t s d +PTSE p t s e +PTS p t s +PTTGC p t t g c +PTTG p t t g +PTTOW p t t o w +PTT p t t +PTTs p t t's +PTUN p t u n +PTV p t v +PTV's p t v's +P'Twa p t w a +PTY p t y +P.U.F. p u f +Puiu p u i u +PUKKE p u k k e +PUK p u k +Pul p u l +P. U. M. p u m +PVA p v a +PVAs p v a's +PVB p v b +P.V.C. p v c +PVC p v c +PvdA p v d a +PVEM p v e m +pve p v e +PVH p v h +P.V.L. p v l +PVL p v l +PVN p v n +PVO p v o +PVP p v p +P. V. p v +P.V. p v +PV p v +pvr p v r +PVR p v r +PVRS p v r s +PVs p v's +PVS p v s +PVU p v u +PWA p w a +PWB p w b +PWC p w c +PWD p w d +PWF p w f +P. W. G. p w g +P.W.G. p w g +PWG p w g +PWI p w i +pwll p w l l +PWM p w m +P.W.O. p w o +P&W p and w +P. W. p w +P.W. p w +PW p w +Pyi p y i +PYI p y i +Pyk p y k +PYK p y k +Pyl p y l +PyL p y l +PYP p y p +P. Y. p y +Pyu p y u +Pyw p y w +Pyx p y x +PZB p z b +PZL p z l +PZP p z p +P. Z. p z +Q. A. P. 
q a p +QAP q a p +Q. A. q a +QA q a +Q&A q and a +Q&A's q and a's +Q&As q and a's +QbA q b a +QBE q b e +QBH q b h +QBL q b l +Q.B. q b +QB q b +Q.C.B. q c b +QCD q c d +Q.C. q c +QD q d +QDS q d s +QEA q e a +QEC q e c +Q.E.D. q e d +QED q e d +Q.E.H. q e h +Q. E. q e +Q.H.C. q h c +QHP q h p +Q.H. q h +Q.I. q i +Q. J. q j +QJ q j +Q. J. R. q j r +Q. N. q n +QPF q p f +QPFS q p f s +QPM q p m +QPO q p o +QPOs q p o's +QPP q p p +QP q p +Q.P.R. q p r +QPR q p r +QQ q q +QRL q r l +QRL's q r l's +QRNA q r n a +QROPS q r o p s +QRP q r p +QR q r +QRS q r s +QRT q r t +QRV q r v +QSAR q s a r +QSES q s e s +QSI q s i +QSL q s l +QSM q s m +QSO q s o +QST q s t +QSY q s y +QVC q v c +QVD q v d +QVM q v m +Q.V. q v +R.A.B. r a b +R. A. E. r a e +R.A.F. r a f +R.A.J. r a j +R. A. r a +R.A. r a +R&A r and a +RAV r a v +R&AW r and a w +RAZR r a z r +RBAC r b a c +RbAg r b a g +RBA r b a +RBBP r b b p +RBC r b c +RBCs r b c's +RBD r b d +RBE r b e +RBGE r b g e +RBG r b g +RBI r b i +RBIs r b i's +R. B. J. r b j +R.B.J. r b j +RBK r b k +R.B.L. r b l +RBL r b l +RBMG r b m g +RBMG's r b m g's +RBM r b m +RBMY r b m y +RBO r b o +RBP r b p +R&B r and b +R. B. r b +R.B. r b +RBR r b r +RBS r b s +RBTCO's r b t c o's +RBTH r b t h +RBWH r b w h +RBW r b w +RBX r b x +RBZ r b z +RCAC r c a c +RCAF r c a f +rca r c a +R.C.A. r c a +RCA r c a +RCA's r c a's +RCBC r c b c +RCB r c b +RCCA r c c a +RCCD r c c d +RCCL r c c l +RCCM r c c m +RCC r c c +RCI r c i +R. C. K. r c k +RCL r c l +R.C.M. r c m +RCM r c m +RCN r c n +RCPO r c p o +RCP r c p +RCPT r c p t +R&C r and c +rc r c +R. C. r c +R.C. r c +RCRD r c r d +RCR r c r +RCSB r c s b +RCS r c s +RCT r c t +RCTs r c t's +RCTS r c t s +RCTV r c t v +RCV r c v +RCW r c w +RDB r d b +RDC r d c +RDD r d d +RdE r d e +RDFC r d f c +RDF r d f +R.D.G. r d g +R&D r and d +R. D. r d +R.D. r d +R.E.A. r e a +R. E. B. r e b +R.E.D. r e d +R.E.L. r e l +R.E.M. r e m +R.E.M.'s r e m's +R.E.O. r e o +REPL r e p l +REPLs r e p l's +R.E.P. 
r e p +R. E. r e +R.E. r e +R.E.R. r e r +R.F.A. r f a +R.F.C. r f c +R. F. r f +R.F. r f +R. F.S. r f s +R.F.W. r f w +RFW r f w +RFX r f x +RGB r g b +RGD r g d +RGF r g f +RGG r g g +RGI r g i +RGK r g k +RGMA r g m a +RGNL r g n l +RGPH r g p h +R&G r and g +R. G. r g +R.G. r g +RGR r g r +RGS r g s +RGS's r g s's +RGTP r g t p +RGU r g u +RGV r g v +RGX r g x +R. H. C. r h c +RHC r h c +RHD r h d +RHHF r h h f +RHH r h h +RHHS r h h s +RHIBs r h i b's +RHIC r h i c +RHI r h i +Rhiw r h i w +RHK r h k +rhl r h l +RHM r h m +RhoG r h o g +Rho's r h o's +RHP r h p +R. H. r h +R.H. r h +R.H.S.J. r h s j +R. I. C. r i c +R.I.C. r i c +R. I. P. r i p +R.I.P. r i p +R&I r and i +R. I. r i +R.I. r i +riu r i u +Riu r i u +rivs r i v s +Rivu r i v u +RIXS r i x s +Rixt r i x t +RJB r j b +RJD r j d +RJE r j e +rjf r j f +RJHS r j h s +R. J. J. r j j +RJJ r j j +RJL r j l +RJN r j n +R. J. r j +R.J. r j +RJ r j +RJR r j r +RJ's r j's +RJTD r j t d +R. K. B. r k b +RKD r k d +RKI r k i +RKKA r k k a +RKL r k l +R.K.O. r k o +RKO r k o +RKO's r k o's +R. K. r k +R.K. r k +RLM r l m +RLM's r l m's +R. L. r l +R.L. r l +RL r l +RLV r l v +rly r l y +RMAF r m a f +RMAG r m a g +RMA r m a +RMB r m b +RMCH r m c h +RMCL r m c l +R.M.C. r m c +RMC r m c +RMDs r m d's +R.M.E.S. r m e s +RMFL r m f l +R.M.F. r m f +RMF r m f +RMG r m g +R. M. H. r m h +RMI r m i +RMIT r m i t +RMIT's r m i t's +RMJM r m j m +R.M.K. r m k +RMK r m k +R. M. L. r m l +RML r m l +RMLs r m l's +RMM r m m +RMP r m p +rm r m +R. M. r m +R.M. r m +RM r m +RMR r m r +RMRS r m r s +RMSD r m s d +RMSDs r m s d's +RMSE r m s e +RMs r m's +RMS r m s +R. M. W. r m w +R.M.W. r m w +RMW r m w +RMX r m x +RNAO r n a o +rna r n a +RNA r n a +RNA's r n a's +RNAs r n a's +RNAS r n a s +RNC r n c +RND r n d +RNE r n e +Rnet r n e t +RNF r n f +RNG r n g +Rnic r n i c +RNJD r n j d +RNK r n k +RNLAF r n l a f +RNLI r n l i +RNLI's r n l i's +RNNs r n n's +RNOH r n o h +RNP r n p +RNPs r n p's +RNPS r n p s +R. N. 
r n +R.N. r n +RN r n +R.N.R. r n r +RNZ r n z +R.O.C.K. r o c k +R.O.C. r o c +ROKMC r o k m c +ROKN r o k n +rOmpB r o m p b +Rooi r o o i +ROP r o p +R. O. r o +R.O. r o +ROVs r o v's +Roxb r o x b +Roxx r o x x +RPA r p a +RPAYC r p a y c +RPB r p b +RP&C r p and c +rpc r p c +RPC r p c +Rpe r p e +RPE r p e +RPF r p f +RPGA r p g a +rpg r p g +RPG r p g +RPG's r p g's +RPGs r p g's +RPI r p i +RPI's r p i's +RPK r p k +RPL r p l +rpm r p m +RPM r p m +RPMS r p m s +RPO r p o +RPP r p p +R. P. r p +R.P. r p +RP r p +RPs r p's +R.P.S. r p s +RPS r p s +RPT r p t +RPVE r p v e +RPV r p v +R. Q. r q +R.Q. r q +RQW r q w +rra r r a +RRA r r a +RRC r r c +RRDE r r d e +rrd r r d +Rreli r r e l i +rre r r e +RRE r r e +RRG r r g +R. R. H. r r h +RRH r r h +RRKM r r k m +R.R.K. r r k +rRNA r r n a +rRNAs r r n a's +RRN r r n +RRP r r p +R&R r and r +rr r r +R. R. r r +R.R. r r +RR r r +RRR r r r +RRSR r r s r +RRS r r s +RSAF r s a f +R. S. A. r s a +RSA r s a +RSCG r s c g +RSCJ r s c j +R.S.C. r s c +RSC r s c +RSCS r s c s +RSD r s d +RSE r s e +RSF r s f +RSFSR r s f s r +RSG r s g +RSHA r s h a +rsh r s h +RSHS r s h s +RSICC r s i c c +RSID r s i d +RSI r s i +Rsis r s i's +RSI's r s i's +RSIS r s i s +RSK r s k +RSL r s l +rsly r s l y +RsmA r s m a +RSMC r s m c +RSMI r s m i +RSML r s m l +RSM r s m +RSM's r s m's +RSNO r s n o +RSN r s n +RSNZ r s n z +R. S. O. r s o +RSO r s o +RSPB r s p b +RSPCA r s p c a +RSpec r s p e c +RSPK r s p k +RSPO r s p o +R. S. P. r s p +RSP r s p +RSR r s r +rs r s +Rs r's +R. S. r s +R.S. r s +RS r s +RSSI r s s i +R. S. S. r s s +RSS r s s +RSSSF r s s s f +RSTC r s t c +RST r s t +RSu r s u +R.S.U. 
r s u +RSU r s u +RSU's r s u's +RSVP r s v p +RSV r s v +RSX r s x +RTAFB r t a f b +RTA r t a +RTBF r t b f +rtb r t b +RTB r t b +RTCG's r t c g's +RTC r t c +RTCs r t c's +RTD r t d +RTEC r t e c +RTEjr r t e j r +RTeOR r t e o r +RTE r t e +RTE's r t e's +RTFB r t f b +RTFM r t f m +RTF r t f +RTF's r t f's +RTFS r t f s +RTHK r t h k +RTHK's r t h k's +RTHL r t h l +RTIP r t i p +RTI r t i +RTKL r t k l +RTK r t k +RTLM r t l m +RTL r t l +RTL's r t l's +RTML r t m l +RTMP r t m p +RTM r t m +RTN r t n +RTO r t o +RTOs r t o's +RTOS r t o s +rtPA r t p a +RTP r t p +RTR r t r +rt r t +R. T. r t +R.T. r t +RT r t +rts r t s +RTS r t s +RTS's r t s's +RTTOV r t t o v +RTT r t t +RTTY r t t y +RTUK r t u k +RTU r t u +RTVC r t v c +RTVE r t v e +RTVFBiH r t v f b i h +RTV r t v +RTVV r t v v +RTW r t w +RTXC r t x c +RTX r t x +RTZ r t z +Ruao r u a o +RUC r u c +RUC's r u c's +RUFC r u f c +Rukn r u k n +Ruk r u k +RUMC r u m c +RUNX r u n x +Rupf r u p f +Rupr r u p r +rup r u p +Rup r u p +RUP r u p +R.U.R. r u r +RUR r u r +R. U. r u +R.U. r u +RU r u +RUSD r u s d +RutB r u t b +Ruu r u u +RUV r u v +Ruwa r u w a +Ruy r u y +Ruyt r u y t +RVAR r v a r +Rvat r v a t +RVC r v c +RVCT r v c t +RVD r v d +R. V. E. r v e +RVE r v e +RVGK r v g k +R.V.G. r v g +R. V. J. r v j +R.V.J. r v j +RVM r v m +RVNG r v n g +Rvo r v o +RVO r v o +RVR r v r +R. V. r v +R.V. r v +RV r v +rvs r v s +RVTD's r v t d's +RVU r v u +rwa r w a +Rwa r w a +RWA r w a +rwb r w b +RWB r w b +RWC r w c +RWD r w d +RWDSU r w d s u +RWE r w e +RWEs r w e's +RWFC r w f c +RWIS r w i s +rwjf r w j f +R.W.R.J. r w r j +R. W. r w +R.W. r w +RW r w +RWSL r w s l +R. W. S. r w s +RWS r w s +Rxa r x a +RXL r x l +RX r x +RZR r z r +rz r z +R. Z. r z +RZ r z +RZS r z s +S.A.B. s a b +S.A.D. s a d +SAIC s a i c +Sa'id s a i d +S.A.I. s a i +SAIT's s a i t's +SAKEC s a k e c +S. A. L. s a l +S.A.P.A. s a p a +S.A.P.I. s a p i +S. A. R. s a r +S.A.R. s a r +S. A. s a +S.A. s a +SA s a +S.A.S. 
s a s +SAS s a s +S.A.V.A. s a v a +Saxl s a x l +Sa'yo s a y o +SAZU s a z u +SBAC's s b a c's +Sbai s b a i +S.B.A. s b a +SBA s b a +SBBK s b b k +SBB s b b +SBCM s b c m +SBCMT s b c m t +sbc s b c +SBC s b c +SBCs s b c's +SBDE s b d e +SBD s b d +SBE s b e +SBF s b f +SBI s b i +SBKP s b k p +sbk s b k +SBK s b k +SBL s b l +SBML s b m l +SBMNH s b m n h +SBM s b m +SBN s b n +SBOE s b o e +SBOL s b o l +SBOP s b o p +Sborz s b o r z +SBP s b p +SBRJ s b r j +sb s b +S. B. s b +S.B. s b +SB s b +SBSE s b s e +SBS s b s +SBS's s b s's +SBTDC s b t d c +SBT s b t +SBTU s b t u +Sbu s b u +SBU s b u +SBY s b y +SBZ s b z +S.C.A. s c a +SCA s c a +SCBA s c b a +SCB s c b +SCBWI s c b w i +SCCA s c c a +SCCA's s c c a's +SCCC s c c c +SCCI s c c i +SCCP s c c p +S.C.C. s c c +SCC s c c +scr s c r +SCR s c r +SCRs s c r's +SCRS s c r s +SCRTC s c r t c +scry s c r y +sc s c +S. C. s c +S.C. s c +SC s c +S.C.S.C. s c s c +SC's s c's +SCS s c s +SCTC s c t c +SCTE s c t e +SCTO s c t o +SCTP s c t p +SCT s c t +SCTS s c t s +SCTV s c t v +SCUAA s c u a a +Scuf s c u f +SCU's s c u's +SCW s c w +Scymn s c y m n +SD&AE s d and a e +SDAP s d a p +SDAPS s d a p s +SDA s d a +SDAS s d a s +SDASS s d a s s +SDAT s d a t +SDAX s d a x +SDB s d b +SDCC s d c c +SDCG s d c g +SDC s d c +SDDI s d d i +SDDOT s d d o t +SDD s d d +SDDS s d d s +Sdei s d e i +sde s d e +Sde s d e +SDF s d f +SDG&E s d g and e +SDHA s d h a +SDHC s d h c +SDHD s d h d +SDHHD s d h h d +SDH s d h +SDI s d i +SDJ s d j +sdk s d k +SDK s d k +SDK's s d k's +SDLC s d l c +SDLP s d l p +SDLP's s d l p's +S. D. L. s d l +S.D.N.Y. s d n y +S. D. s d +S.D. s d +SDSM&T's s d s m and t's +S. D. S. s d s +S.E.C. s e c +Sejms s e j m's +sejr s e j r +Sekl s e k l +Sek s e k +SEK s e k +S. E. s e +S.E. s e +SESL s e s l +ses s e s +Ses s e's +SEs s e's +S.E.S. s e s +SES s e s +S.E.S.'s s e s's +S.E.X. s e x +Seyh s e y h +sfadb s f a d b +SFA s f a +SFA's s f a's +Sfax s f a x +S.F.B.J. 
s f b j +SFB s f b +SFCC s f c c +SFCH s f c h +SFC s f c +SFDR s f d r +SFD s f d +SFE s f e +SFES s f e s +SFFAS s f f a s +SFFCo s f f c o +SFFH s f f h +SFF s f f +SFG s f g +SFI s f i +S. F. L. s f l +SFL s f l +sfn s f n +SFN s f n +SFOR s f o r +SFO s f o +SFPA s f p a +SFPD s f p d +SFP s f p +SFRA s f r a +SFRJ s f r j +SFR s f r +'sf s f +sf s f +S. F. s f +S.F. s f +SF s f +SFSG s f s g +SFSR s f s r +sfs s f s +S.F.'s s f's +SFS s f s +SFSS s f s s +SFSU s f s u +SFU s f u +SFWA s f w a +SFX s f x +SFX's s f x's +SGAE s g a e +SGAP s g a p +SGA s g a +SG&A s g and a +SGA's s g a's +sgb s g b +SGB s g b +S.G.C. s g c +SGC s g c +SGD s g d +SGE s g e +S. G. F. s g f +SGF s g f +SGH s g h +Sgip s g i p +SGI s g i +SGL s g l +sgml s g m l +SGML s g m l +SGP s g p +SGRAM s g r a m +sgra s g r a +SGR s g r +S. G. s g +S.G. s g +SGSN s g s n +SGSNs s g s n's +SGS s g s +SGSY s g s y +Sgt. sergeant +sgt s g t +SGT s g t +SGU s g u +SGV s g v +SGX s g x +SHBG s h b g +SHB s h b +SHBT s h b t +SHCA s h c a +SHC s h c +SHG s h g +SHH s h h +S.H.I.E.L.D.'s s h i e l d's +S.H.I.E.L. s h i e l +SHL s h l +Shma s h m a +SHMD s h m d +SHM s h m +SHN s h n +S. H. s h +S.H. s h +S.I.D. s i d +SIF s i f +S.I.R. s i r +S. I. s i +S.I. s i +S.I.T. s i t +SJAM s j a m +SJCH s j c h +SJC s j c +SJDA s j d a +S.J.D. s j d +sjef s j e f +SJEM s j e m +S. J. J. F. s j j f +SJK s j k +S.J.L. s j l +SJL s j l +SJM s j m +Sjon s j o n +SJPCD s j p c d +SJP s j p +SJPT s j p t +SJR s j r +SJR's s j r's +S. J. s j +S.J. s j +SJ s j +SKB s k b +SKC s k c +SKD s k d +S. K. F. s k f +SKF s k f +SKG s k g +SKH s k h +SKJ s k j +SKPC s k p c +SKP s k p +SKR s k r +SKSD s k s d +S. K. s k +S.K. s k +SK s k +S. K. S. s k s +S.L.A.A.'s s l a a's +SLAF s l a f +sla s l a +SLA s l a +SLAs s l a's +SLCO s l c o +SLC s l c +SLDL s l d l +SLD s l d +S.L.E. s l e +SLF s l f +SLG s l g +SLHS s l h s +S.L.I.F.E.R. s l i f e r +SLL s l l +SLMC s l m c +S. L. M. 
s l m +SLM s l m +SLN s l n +SLPIM s l p i m +SLP s l p +SLRC s l r c +SLR s l r +S&L s and l +SLS&E s l s and e +SLSF s l s f +SLSK s l s k +S. L. s l +S.L. s l +SL s l +SLS s l s +SLVR s l v r +SLV s l v +S.M.A.R.T. s m a r t +S.M.A.S.H. s m a s h +SMA s m a +SMB s m b +SMe s m e +SME s m e +SME's s m e's +SMEs s m e's +SMF s m f +SMG s m g +SMHI s m h i +SMH s m h +SmI s m i +SMI s m i +SMJR s m j r +SMK s m k +SMLS s m l s +SMMT s m m t +SMN s m n +SMNS s m n s +SMP s m p +SMP's s m p's +SMPSs s m p s's +SMPTE s m p t e +SMRJ s m r j +smr s m r +SMR s m r +S&M s and m +sm s m +S. M. s m +S.M. s m +SM s m +S.M.'s s m's +SMs s m s +S.M.S. s m s +SMS s m s +SMS's s m s's +SMSU s m s u +SMTP s m t p +S.M.T. s m t +SMT s m t +SMTV s m t v +SMU s m u +SMW s m w +SMX s m x +Smyl s m y l +SMYS s m y s +SNA s n a +SNBA s n b a +SNB s n b +SNCB s n c b +SNCC s n c c +SNCF s n c f +SNC s n c +SNDC s n d c +SND s n d +SNEP s n e p +SNESjr s n e s j r +SNES s n e s +SNET s n e t +SNF s n f +SNFU s n f u +sngle s n g l e +SNG s n g +SNK s n k +SNLA s n l a +SNL s n l +SNLS s n l s +SNMCMG s n m c m g +SNMC s n m c +SNMMA s n m m a +SNMP s n m p +SNNPR s n n p r +SNNR s n n r +SNPJ s n p j +SNP s n p +SNPs s n p's +SNRI s n r i +SNR s n r +SNSD's s n s d's +SnSe s n s e +S. N. s n +S.N. s n +SN s n +SNS s n s +S.O.E. s o e +S. O. s o +S.O. s o +Sos s o's +S.O.S. s o s +SOS s o s +Sovn s o v n +Sov s o v +SOX s o x +S. O. Y. s o y +S&P 500 s and p five hundred +S.P.A.L. s p a l +SPB s p b +SPCAs s p c a's +SPCG s p c g +SPCK s p c k +S.P.C. s p c +SPC s p c +SPDI s p d i +SPD s p d +Spe s p e +SPE s p e +SPES s p e s +SPFH s p f h +SPFL s p f l +SPF s p f +SPG s p g +SPHL s p h l +SPH s p h +SPINE's s p i n e's +SPIN's s p i n's +SPIR s p i r +SPI s p i +S.P.I.T. 
s p i t +SPJA s p j a +SPK s p k +SPLA s p l a +SPLC s p l c +SPL s p l +SPME s p m e +SPML s p m l +SPMRL s p m r l +SPM s p m +SPNJ s p n j +SPNM s p n m +SPNN s p n n +SPN s p n +Spoa s p o a +SPOC s p o c +SPOF s p o f +SPO s p o +SPOU s p o u +SpPIn s p p i n +SPP s p p +SPP's s p p's +SPQA s p q a +SPRL s p r l +SPRM s p r m +SPR s p r +SPRU s p r u +S&P s and p +SPSA s p s a +SPSL s p s l +S. P. s p +S.P. s p +SP s p +SP's s p's +SPs s p's +SPS s p s +SPSS s p s s +SPTA s p t a +SPT s p t +SPUC s p u c +SPUP s p u p +SPU s p u +SPUs s p u's +SPV s p v +SQA s q a +SQBB s q b b +SQI s q i +sql s q l +SQL s q l +SQM s q m +sq s q +S. Q. s q +SQ s q +Sra s r a +SRA s r a +SRAs s r a's +Srba s r b a +SRBIJA s r b i j a +Srbi s r b i +SRBOC s r b o c +SRBP s r b p +SRB s r b +SRBs s r b's +SRBY s r b y +SRCC s r c c +SRCL s r c l +SRC s r c +SRC's s r c's +SRCS s r c s +S.R.E. s r e +SRE s r e +SRFC s r f c +S.R.F. s r f +SRF s r f +S.R.G. s r g +SRG s r g +SRGs s r g's +Srhir s r h i r +SRH s r h +S. R. J. s r j +SRK s r k +SRK's s r k's +SRLGs s r l g's +S.R.L. s r l +SRL s r l +SRMC s r m c +S.R.M. s r m +SRM s r m +SRMs s r m's +Srni s r n i +SRN s r n +srp s r p +SRPT s r p t +S. R. R. s r r +SRR s r r +sr s r +S. R. s r +S.R. s r +SR s r +SRS s r s +SRT s r t +SRT's s r t's +Srul s r u l +SRU s r u +SRU's s r u's +SRV s r v +srx s r x +SRY s r y +SSAA s s a a +SSAB s s a b +SSAC s s a c +SSAR s s a r +SSA s s a +SSAs s s a's +SSBSE s s b s e +S.S.B. s s b +SSB s s b +SSCCC s s c c c +S.S.C. s s c +SSC s s c +SSCV s s c v +SSDB s s d b +SSDL s s d l +SSDP s s d p +S.S.D. 
s s d +SSD s s d +SSDs s s d's +SSE s s e +SSFL s s f l +SSF s s f +SSGRC s s g r c +SSG s s g +SSHSA's s s h s a's +SSH s s h +SSI s s i +SSIS s s i s +SSJA s s j a +SSKI's s s k i's +SSK s s k +SSLAM s s l a m +ssl s s l +SSL s s l +SSLT s s l t +SSME s s m e +SSM s s m +SSN s s n +SSOD s s o d +SSOF s s o f +SSO s s o +SSoSV s s o s v +SSPA s s p a +SSPH s s p h +SSPR s s p r +SSP s s p +SSPX s s p x +SSQ s s q +SSRAA s s r a a +SSRF s s r f +SSRI s s r i +SSRIs s s r i's +SSRN s s r n +SSRP s s r p +SSR s s r +S&S s and s +SSSC s s s c +SSSI s s s i +SSSR s s s r +ss s s +S. s s's +Ss s's +S. S. s s +S.S. s s +SSS s s s +SSTH s s t h +SSTL s s t l +SSTR s s t r +SST s s t +Ssu s s u +SSU s s u +SSVC s s v c +SSV s s v +SSWAHS s s w a h s +SSWC s s w c +ssw s s w +SSW s s w +SSX s s x +Ssy s s y +ST&AJ s t and a j +S.T.A.L.K.E.R. s t a l k e r +S.T.A.T.U.S. s t a t u s +STB s t b +STCC s t c c +S.T.C. s t c +STC s t c +STCW s t c w +STDP s t d p +STDs s t d's +STD s t d +STF s t f +stfv s t f v +STGs s t g's +STG s t g +STIs s t i's +STIS s t i s +STi s t i +STI s t i +STK s t k +stl s t l +S.T.L. s t l +STL s t l +STMB s t m b +STMIK s t m i k +STM's s t m's +STM s t m +STNS s t n s +STN s t n +STPI s t p i +STPNS s t p n s +STPRI s t p r i +STP s t p +S.T.R.I.D.E. s t r i d e +STRV s t r v +Stryj s t r y j +STRZ s t r z +S&T s and t +STScI s t s c i +ST's s t's +STS s t s +S. T. s t +S.T. s t +STT s t t +STVL s t v l +STVS s t v s +STV s t v +STXBP s t x b p +STX s t x +S.U.C. s u c +SUDV s u d v +suo s u o +Suo s u o +S. U. 
s u +SUSV s u s v +Susz s u s z +SUTs s u t's +SUT s u t +Suu s u u +SUVF s u v f +SUVs s u v's +SUV s u v +SUWN s u w n +SUW s u w +Suy s u y +Svac s v a c +svar s v a r +SVA s v a +SVB s v b +SVC s v c +SVD s v d +sve s v e +Sve s v e +SVGA s v g a +SVG s v g +SVGT s v g t +SVIA s v i a +SVM's s v m's +SVMs s v m's +SVM s v m +svn s v n +svom s v o m +SVOPC s v o p c +SVO s v o +SVP s v p +SVPW s v p w +SVSCEP s v s c e p +SVSC s v s c +SVS s v s +sv s v +S. V. s v +S.V. s v +SV s v +SVTs s v t's +SVT s v t +S.V.U. s v u +SVU s v u +SVV s v v +SVW s v w +SVYASA s v y a s a +SVZ s v z +SWABC s w a b c +SWAC s w a c +S.W.A. s w a +SWC s w c +SWD s w d +S.W.E.A.T. s w e a t +Swe s w e +SWE s w e +SWF s w f +SWG s w g +SWHL s w h l +SWHS s w h s +SWH s w h +SWJN s w j n +S. W. K. s w k +SWMRS s w m r s +SWM s w m +SWNH s w n h +SWOC s w o c +S.W.O.R.D. s w o r d +SWPA s w p a +SWPL s w p l +SWP s w p +SwRI s w r i +SWR s w r +S&W s and w +SWS s w s +sw s w +S. W. s w +S.W. s w +SW s w +SWTPC s w t p c +SWT s w t +SWWTP s w w t p +SXCT s x c t +SXN s x n +SXSW s x s w +sx s x +S.X. s x +SX s x +SXY s x y +SXZ s x z +SysML s y s m l +SysRq s y s r q +Sys s y's +SYS s y s +SysV s y s v +S. Y. s y +S.Y. s y +SyT s y t +syv s y v +Syxx s y x x +SZA s z a +Szasz s z a s z +SZDSZ s z d s z +Szer s z e r +szkic s z k i c +Szklo s z k l o +Szlak s z l a k +SzMME s z m m e +Szpir s z p i r +SZSE s z s e +sz s z +S. Z. s z +SZ s z +Szu s z u +Szyk s z y k +SZZ s z z +taf t a f +Taf t a f +TAF t a f +T.A.P. t a p +T.A.S. t a s +T. A. t a +T.A. t a +T&A t and a +TAZ t a z +TBA t b a +TBCA t b c a +TBCs t b c's +TBCS t b c s +TBD t b d +TBE t b e +TBF t b f +TBHS t b h s +TBI t b i +TBK t b k +TBL t b l +TBMA t b m a +TBMM t b m m +TBMs t b m's +T.B.M. t b m +TBM t b m +TBNJ t b n j +TBN t b n +TBP t b p +TBSC t b s c +TBS t b s +T. B. t b +T.B. 
t b +TB t b +TBTG t b t g +TBT t b t +TBV t b v +TBWA t b w a +TBX t b x +TCAR t c a r +TCA t c a +TCC t c c +TCDB t c d b +TCDC t c d c +TCDD t c d d +TCDOG t c d o g +TCDSB t c d s b +TCEB t c e b +T.C.E. t c e +TCE t c e +TCFAQ t c f a q +TCF t c f +TCGA t c g a +TCG t c g +TCHC t c h c +TCHRD t c h r d +TCH t c h +TCiAP t c i a p +TCI t c i +TCKs t c k's +TCLP t c l p +TCL t c l +TCM t c m +TCO t c o +TCPL t c p l +TCPMP t c p m p +TCP's t c p's +TCP t c p +TCR t c r +T. C. S. t c s +TCS t c s +tc t c +T. C. t c +T.C. t c +TC t c +TCT t c t +TCU's t c u's +TCUs t c u's +tcu t c u +TCU t c u +TCV t c v +TCWC t c w c +TCWP t c w p +TCW t c w +TDA t d a +tdb t d b +TDCC t d c c +TDCi t d c i +TDCJ t d c j +TDC's t d c's +TDC t d c +TDD t d d +TDE t d e +TDF t d f +TDH t d h +TdIF t d i f +TDI t d i +TDK t d k +TDLAS t d l a s +TDMA t d m a +TDME t d m e +TDM t d m +TDO t d o +TDP t d p +TDRS t d r s +TDR t d r +TDSB's t d s b's +TD's t d's +TDs t d's +T. D. t d +T.D. t d +TDT t d t +Teatr t e a t r +TEBD t e b d +T. E. t e +T.E. t e +TEVT t e v t +TFAP t f a p +TFAS t f a s +TFA t f a +TFC t f c +TFCU t f c u +TFF t f f +TFG t f g +T.F.H. t f h +TFH t f h +TFIH t f i h +TFIID t f i i d +TFK t f k +TFL t f l +TFM t f m +TFN t f n +TFO t f o +TFP t f p +TFR t f r +TFSA t f s a +TFSI t f s i +TFSP t f s p +TFS t f s +T&F t and f +TFTC t f t c +tf t f +T. F. t f +T.F. t f +TF t f +TFTR t f t r +TFT t f t +TFVC t f v c +TFWA t f w a +TFW t f w +TGA t g a +T. G. C. t g c +TGC t g c +tge t g e +T.G.E. t g e +TGE t g e +tgf t g f +TGF t g f +TGG t g g +TGIF t g i f +TGL t g l +TGM's t g m's +TGM t g m +TGP t g p +TGS t g s +tg t g +T. G. t g +T.G. t g +TG t g +TGVs t g v's +tgv t g v +TGV t g v +THB t h b +THCF t h c f +THC t h c +T.H.E.M. t h e m +T.H.F. t h f +THF t h f +THG t h g +THHK t h h k +T.H.I.N.K. B.I.G. t h i n k b i g +THLL t h l l +THL t h l +THMP t h m p +THM t h m +THNN t h n n +Thok t h o k +T.H.P. 
t h p +THQ's t h q's +THQ t h q +THR's t h r's +THR t h r +ThSe t h s e +THSR t h s r +T. H. S. t h s +T. H. t h +T.H. t h +T.H.U.N.D.E.R. t h u n d e r +THW t h w +THX t h x +TIAA t i a a +T. I. A. t i a +TIFR t i f r +TIF t i f +T.I.'s t i's +T.I. t i +Tiu t i u +tiv t i v +Tiv t i v +TIYM t i y m +Tiy's t i y's +TJFR t j f r +T.J.F. t j f +T. J. J. t j j +TJJ t j j +TJP t j p +TJRC t j r c +T. J. S. t j s +T. J. t j +T.J. t j +TJ t j +t'ju t j u +TJX t j x +tjz t j z +TKAG t k a g +TKA's t k a's +TKA t k a +T. K. B. t k b +TKD t k d +TKE t k e +TKI t k i +tko t k o +Tko t k o +TKO t k o +TKP t k p +TK's t k's +TKS t k s +tk t k +T. K. t k +T.K. t k +TK t k +TKT t k t +Tlas t l a's +TLA t l a +TLC's t l c's +TLC t l c +TlCu t l c u +TLDs t l d's +TLDS t l d s +TLD t l d +TLE t l e +TLF t l f +TLG t l g +TLH t l h +tli t l i +TLI t l i +TLM t l m +TLN t l n +tlp t l p +TLP t l p +TLRs t l r's +TLR t l r +TLs t l's +TLS t l s +TLTB t l t b +tl t l +T. L. t l +T.L. t l +TL t l +Tluk t l u k +TLU's t l u's +Tluszcz t l u s z c z +TLV t l v +TMA t m a +TMBG t m b g +TMB t m b +tmc t m c +TMC t m c +TMDL t m d l +TMD t m d +TMEM t m e m +TMF t m f +TMGs t m g's +TMG t m g +TMI t m i +TMJ t m j +T. M. M. t m m +TMMTX t m m t x +TMNT t m n t +TMOK t m o k +tmos t m o s +TMO's t m o's +TMOS t m o s +TMO t m o +TMPRSS t m p r s s +TMP t m p +tmRNA t m r n a +TMRS t m r s +TMR t m r +TMSI t m s i +TMSs t m s's +T.M.s t m's +TM's t m's +TMS t m s +tm t m +T. M. t m +T.M. t m +TM t m +T.M.T. 
t m t +TMT t m t +TMU t m u +TMX t m x +TMZ t m z +TNA's t n a's +TNAS t n a s +tna t n a +TNA t n a +tnbc t n b c +TNCA t n c a +TNCC t n c c +TNCDSB t n c d s b +TNC t n c +TNFA t n f a +TNFRSF t n f r s f +TNFSF t n f s f +TNF t n f +TNGHT t n g h t +TNG t n g +TNIP t n i p +TNI t n i +TNL t n l +TNMM t n m m +TNMS t n m s +TNM t n m +TNNC t n n c +TNN t n n +TNO t n o +TNPL t n p l +TNP t n p +TNQ t n q +TNRIS t n r i s +TNRP t n r p +TNR t n r +TNSDL t n s d l +TNSTC t n s t c +TNS t n s +TNTAs t n t a's +tn t n +T. N. t n +T.N. t n +TN t n +TNT's t n t's +TNT t n t +T.O.'s t o's +T. O. t o +T.O. t o +TOTP t o t p +tou t o u +Tou t o u +ToU t o u +TOU t o u +TPAC t p a c +TPAO t p a o +tPA t p a +TPA t p a +TPB t p b +TPC t p c +TPEC t p e c +TPF t p f +TPG t p g +TPH t p h +TPI t p i +TPLF t p l f +TPMS t p m s +TPM t p m +TPO t p o +TPP t p p +tpr t p r +TPR t p r +TPS t p s +tp t p +T. P. t p +T.P. t p +TP t p +TPU t p u +TPVs t p v's +TPWS t p w s +TPW t p w +TQR t q r +TQS t q s +T.Q. t q +TQ t q +tra t r a +Tra t r a +TRA t r a +TRAXX t r a x x +Trbic t r b i c +T. R. B. t r b +TRB t r b +Trcek t r c e k +TRC t r c +TRD t r d +TRFA t r f a +TRF t r f +TRG t r g +TRHS t r h s +TRH t r h +T.R.I.B.E. t r i b e +TRIZ t r i z +TrkA t r k a +TRL t r l +Trmcic t r m c i c +TRMM t r m m +T.R.M.P.A. t r m p a +T. R. M. t r m +tRNA t r n a +TRNC t r n c +TRPC t r p c +TRPM t r p m +TRPP t r p p +TRP's t r p's +TRPs t r p's +TRP t r p +TRPV t r p v +TRPY t r p y +TRSC t r s c +TRSI t r s i +trs t r s +T.R.S. t r s +TRS t r s +TRSV t r s v +tr t r +T. R. t r +T.R. t r +TR t r +TRT t r t +T.R.U. t r u +truTV t r u t v +TruTV t r u t v +Trve t r v e +TRVL t r v l +TRV t r v +TRW t r w +TRX t r x +TRZ t r z +Tsa t s a +TSA t s a +TSB t s b +TSCC t s c c +TSC t s c +TSF t s f +TSG t s g +TSH t s h +TSI t s i +tsit t s i t +Tsiv t s i v +TSJDF t s j d f +TSLA t s l a +TSLC t s l c +TSL t s l +TSN t s n +T.S.O.L. t s o l +tso t s o +Tso t s o +T.S.O. 
t s o +TSO t s o +TSP t s p +TSRA t s r a +TSRTC t s r t c +tsr t s r +TSR t s r +TSSA t s s a +TSS t s s +TSTA t s t a +TSTC t s t c +Ts t's +T. S. t s +T.S. t s +TS t s +TST t s t +tsus t s u s +Tsvi t s v i +TSV t s v +TSW t s w +TSX t s x +TTAB t t a b +Tta t t a +TTA t t a +TTBB t t b b +TTCP t t c p +TTC's t t c's +TTC t t c +TTDC t t d c +TTD t t d +tteok t t e o k +tte t t e +TTFF t t f f +TTHS t t h s +TTH t t h +TTI t t i +TTKG t t k g +TTKST t t k s t +TTL t t l +T.T.N. t t n +ttp t t p +TTP t t p +TTR t t r +TTS t t s +T&T t and t +T. T. t t +T.T. t t +TT t t +TTU t t u +TTV t t v +TTW t t w +TUAW t u a w +TUBB t u b b +TUDCA t u d c a +TUHUS t u h u s +TUKP's t u k p's +T'uqu t u q u +TUSCC t u s c c +TUSC t u s c +T.U. t u +TU t u +Tuzk t u z k +tvaan t v a a n +Tvam t v a m +TVARK t v a r k +tva t v a +TVA t v a +TVB's t v b's +TVB t v b +TVCM t v c m +TVC t v c +TVD t v d +TVETs t v e t's +TVE t v e +TVF t v f +TVGN t v g n +TVGOS t v g o s +TVii t v i i +TVIn t v i n +TVi t v i +TVI t v i +TVL t v l +TVMK t v m k +TVM t v m +TVN t v n +TVNZ's t v n z's +TVNZ t v n z +tvo t v o +Tvo t v o +TVO t v o +TVP t v p +TVRi t v r i +tvr t v r +TVR t v r +TVSpy t v s p y +tvs t v s +TV's t v's +TVs t v's +TVS t v s +tVTA t v t a +TVT t v t +tv t v +T. V. t v +T.V. t v +TV t v +TVXQ's t v x q's +TVXQ t v x q +TVX t v x +TWAP t w a p +TWA's t w a's +Twa t w a +TWA t w a +T. W. B. t w b +TWCS t w c s +TWC t w c +TWF t w f +Twi t w i +TWI t w i +TWK t w k +T.W.P. t w p +TWP t w p +TWR t w r +TWTs t w t's +TWT t w t +T. W. t w +T.W. t w +TW t w +T.W.U. t w u +TWU t w u +TXC t x c +TxDOT t x d o t +TXII t x i i +TXJS t x j s +TXK t x k +TXMM t x m m +TXNDC t x n d c +TXP t x p +TXTC t x t c +TXT t x t +T.X. 
t x +TX t x +TXU t x u +tya t y a +TYC t y c +TYIB t y i b +TYIN t y i n +Tyk t y k +Tza'ar t z a a r +Tze t z e +TZL t z l +Tzrif t z r i f +Tzuh t z u h +TZUM t z u m +Tzvi t z v i +UAAP's u a a p's +UAAP u a a p +UAB's u a b's +UAB u a b +UAC u a c +UADY u a d y +UAE's u a e's +U.A.E. u a e +UAE u a e +UAF u a f +UAHC u a h c +UAIC u a i c +UAI u a i +UALR u a l r +UAL u a l +UALVP u a l v p +UAM u a m +UANL u a n l +UAN u a n +UAOC u a o c +UAP u a p +UAR's u a r's +UARS u a r s +UAR u a r +UASLP u a s l p +UASL u a s l +Ua's u a's +UAs u a's +UAS u a s +uat u a t +U.A. u a +UA u a +UAVs u a v's +UAV u a v +U.A.W. u a w +UAW u a w +Uba u b a +UBA u b a +UBBC u b b c +UBB u b b +UbcM u b c m +UBC's u b c's +UBC u b c +UBD u b d +Ube u b e +UBE u b e +ubi u b i +Ubi u b i +U.B.I. u b i +UBI u b i +UBM's u b m's +UBM u b m +UBOS u b o s +UBO u b o +UBPR u b p r +UBP u b p +UBRE u b r e +UBRS u b r s +UB's u b's +UBS u b s +UB u b +UBU u b u +UBV u b v +UBX u b x +UCAC u c a c +Ucar u c a r +UCAR u c a r +Uca u c a +UCA u c a +UCAV u c a v +UCBH u c b h +UCBTLA u c b t l a +UCB u c b +UCCE u c c e +UCCJ u c c j +UCCS u c c s +UCC u c c +UCDA u c d a +UCDP u c d p +UCD u c d +UCF's u c f's +UCF u c f +UCG u c g +UCIEP u c i e p +UCI's u c i's +UCI u c i +UCLA's u c l a's +UCLA u c l a +UCLH u c l h +UCL u c l +Ucmak u c m a k +UCMJ u c m j +UCMMA u c m m a +UCMP u c m p +UCM u c m +Ucn u c n +UCOS u c o s +UCO u c o +UCP u c p +UCREF u c r e f +UCR u c r +UCSB u c s b +UCSC u c s c +UCSD's u c s d's +UCSD u c s d +UCSF u c s f +UCSN u c s n +uCs u c's +UCS u c s +UCTI u c t i +UCT u c t +Uc u c +U.C. u c +UC u c +Ucuncu u c u n c u +UCU u c u +UCVTS u c v t s +UCW u c w +Uczta u c z t a +UDAR u d a r +UDA's u d a's +Uda u d a +UDA u d a +UDC u d c +UDF u d f +UDG u d g +UDHR u d h r +UDICHI u d i c h i +Udit u d i t +UDK u d k +UDN u d n +Udny u d n y +UDP u d p +UDRP u d r p +UDR u d r +UDS u d s +UDTs u d t's +UDT u d t +ud u d +Ud u d +U. D. 
u d +UD u d +udu u d u +Udu u d u +UDYCO u d y c o +UEA u e a +UEBT u e b t +UEC u e c +Ueda's u e d a's +Uéda u e acute d a +UEFA's u e f a's +UEFA u e f a +UEFI u e f i +UEFS u e f s +UEF u e f +UEGCL u e g c l +UEI u e i +UET u e t +U. E. u e +UE u e +UFABC u f a b c +UFA's u f a's +UFCO u f c o +UFC's u f c's +UFC u f c +UFCW u f c w +UFD u f d +UFE u f e +UFF u f f +UFIA u f i a +UFIP u f i p +UFI u f i +UFJ u f j +UFL's u f l's +UFL u f l +UFM u f m +UFOFU u f o f u +UFORE u f o r e +UFO's u f o's +UFOs u f o's +Ufot u f o t +ufo u f o +Ufo u f o +UFO u f o +UFPE u f p e +UFPLS u f p l s +UFPR u f p r +UFRGS u f r g s +UFRJ u f r j +UFSB u f s b +UFSM u f s m +UF's u f's +UFS u f s +UFTP u f t p +UFT u f t +U. F. u f +UF u f +UFU u f u +UFW u f w +Uga u g a +UGA u g a +UGCCWA u g c c w a +UGC u g c +Uge u g e +UGK u g k +UGME u g m e +Ugni u g n i +UGP u g p +Ugra u g r a +Ugrszke u g r s z k e +UGS u g s +UGT u g t +Ug u g +UG u g +UHD u h d +UHF u h f +Uhha u h h a +UHMW u h m w +uhn u h n +UHOA u h o a +UHPT u h p t +UHP u h p +UHRA u h r a +UHRF u h r f +UHSAA u h s a a +UHTCs u h t c's +UHTI u h t i +Uhud u h u d +U.H. u h +uhur u h u r +UIAA u i a a +Uiam u i a m +UIA u i a +UIAW u i a w +Uibh u i b h +Uible u i b l e +UIC u i c +UID u i d +UIFL u i f l +Uig u i g +UIL u i l +UIMC u i m c +UIM u i m +UIN u i n +UIP u i p +uisae u i s a e +UISD u i s d +UiS u i s +UI's u i's +UIS u i s +UiTM u i t m +uit u i t +Uit u i t +UIT u i t +UI u i +UIV u i v +Uiy u i y +UJA u j a +UJC u j c +Ujed u j e d +UJEP u j e p +Ujsag u j s a g +UJS u j s +uj u j +U.J. u j +UJ u j +Uka u k a +UKA u k a +UKCC u k c c +UKC u k c +Ukhra u k h r a +Ukic u k i c +UKIP's u k i p's +Uki u k i +UKM u k m +UKNC u k n c +Ukoh u k o h +Uko u k o +UKPC u k p c +UKRDA's u k r d a's +UKRDA u k r d a +ukr u k r +Ukr u k r +UKR u k r +UK's u k's +UKTV's u k t v's +UKTV u k t v +Ukui u k u i +u'k u k +uk u k +Uk u k +U.K. 
u k +UK u k +UKUP u k u p +UKVRN u k v r n +UKW u k w +Ukwu u k w u +ULC u l c +ULDB u l d b +ULEB u l e b +ULEV u l e v +ULFA u l f a +Ulic u l i c +Ull u l l +Ulms u l m's +UL's u l's +ULS u l s +UlSU u l s u +Uluj u l u j +U.L. u l +UL u l +ulus u l u s +Ulus u l u's +ULV u l v +Ulwa u l w a +Ulwe u l w e +UMBC u m b c +Umbr u m b r +UMCC u m c c +UMC u m c +UMD u m d +umelcu u m e l c u +UMe u m e +UMF u m f +UMG u m g +UMHB u m h b +UMH u m h +UMKC's u m k c's +UMKC u m k c +UMK u m k +Umla u m l a +UML u m l +Umme u m m e +UMMSM u m m s m +UMNO's u m n o's +Umno u m n o +UMNO u m n o +UMN u m n +UMO u m o +UMPP u m p p +UMP's u m p's +Umri u m r i +UM's u m's +UMTS u m t s +UMTV u m t v +UMUC u m u c +U. M. u m +U.M. u m +UMWA's u m w a's +UMWA u m w a +UMW's u m w's +UNAB u n a b +UNAFF u n a f f +UNAH u n a h +UNAMIR u n a m i r +UNAPACK's u n a p a c k's +UNB u n b +UNCAC u n c a c +UNCAF u n c a f +UNCG's u n c g's +UNCG u n c g +UNCHE u n c h e +UNCHS u n c h s +UNCMAC u n c m a c +UNCRO u n c r o +UNC's u n c's +UNCTAD u n c t a d +UNC u n c +UNCW u n c w +UNFCCC u n f c c c +UNFICYP u n f i c y p +UNFPA u n f p a +UNFP u n f p +UNF u n f +UNFWP u n f w p +UNGC u n g c +UNHCR u n h c r +UNHRC's u n h r c's +UNH u n h +UNJLC u n j l c +Unli u n l i +UNL u n l +Unlu u n l u +UNLV u n l v +UNMCK u n m c k +UNMC u n m c +UNMEE's u n m e e's +UNMEE u n m e e +UNMIH u n m i h +UNMIK u n m i k +UNMIL u n m i l +UNMOVIC u n m o v i c +unm u n m +UNM u n m +UNNC u n n c +unnd u n n d +UNODC u n o d c +UNOMIG u n o m i g +UNPA u n p a +UNPD u n p d +UNPO u n p o +UNP u n p +UNRA u n r a +UNRSGN u n r s g n +UNRWA u n r w a +UNSCOM u n s c o m +UNSCR u n s c r +UNSC's u n s c's +UNSC u n s c +UNSh u n s h +Unst u n s t +Uns u n's +U.N.'s u n's +UNSW u n s w +UNTSO u n t s o +UNTS u n t s +Unt u n t +UNT u n t +U.N. 
u n +UNU u n u +UNV u n v +UNWE u n w e +UNWTO u n w t o +Unz u n z +UOCl u o c l +UODA u o d a +UofM u o f m +UOHI u o h i +Uoho u o h o +UOJCA u o j c a +UOL u o l +UOMINI u o m i n i +UoMs u o m's +UOP u o p +uORF u o r f +UOR u o r +UO's u o's +UO u o +UoW u o w +UPABA u p a b a +UPBs u p b's +UPCI u p c i +UPC u p c +UPDRS u p d r s +UPDS u p d s +Upd u p d +UPD u p d +UPEC u p e c +UPFA u p f a +UPF's u p f's +Upf u p f +UPF u p f +UPGMA u p g m a +UPHSD u p h s d +UPIN u p i n +Upir u p i r +UPI's u p i's +UPI u p i +UPLA u p l a +UPLB u p l b +UPL u p l +UPMC u p m c +UPMSS u p m s s +UPND u p n d +UPNE u p n e +UPnP u p n p +UPN u p n +UPOV u p o v +UPPP u p p p +Uppu u p p u +UPRR u p r r +UPR u p r +UPSC u p s c +U.P.S.E.B. u p s e b +UPSIDC u p s i d c +UPSID u p s i d +UPSI u p s i +UPSMF u p s m f +UPTI u p t i +UPTU u p t u +U.P. u p +UPW u p w +UPX u p x +UPyD u p y d +UQAIB u q a i b +UQAM's u q a m's +UQC u q c +U.Q.P. u q p +UQP u q p +uq u q +UQ u q +URAA u r a a +URAC u r a c +URAP u r a p +URBACT u r b a c t +URBED u r b e d +Urbz u r b z +URCL u r c l +URCSA u r c s a +URCs u r c's +URC u r c +Urdd u r d d +Urei u r e i +urf u r f +Urla u r l a +URLhttp u r l h t t p +URLs u r l's +URL u r l +URNU u r n u +UROC u r o c +URRACA u r r a c a +URSB u r s b +URSEC u r s e c +URSS u r s s +URW's u r w's +URW u r w +URZ u r z +USAACE u s a a c e +USAAC u s a a c +U.S.A.A.F. u s a a f +USAAF u s a a f +USAB u s a b +USACE u s a c e +USAC u s a c +USAFE u s a f e +USAF u s a f +USAHA u s a h a +USAID's u s a i d's +USAID u s a i d +Usal u s a l +usan u s a n +Usan u s a n +USAN u s a n +usao u s a o +USAO u s a o +USAPA u s a p a +USARPS u s a r p s +USASA u s a s a +USA's u s a's +usata u s a t a +USATF u s a t f +USAT u s a t +usa u s a +.USA u s a +U.S.A. 
u s a +U.S.A u s a +USA u s a +USAya u s a y a +USBA u s b a +USBC's u s b c's +USBC u s b c +USBs u s b's +USB u s b +USBWA u s b w a +USCCB u s c c b +USCF u s c f +USCGAUX u s c g a u x +USCGC u s c g c +USCG u s c g +USCHS u s c h s +Uscie u s c i e +USCIRF u s c i r f +USCIS u s c i s +USCITA u s c i t a +USCOB u s c o b +USCRP u s c r p +U.S.C.'s u s c's +USC's u s c's +USCT u s c t +U.S.C. u s c +USC u s c +USDA's u s d a's +U.S.D.A. u s d a +USDA u s d a +USDoE u s d o e +USDOJ u s d o j +USDP u s d p +USDTV u s d t v +usd u s d +USD u s d +USEER u s e e r +USENIX u s e n i x +USFbA u s f b a +USFE u s f e +USFF u s f f +USFLPA u s f l p a +USFL u s f l +USFS u s f s +USF u s f +USF&WS u s f and w s +USFWS u s f w s +USFW u s f w +USGA u s g a +U.S.G.S. u s g s +USGS u s g s +U.S. H.R. u s h r +UShs u s h's +Usia u s i a +USIA u s i a +usih u s i h +USIP's u s i p's +USIP u s i p +USISL u s i s l +USIS u s i s +USIU u s i u +USJA u s j a +USJ u s j +USLES u s l e s +USLHE u s l h e +USLMRA u s l m r a +USLTA u s l t a +USL u s l +Uslu u s l u +Usmar u s m a r +USMA u s m a +USMC M u s m c +USMCR u s m c r +USMC's u s m c's +USMC u s m c +USMLE u s m l e +USML u s m l +USMMA u s m m a +USM u s m +Usna u s n a +USNA u s n a +USNG u s n g +Usnic u s n i c +USNM u s n m +USNO u s n o +USNR u s n r +USNSCS u s n s c s +USNS u s n s +USNTDP u s n t d p +U.S.N. u s n +USN u s n +USOC's u s o c's +USOC u s o c +Usoi u s o i +Usos u s o's +Uso u s o +USO u s o +USPHL u s p h l +USPO u s p o +USPPD u s p p d +U.S.P.Q. u s p q +USPSA u s p s a +USPs u s p's +USPS u s p s +USPTA u s p t a +USPTO u s p t o +U.S.P. u s p +USP u s p +USRA u s r a +USRC u s r c +USSA u s s a +USSBA u s s b a +USSB u s s b +USSD u s s d +USSF's u s s f's +USSF u s s f +USSR's u s s r's +USSR u s s r +U.S.S.R u s s r +U.S.'s u s's +US's u s's +U.S.S. u s s +USS u s s +Usti u s t i +Ustka u s t k a +USTR's u s t r's +USTR u s t r +UST's u s t's +Usui u s u i +usum u s u m +U. S. u s +U.S. 
u s +Usut u s u t +USVC u s v c +Usvit u s v i t +USVI u s v i +U.S.V. u s v +USWA u s w a +USWNT's u s w n t's +USWNT u s w n t +U.S.W. u s w +USW u s w +Uta's u t a's +uta u t a +Uta u t a +UTA u t a +UTB u t b +Utca u t c a +UTC u t c +Utd's u t d's +Utd u t d +utea u t e a +UTEP u t e p +Utes u t e's +UTET u t e t +UTF u t f +Uth u t h +uti u t i +Uti u t i +UTJ u t j +UTMB u t m b +UTMF u t m f +UTM u t m +Utne u t n e +Utnur u t n u r +Uto u t o +UTO u t o +UTPB u t p b +UTP u t p +utrci u t r c i +UTRGV u t r g v +UTR u t r +utsav u t s a v +Utsav u t s a v +UTSC u t s c +UTSI u t s i +UT's u t's +UTS u t s +Utsu u t s u +UTTR u t t r +U.T. u t +Utu u t u +Utva u t v a +UTVA u t v a +UTV u t v +UUA u u a +UUCP u u c p +Uuh u u h +UUKKY u u k k y +Uul u u l +Uulu u u l u +UUP u u p +Uusi u u s i +UUs u u's +Uuto u u t o +UUT u u t +Uvac u v a c +UVB u v b +UVC u v c +Uvea u v e a +UVF u v f +UVIMB u v i m b +UVI u v i +uvnitr u v n i t r +Uvo u v o +UVT u v t +UVU u v u +U.V. u v +UV u v +UWAP u w a p +Uwasa u w a s a +UWASA u w a s a +U.W.A. u w a +UWA u w a +UWB u w b +UWC u w c +UWFi u w f i +UWF u w f +UWG u w g +UWH u w h +UWI u w i +UWM's u w m's +UWP u w p +UWRF u w r f +UWSP's u w s p's +UWSU u w s u +UW's u w's +UWS u w s +UW u w +UWW u w w +VASP v a s p +VA's v a's +V. A. v a +V.A. v a +VA v a +V&A v and a +Vav v a v +VAV v a v +Vay v a y +VBA v b a +VBBS v b b s +VBE v b e +VBI v b i +VBM v b m +VBR v b r +vb v b +V. B. v b +V.B. v b +VB v b +VCA's v c a's +VCA v c a +VCCS v c c s +VCDs v c d's +VCD v c d +VCE v c e +VCH v c h +VCIT v c i t +VCI v c i +VCJ v c j +V. C. M. v c m +VCO v c o +VCP v c p +VCQ v c q +VCRs v c r's +VCR v c r +VCSELs v c s e l's +VCSEL v c s e l +VCs v c's +VCS v c s +VCTs v c t's +VCT v c t +VCU v c u +vc v c +V. C. v c +V.C. 
v c +VC v c +VCXIV v c x i v +VCY v c y +VDAC v d a c +vda v d a +Vda v d a +VDCs v d c's +VDC v d c +vdiq v d i q +VDJ v d j +VDL's v d l's +VDL v d l +vdm v d m +VDM v d m +VDP v d p +VDR v d r +VDSL v d s l +VDUP v d u p +VDU v d u +V. D. v d +V.D. v d +VD v d +VDV v d v +V. D. W. v d w +vez v e z +Vez v e z +VEZ v e z +V.F.A. v f a +VFA v f a +VFC v f c +VFDS v f d s +V.F.D. v f d +VFD v f d +VFIL v f i l +VFLI v f l i +VFL's v f l's +VFL v f l +VFMC v f m c +VFM v f m +vfp v f p +vfr v f r +VFR v f r +VF's v f's +VFS v f s +VFTS v f t s +VFTX v f t x +VFU v f u +V. F. v f +V.F. v f +VF v f +VFW v f w +VFX v f x +VGA v g a +V'Ger v g e r +VGIK v g i k +VGKO v g k o +VGMA v g m a +vgmdb v g m d b +VGMDB v g m d b +VGM v g m +VGo v g o +VGP v g p +VGSCs v g s c's +VGSoM v g s o m +VGTU v g t u +VGU v g u +V. G. v g +V.G. v g +VG v g +VGV v g v +VGX v g x +VHB v h b +VHC v h c +VHD v h d +VHF v h f +VHL v h l +VHP v h p +VHR v h r +VHSL v h s l +VHS v h s +V. H. v h +VH v h +V.I.C.'s v i c's +Vict v i c t +VICT v i c t +viita v i i t a +vijf v i j f +vij v i j +Vij v i j +V.I.P.s v i p's +VIPs v i p's +V.I.P. v i p +VIP v i p +V. I. v i +V.I. v i +V. J. J. v j j +VJL v j l +VJs v j's +VJTF v j t f +V. J. v j +V.J. v j +VJ v j +VKG v k g +VKIBC's v k i b c's +V. K. N. v k n +VKOS v k o s +VKS v k s +V. K. v k +V.K. v k +VK v k +Vlah v l a h +VLA v l a +VLBA v l b a +VLBI v l b i +VLB v l b +VLCCs v l c c's +VLCS v l c s +VLC v l c +VLDB v l d b +VLDL v l d l +vlei v l e i +VLE v l e +VLEX v l e x +VLF v l f +Vlijt v l i j t +VLN v l n +VLQ v l q +VLR v l r +VLSI v l s i +VLS v l s +VLT v l t +V. L. v l +V.L. v l +VL v l +VMAs v m a's +VMAT v m a t +VMA v m a +VMCAS v m c a s +VMCM v m c m +VME v m e +VMFM v m f m +VMF v m f +VMG v m g +VMI's v m i's +VMI v m i +VMK v m k +VMM v m m +VMPS v m p s +VMS v m s +VMT v m t +vm v m +V. M. v m +V.M. v m +VM v m +VNAF v n a f +VNAV v n a v +VNCH v n c h +VNC v n c +vner v n e r +V.N.E. 
v n e +VNI v n i +VNO v n o +VNPT v n p t +VNQDD v n q d d +VNR v n r +VNSA v n s a +VNS v n s +VNTH v n t h +VNTR v n t r +VNU v n u +V. N. v n +V.N. v n +VN v n +V.O. v o +VO v o +Vov v o v +VOX's v o x's +Voz v o z +VPAL v p a l +VPD v p d +VPH v p h +vpis v p i s +V. P. I. v p i +VPI v p i +VPLS v p l s +VPL v p l +VPM v p m +VPNs v p n's +VPN v p n +VPO's v p o's +VPP v p p +VPRO's v p r o's +VPRO v p r o +VPS v p s +vpu v p u +V. P. v p +V.P. v p +VP v p +VQT v q t +vq v q +VQ v q +vrak v r a k +VRA v r a +Vrba v r b a +Vrbuv v r b u v +VRD v r d +Vrej v r e j +vrem v r e m +VRE v r e +VRF v r f +Vrin v r i n +VRI v r i +vrj v r j +VRLA v r l a +vrn v r n +vroee v r o e e +vrou v r o u +vrouw v r o u w +Vrouw v r o u w +VRO v r o +VRR v r r +Vrsac v r s a c +VRSA v r s a +VR's v r's +VRS v r s +Vrtis v r t i's +VRT's v r t's +VRT v r t +V&R v and r +vr v r +V. R. v r +V.R. v r +VR v r +VRV v r v +VSANs v s a n's +VSAN v s a n +VSAP's v s a p's +VSATs v s a t's +VSB's v s b's +VSB v s b +VSCP v s c p +VSC v s c +VSD v s d +Vsekh v s e k h +vse v s e +VSE v s e +VSFV v s f v +VSG v s g +VSI v s i +VSM v s m +VSNL v s n l +vso v s o +VSO v s o +VSPs v s p's +VSP v s p +VSS v s s +VSTM v s t m +VST v s t +VSU v s u +vs. versus +_vs._ versus +VSV v s v +VSW v s w +VSX v s x +vsyo v s y o +VSZ v s z +VTA v t a +VTB v t b +VTCO v t c o +VTD v t d +VTEC v t e c +VTE v t e +Vtic v t i c +VTi v t i +VTL v t l +VTM v t m +VTR v t r +VTSK v t s k +V&T's v and t's +VTS v t s +VTU v t u +V&T v and t +VTVL v t v l +V.T. v t +VT v t +VTV v t v +Vuur v u u r +VUU v u u +V.V.B. v v b +VVCS v v c s +VVD v v d +VVIPs v v i p's +VVI v v i +VVMC v v m c +VVO v v o +VVS v v s +VVT v v t +V. V. v v +V.V. v v +VV v v +VWM v w m +VWR v w r +VW's v w's +VWs v w's +VWS v w s +VW v w +VXL v x l +VXR v x r +VX v x +Vyg v y g +vyr v y r +vy v y +VY v y +vz v z +V. Z.
v z +WAAAAAGH w a a a a a g h +WAAAF w a a a f +WAAC w a a c +WAAF w a a f +WAAI w a a i +WAAV w a a v +Waay w a a y +WABB w a b b +WABC w a b c +WABD w a b d +WABG's w a b g's +WABG w a b g +W. A. C. w a c +W.A.C. w a c +Wa'il w a i l +WAIO w a i o +W.A.J. w a j +Wakf w a k f +W.A.K.O. w a k o +wa'l w a l +WANFL w a n f l +WAOK w a o k +waqf w a q f +waqt w a q t +Waqt w a q t +WAQZ w a q z +WARH w a r h +W.A.R. w a r +WASC w a s c +Wasl w a s l +WASL w a s l +W. A. S. w a s +W.A.S. w a s +Watfa w a t f a +WATL w a t l +WATW w a t w +wau w a u +Wau w a u +WAU w a u +WAVF w a v f +WAVM w a v m +W. A. w a +W.A. w a +WA w a +WAWLI w a w l i +waw w a w +WAWZ w a w z +WAYH w a y h +waza w a z a +Waza w a z a +WBAL's w b a l's +WBAM w b a m +WBA w b a +WBBL w b b l +WBBM w b b m +WBBP w b b p +WBCA w b c a +WBCN w b c n +W.B.C. w b c +WBC w b c +WBCY w b c y +WBEV w b e v +WBEZ w b e z +WBFJ w b f j +WBFO w b f o +WBF w b f +WBG w b g +WBHY w b h y +WBIC w b i c +WBIL w b i l +WBJEE w b j e e +WBKO w b k o +WBK w b k +WBLQ w b l q +WBME w b m e +WBNI w b n i +WBNX w b n x +WBON w b o n +WBO w b o +WBQC w b q c +WBRC w b r c +WBRE w b r e +WBRO w b r o +WBRZ w b r z +WBs w b's +WBS w b s +WBTI w b t i +WBT's w b t's +WBT w b t +WBTW w b t w +WBTZ w b t z +WBUR w b u r +WBVE w b v e +WBV w b v +W. B. w b +W.B. 
w b +WB w b +WBXH w b x h +WBZB w b z b +WBZ w b z +WCAA w c a a +WCAG w c a g +WCAU w c a u +WCA w c a +WCBA w c b a +WCBN w c b n +W&CBR w and c b r +WCBR w c b r +WCBS w c b s +WCB w c b +WCCES w c c e s +WCCG w c c g +WCCO w c c o +WCC's w c c's +WCC w c c +WCDA w c d a +WCEO w c e o +WCFL w c f l +WCF w c f +WCG w c g +WCHA w c h a +WCHL's w c h l's +WCHL w c h l +WCHS w c h s +WCHV w c h v +WCIU w c i u +WCKD w c k d +WCKY w c k y +WCLN w c l n +WCLU w c l u +WCLV w c l v +WCL w c l +WCMA w c m a +WCMC w c m c +WCMFA w c m f a +WCMH w c m h +WCMR w c m r +WCM w c m +WCNB w c n b +WCOZ w c o z +WCPO w c p o +WCPW w c p w +WCRC w c r c +WCRE w c r e +WCRL w c r l +WCSA w c s a +WCSM w c s m +WCS w c s +WCTD w c t d +WCTG w c t g +W.C.T.U. w c t u +WCTU w c t u +WCTV w c t v +WCT w c t +WCTX w c t x +WCVB's w c v b's +WCVB w c v b +WCVE w c v e +WCWA w c w a +wc w c +W. C. w c +W.C. w c +WC w c +WCWC w c w c +WCW w c w +WCXR w c x r +WCYB w c y b +WCZE w c z e +WDAF w d a f +WDA w d a +WDC w d c +WDDM w d d m +WDFN w d f n +WDF w d f +WDGY w d g y +WDH w d h +WDIA w d i a +WDIF w d i f +WDIG w d i g +WDIS w d i s +WDIV w d i v +WDJT w d j t +WDLA w d l a +WDL w d l +WDM w d m +WDNN w d n n +WDNS w d n s +WDNT w d n t +WDOK w d o k +WDO w d o +WDRE w d r e +WDR w d r +WDSE w d s e +WDSN w d s n +WDSO w d s o +WDSU w d s u +WDS w d s +WDTM w d t m +WDUQ w d u q +WDVE w d v e +WDVR w d v r +W. D. w d +W.D. w d +WDWS w d w s +WDW w d w +WDXZ w d x z +WDYZ w d y z +W. E. B. w e b +W.E.B. w e b +W. E. H. w e h +W.E.H. w e h +WEVD w e v d +WEVV w e v v +W. E. w e +W.E. w e +WEZQ w e z q +WFAA w f a a +WFAL w f a l +WFAN w f a n +WFB w f b +WFCA w f c a +WFC w f c +WFDC w f d c +WFDD w f d d +WFDF w f d f +WFDU w f d u +WFD w f d +WFGF w f g f +WFHE w f h e +W.F.H. w f h +WFH w f h +WFIKKN w f i k k n +WFIL w f i l +WFIRST w f i r s t +WFL's w f l's +WFL w f l +WFLX w f l x +WFMT w f m t +WFMU w f m u +W. F. M. 
w f m +WFM w f m +WF&NW w f and n w +WFNZ w f n z +WFOY w f o y +WFPC w f p c +WFPF w f p f +WFPMA w f p m a +W.F.P. w f p +WFP w f p +WFSBP w f s b p +WFS w f s +WFTDA w f t d a +WFTM w f t m +WFTV w f t v +WFT w f t +WFVA w f v a +W&F w and f +WFWA w f w a +W. F. w f +W.F. w f +WF w f +WFWM w f w m +WFXR w f x r +wga w g a +WGA w g a +WGBH w g b h +WGBI w g b i +W. G. B. w g b +WGB w g b +WGCB w g c b +W.G.C. w g c +WGC w g c +WGEM w g e m +WGFT w g f t +WGGH w g g h +WGHP w g h p +WGH w g h +WGHW w g h w +WGI w g i +WGN's w g n's +WGNT w g n t +wgn w g n +WGN w g n +WGOK w g o k +WGO w g o +WGP w g p +WGPX w g p x +WGRP w g r p +WGRT w g r t +WGRV w g r v +WGSA w g s a +WGSS w g s s +WGS w g s +WGUN w g u n +wg w g +W. G. w g +W.G. w g +WG w g +WGWW w g w w +WGZR w g z r +W.H.B. w h b +WHCA w h c a +WHCB w h c b +WHCC w h c c +W. H. C. w h c +W.H.C. w h c +WHC w h c +WHDG w h d g +WHDI w h d i +W.H.D. w h d +WHEC w h e c +WHFA w h f a +WHFM w h f m +WHFS w h f s +WHHS w h h s +W.H.H. w h h +WHJA w h j a +W.H.J. w h j +WHJY w h j y +WHKT w h k t +WHL's w h l's +WHL w h l +WHNO w h n o +WHP w h p +WHRC w h r c +WHSN w h s n +WHSS w h s s +W.H.S. w h s +WHS w h s +W. H. T. w h t +WHTZ w h t z +W&H w and h +W. H. w h +W.H. w h +WH w h +Whyld w h y l d +WHYT w h y t +W.I.B.C. w i b c +Wica w i c a +wici w i c i +WIC's w i c's +WICU w i c u +Wif w i f +WIF w i f +WIJD w i j d +Wijk w i j k +WIP w i p +W.I.S.E. w i s e +W.I.T.C.H. w i t c h +Wiwa w i w a +W. I. w i +WIXY's w i x y's +WJAL w j a l +WJAR w j a r +wjaz w j a z +WJBC w j b c +WJBF w j b f +WJBK w j b k +WJBR w j b r +WJCI w j c i +WJC w j c +WJEC w j e c +WJEF w j e f +WJER w j e r +WJET w j e t +W. J. J. O. w j j o +W.J.J.O. w j j o +WJJZ w j j z +WJKA's w j k a's +WJKA w j k a +WJKS w j k s +WJLA w j l a +WJMK w j m k +WJM w j m +WJOI w j o i +WJRD w j r d +WJR w j r +WJSC w j s c +W&J's w and j's +WJTD w j t d +WJUX w j u x +W&J w and j +W. J. w j +W.J. 
w j +WJ w j +WJXX w j x x +WJYL w j y l +WJYM w j y m +WJZA w j z a +WJZJ w j z j +WJZ w j z +WKAG w k a g +WKAQ w k a q +WKAR w k a r +WKA w k a +WKBD w k b d +WKBF's w k b f's +WKB w k b +WKBW w k b w +W. K. C. w k c +WKDM w k d m +WKDN w k d n +WKDZ w k d z +WKFS w k f s +W.K.H. w k h +WKH w k h +WKIK w k i k +WKIP w k i p +WKKB w k k b +WKKD w k k d +WKKZ w k k z +WKLH w k l h +WKNDS w k n d s +WKN w k n +WKQI w k q i +WKRR w k r r +WKSE w k s e +WKSM w k s m +WKSU w k s u +WKTU's w k t u's +WKTU w k t u +W.K.V. w k v +W. K. w k +W.K. w k +WKXJ w k x j +WKX w k x +WKYS's w k y s's +WKYT w k y t +WKY w k y +W.L.A.K. w l a k +WLB w l b +WLCH w l c h +W&LE w and l e +Wley w l e y +WLGH w l g h +WLH w l h +WLII w l i i +WLIR w l i r +WLJY w l j y +WLKT w l k t +WLKY w l k y +WLLM w l l m +WLNP w l n p +WLNQ w l n q +WLNS w l n s +WLPA w l p a +WLPR w l p r +WLPWR w l p w r +wlrs w l r s +WLSG w l s g +WLS w l s +WLTM w l t m +WLUK's w l u k's +WLUK w l u k +WLWC w l w c +W. L. w l +W.L. w l +WL w l +WLW w l w +WLYH's w l y h's +WLYN w l y n +WLY w l y +WLZK w l z k +WMAC w m a c +WMAG w m a g +WMAP w m a p +WMAQ w m a q +WMATA's w m a t a's +WMATA w m a t a +W.M.A. w m a +WMA w m a +WMAZ w m a z +WMBG w m b g +WMCAT w m c a t +WMCA w m c a +WMDB w m d b +WMDI w m d i +WMD w m d +WMEE w m e e +WMEN w m e n +WMET w m e t +WME w m e +WMEX w m e x +WMFC w m f c +WMFM w m f m +WMFP w m f p +W.M.F. w m f +WMF w m f +WMGF w m g f +WMG w m g +WMGY w m g y +WMHG w m h g +WMHT w m h t +WMIX w m i x +WMJR w m j r +WMJZ w m j z +WMKQ w m k q +WMKW w m k w +wml w m l +WMLW w m l w +WMLY w m l y +WMMS w m m s +W. M. M. w m m +WMNF w m n f +WMN w m n +WMOB w m o b +WMO w m o +WMRA w m r a +WMRO w m r o +WMSG w m s g +W.M.S. w m s +WMS w m s +WMTS w m t s +WMT w m t +WMTW w m t w +WMUB w m u b +WMU w m u +WMVM w m v m +WMV w m v +W&M w and m +W. M. w m +W.M. w m +WM w m +WMWM w m w m +WMXB w m x b +WMX w m x +WMYE w m y e +WMYL w m y l +WNAK w n a k +W.N.B.A. 
w n b a +WNBA w n b a +WNBC w n b c +WNBL w n b l +WNBR w n b r +WNB w n b +WNCN w n c n +WNC w n c +WNCX w n c x +WND w n d +WNED w n e d +WNEL w n e l +WNEM w n e m +WNEP w n e p +WNET w n e t +WNEW's w n e w's +WNEW w n e w +WNFM w n f m +WNGL w n g l +WNIT w n i t +WNJN w n j n +WNJO w n j o +WNJY w n j y +WNK w n k +WNLO w n l o +WNLU w n l u +WNL w n l +WNMX w n m x +WNO w n o +WNPT w n p t +WNRN w n r n +WNR's w n r's +WNRV w n r v +WNSH w n s h +WNSR w n s r +WNTM w n t m +WNT w n t +WNUA w n u a +WNUF w n u f +WNUSP w n u s p +WNWBL w n w b l +W. N. w n +W.N. w n +WN w n +WNWO w n w o +WNWS w n w s +WNW w n w +WNYA w n y a +WNYC's w n y c's +WNYC w n y c +WNYM w n y m +WNY w n y +WNYZ w n y z +wnzaa w n z a a +WOAA w o a a +WOAK w o a k +WOA w o a +WOC w o c +W&OD w and o d +W.O.G. w o g +WOGX's w o g x's +WOJG w o j g +WOLH w o l h +WOOM w o o m +WOPC w o p c +Worh w o r h +WOSM w o s m +W. O. w o +W.O. w o +WOWOW w o w o w +WOXY w o x y +WOZN w o z n +WPAC w p a c +WPA w p a +WPBC w p b c +WPBSA w p b s a +WPBS w p b s +WPBT w p b t +W. P. C. w p c +WPC w p c +WPDH w p d h +WPD w p d +WPFF w p f f +WPFL w p f l +WPF w p f +WPGC w p g c +WPHL w p h l +WPH w p h +WPIAL w p i a l +WPI w p i +WPIX w p i x +WPLA w p l a +WPLG w p l g +WPLS w p l s +WPMW w p m w +WPO w p o +WPPT w p p t +WPP w p p +WPRI w p r i +WPRO w p r o +WPRP w p r p +WPSC w p s c +WPSL w p s l +WPS w p s +WPTB w p t b +WPTD's w p t d's +wpt w p t +WPT w p t +WPTY w p t y +W. P. U. w p u +wp w p +W. P. w p +W.P. w p +WP w p +WPXN w p x n +WPYR w p y r +WQDE w q d e +WQHT w q h t +WQIV w q i v +WQLR w q l r +WQLZ w q l z +WQMS w q m s +WQOK w q o k +WQSO w q s o +WQSX w q s x +WQ w q +WQXI w q x i +WQXR w q x r +W. R. A. S. w r a s +WRAT w r a t +WRBU w r b u +WRCB w r c b +WRCN w r c n +WRC w r c +WRDL w r d l +Wrec w r e c +WRFD w r f d +WRFK w r f k +WRFM w r f m +WRFX w r f x +WRGO w r g o +WRGP w r g p +W. R. G. w r g +WRHA w r h a +WRHC w r h c +WRHI w r h i +W.R.I. 
w r i +WRKA w r k a +WRKO w r k o +WRKT w r k t +W.R.K. w r k +WRKY w r k y +WRNS w r n s +WRN w r n +WRNY w r n y +WRNZ w r n z +WROJ w r o j +WROK w r o k +WROO w r o o +WRPT w r p t +WRQQ w r q q +WRTA w r t a +WRTC w r t c +WRUV w r u v +WRU w r u +WRVL w r v l +WRVS w r v s +WRWB w r w b +W. R. w r +W.R. w r +WR w r +W.R.W. w r w +WRXT w r x t +WSAR w s a r +WSAV w s a v +W.S.A. w s a +WSA w s a +WSAZ w s a z +WSBG w s b g +WSBM w s b m +WSBT w s b t +WSB w s b +WSCA's w s c a's +WSCR w s c r +W. S. C. w s c +WSC w s c +WSDD w s d d +WSDG w s d g +WSDOT w s d o t +WSDP w s d p +WSDR w s d r +WSEG w s e g +WSEK w s e k +WSE w s e +WSFA w s f a +WSFL w s f l +WSFM w s f m +WSFS w s f s +WSFX w s f x +WSGE w s g e +WSGW w s g w +WSHL w s h l +WSHU w s h u +WSH w s h +WSIPC w s i p c +WSIS w s i s +WSJM w s j m +WSJO w s j o +WSJ w s j +WSLU w s l u +WSL w s l +WSMK w s m k +W.S.M. w s m +WSM w s m +WSNJ w s n j +WSNS w s n s +WSNT w s n t +WSN w s n +WSOF w s o f +WSOPE w s o p e +WSOP's w s o p's +WSOP w s o p +WSORR w s o r r +WSO w s o +WSOY w s o y +WSPA w s p a +WSPC w s p c +WSPD w s p d +WSPN w s p n +WSPS w s p s +WSPU w s p u +WSP w s p +WSRA w s r a +WSRE's w s r e's +WSRF w s r f +WSSA w s s a +WSSCC w s s c c +WSSSA w s s s a +WSS w s s +WSTE w s t e +WSTR w s t r +WSUA w s u a +WSUD w s u d +WSUE w s u e +WSUS w s u s +WSU w s u +WSV w s v +WSVZ w s v z +WSWI w s w i +ws w s +W. S. w s +W.S. w s +WS w s +WSW w s w +WSYN w s y n +WSYX w s y x +WTAA w t a a +WTAE w t a e +WTAF w t a f +WTAG w t a g +WTAM's w t a m's +W. T. A. w t a +WTA w t a +WTAW w t a w +WTCC w t c c +W. T. C. w t c +W.T.C. 
w t c +WTC w t c +WTDY w t d y +WTEL w t e l +WTEV's w t e v's +WTEV w t e v +WTF w t f +WTGB w t g b +WTHR's w t h r's +WTHS w t h s +WTIU w t i u +WTI w t i +WTIX w t i x +WTKG w t k g +WTKO w t k o +WTKR w t k r +WTKS w t k s +WTLS w t l s +WTMA w t m a +WTNH w t n h +WTN w t n +WTOP w t o p +WTO's w t o's +WTOS w t o s +WTO w t o +WTPA w t p a +WTPQ w t p q +WTR w t r +WTs w t's +WTS w t s +WTTF w t t f +WTTM w t t m +WTTs w t t's +WTT w t t +WTTW w t t w +WTTZ w t t z +WTVA w t v a +WTVC w t v c +WTVD w t v d +WTVI w t v i +WTVM w t v m +WTVU w t v u +WTVW w t v w +WTVX w t v x +WTWF w t w f +W. T. w t +W.T. w t +WT w t +WTX w t x +WUAB w u a b +WUAP w u a p +WUAs w u a's +WUC w u c +Wudl w u d l +Wuhr w u h r +Wuhu w u h u +WUKB w u k b +WUPG w u p g +WUPN w u p n +WUPZ w u p z +Wu's w u's +WUSW w u s w +WUTK w u t k +WUVC w u v c +wuv w u v +W. U. w u +Wuz w u z +WVa w v a +WVBX w v b x +WVCA w v c a +WVIT w v i t +WVJC w v j c +WVJP w v j p +WVLZ w v l z +WVNI w v n i +WVNY w v n y +W. V. O. w v o +WVO w v o +WVPA w v p a +WVPO w v p o +WVSSAC w v s s a c +WVSU w v s u +WVTU's w v t u's +WVTU w v t u +WVUE w v u e +WVU w v u +WVUW w v u w +W. V. w v +W.V. w v +WV w v +WWAA w w a a +WWAMI w w a m i +WWASP w w a s p +WWA w w a +WWAX w w a x +WWBZ w w b z +WWCD w w c d +WWC w w c +WWDB's w w d b's +WWDC w w d c +WWDR w w d r +WWD w w d +WWE's w w e's +Wwe w w e +WWE w w e +WWF's w w f's +WWF w w f +WWGC w w g c +WWHT w w h t +WWHW w w h w +WWIS w w i s +WWJP w w j p +WWKB w w k b +WWKL w w k l +WWML w w m l +WWMM w w m m +WWMX w w m x +WWMY w w m y +WWN w w n +WWNW w w n w +WWNY w w n y +WWOX w w o x +WWPG w w p g +WWP w w p +WWPW w w p w +WWRFC w w r f c +WWRV w w r v +WWSS w w s s +WWS w w s +WWTI's w w t i's +WWTI w w t i +WWTN w w t n +WWU w w u +WWVA w w v a +WWVR w w v r +WWV w w v +WWWF w w w f +WWWQ's w w w q's +W. W. w w +W.W. 
w w +WW w w +WWW w w w +WWWW w w w w +WWYD w w y d +WXB w x b +WXCN w x c n +WXDU w x d u +WXEL w x e l +WXIL w x i l +WXIN's w x i n's +WXIN w x i n +WXJM's w x j m's +WXJ w x j +WXN w x n +WXQW w x q w +WXTG w x t g +WXW w x w +wx w x +WX w x +W.Y. w y +WY w y +wyzc w y z c +WZBC w z b c +WZFG w z f g +WZFT w z f t +WZGC w z g c +WZJQ w z j q +WZKC w z k c +WZLR w z l r +WZLX w z l x +WZRB w z r b +WZRD w z r d +WZST w z s t +W. Z. w z +WZ w z +WZX w z x +WZZM w z z m +WZZX w z z x +xbg x b g +XBMC x b m c +XBM x b m +Xbra x b r a +XBRL x b r l +XBV x b v +XCB x c b +XCE x c e +XCMS x c m s +XCOM x c o m +XCOR x c o r +xCo x c o +XCR x c r +XCT x c t +XCVI x c v i +XCV x c v +X. C. x c +XC x c +XCX x c x +xda x d a +XDDM x d d m +XDH x d h +XDK x d k +XDRs x d r's +XDR x d r +XD's x d's +XDS x d s +X. D. x d +XD x d +Xfce x f c e +XFC x f c +XFDL x f d l +XFD x f d +XFL x f l +XFM x f m +XFS x f s +XFU x f u +xf x f +X.F. x f +XF x f +XHVFC x h v f c +xh x h +X. H. x h +XH x h +XJD x j d +XJ x j +XKE x k e +XKL x k l +xk x k +XK x k +XL x l +XMB x m b +xmc x m c +xml x m l +XML x m l +XMMS x m m s +XMPP x m p p +XMP x m p +XMRV x m r v +XMSN x m s n +XM's x m's +XMU x m u +X.M. x m +XM x m +XN x n +xO x o +X.O. x o +XO x o +XPA x p a +XPD x p d +XPe x p e +XPL x p l +XPN x p n +XP's x p's +XPS x p s +XPT x p t +XPW x p w +XP x p +X.Q. x q +XQ x q +XRCC x r c c +XRCO x r c o +XRDS x r d s +XRD x r d +XRE x r e +XR x r +XSD x s d +XSi x s i +XSI x s i +XSLT x s l t +xsr x s r +XS x s +XTB x t b +XTC x t c +XTG x t g +XTwas x t w a's +xt x t +XT x t +XVAS x v a +XV x v +XWA x w a +XWB x w b +XWF x w f +XWIS x w i s +X. W. x w +XYNU x y n u +XYP x y p +xy x y +XY x y +xyz x y z +xyZ x y z +XYZ x y z +XZ x z +Y. A. y a +Y.A. y a +YBA y b a +YBL y b l +YBNL y b n l +YBN y b n +YBR y b r +YBX y b x +Y. B. y b +Y.B. y b +YB y b +YBYS y b y s +YCC y c c +YCD y c d +ycia y c i a +ycie y c i e +YCL y c l +YCO y c o +YCU y c u +YCV y c v +yc y c +Y. C. y c +Y.C. 
y c +YC y c +YCYW y c y w +YDA y d a +Ydby y d b y +Yde y d e +YDG y d g +YdiB y d i b +YDK y d k +YDNC y d n c +yd y d +Y. D. y d +Y.D. y d +YD y d +Y.E. y e +YFCMD y f c m d +YFCs y f c's +YFCU y f c u +YFC y f c +yfle y f l e +YF&R y f and r +YF y f +YGB y g b +YGEX y g e x +YGLA y g l a +YGL y g l +YG's y g's +yg y g +Y. G. y g +Y.G. y g +YG y g +YHA y h a +YHWH y h w h +Y. H. y h +YH y h +YIF y i f +YIG y i g +YITP y i t p +Y. I. y i +Y.I. y i +YJO y j o +yj y j +Y. J. y j +YJ y j +Ykt y k t +Y. K. y k +Y.K. y k +YK y k +YKY y k y +Yle y l e +YLE y l e +YLF y l f +yl y l +Y. L. y l +YL y l +YMCA's y m c a's +Y.M.C.A. y m c a +YMCA y m c a +YMF y m f +Ymke y m k e +Y.M.M. y m m +YMM y m m +YMO y m o +ym y m +Y. M. y m +YM y m +yndi y n d i +Yndi y n d i +yne y n e +Yngve y n g v e +Ynis y n i's +YNK y n k +YNU y n u +yn y n +Yn y n +Y. N. y n +Y.N. y n +YN y n +Y.O. y o +YO y o +ypa y p a +YPC y p c +YPF y p f +YPG y p g +YPM y p m +YPO y p o +Y.P.P. y p p +YPP y p p +Y.P.S. y p s +YPT y p t +YPU y p u +Y.P. y p +YP y p +Y. Q. y q +YQ y q +yra y r a +Yra y r a +YRC y r c +YRF y r f +YRK y r k +YRM y r m +YRT y r t +Y&R y and r +YSK'da y s k d a +YSK'ya y s k y a +YSK y s k +YSL y s l +YSRCP y s r c p +YSR y s r +YSU's y s u's +YSU y s u +Ys y's +Y.S. y s +YS y s +YSY y s y +yta y t a +YTB y t b +Ytre y t r e +YTS y t s +YTV's y t v's +YTV y t v +Y&T y and t +Y. T. y t +Y.T. y t +YT y t +Y. V. y v +YWCA y w c a +YWC y w c +Y. W. y w +Y.Y. y y +Z. A. B. z a b +Z. A. z a +Z.A. z a +ZBC z b c +ZBS z b s +ZB z b +ZCE z c e +ZCGWM z c g w m +ZCMI z c m i +ZCTA z c t a +ZCY z c y +ZC z c +Zdar z d a r +zda z d a +ZDF z d f +ZDR z d r +Z.E.R.O. z e r o +ZFC z f c +ZFK z f k +ZFM z f m +ZFP z f p +ZFS z f s +ZFX z f x +ZF z f +ZGE z g e +ZGI z g i +zg z g +Z.G. z g +ZG z g +zh z h +Z. H. z h +Z.H. z h +ZH z h +zijn z i j n +zij z i j +Zij z i j +Z. I. z i +Z. J. z j +ZKK z k k +ZKM z k m +Z.K. z k +Z.L. z l +Z.M. 
z m +ZNA z n a +ZNF z n f +ZNK z n k +ZnO z n o +ZNW z n w +ZPAP z p a p +Zpav z p a v +ZPAV z p a v +ZPA z p a +ZPK z p k +Z.P. z p +ZP z p +ZQ z q +ZRC z r c +ZrI z r i +ZRK z r k +ZSC z s c +ZSE z s e +ZSF z s f +ZSG's z s g's +ZSIS z s i s +ZSL z s l +ZSO z s o +ZSSS z s s s +Zsuzsa z s u z s a +ZTE z t e +ZWYCI z w y c i +Z.W. z w +Z.X.V. z x v +Z.X. z x +ZYF z y f +Z. Y. z y +Z.Y. z y +Z. Z. z z +Z.Z. z z diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/ipa_symbols.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/ipa_symbols.tsv new file mode 100644 index 0000000..f5559c7 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/ipa_symbols.tsv @@ -0,0 +1,521 @@ +a +aoj +aəj +aː +aːʲ +aː͡j +aː͡ɨ̯ +aˤ +aˤː +a̠ +a̠ː +a̰ +a͡e +a͡i +a͡iː +a͡i̯ +a͡j +a͡o +a͡u +a͡uː +a͡u̯ +a͡w +a͡ə +a͡ɨ̯ +a͡ɪ +a͡ʊ +b +bʱ +bʲ +bː +b̥ +c +cʰ +cː +ç +d +dʲ +dː +d̥ +d̪ +d̪ʱ +d͡z +d͡zʷ +d͡zː +d͡ʑ +d͡ʒ +d͡ʒʱ +d͡ʒʲ +d͡ʒː +e +eː +eːʲ +eː͡j +ẽː +ẽ͡j̃ +e̞ +e̞ː +e̯ +e͡i +e͡iː +e͡ɨ̯ +f +fʲ +fː +h +hː +i +iəj +iəw +iʲ +iː +iːʲ +ĩː +i̥ +i̯ +i͡u +i͡ə +i͡ɛ +j +jː +j̃ +k +kʰ +kʰː +kʲ +kʲʼ +kʷ +kʷʼ +kʼ +kː +k̚ +k̚ʲ +k̟̚ +k͈ +k͡p̚ +l +lʲ +lː +l̥ +l̩ +m +mʲ +mʲː +mː +m̥ +m̩ +n +nʲ +nː +n̥ +n̩ +o +oʲ +oː +oːʲ +ò +õ͡j̃ +õ͡w̃ +o̝ +o̞ +o̞ː +o̯ +o̰ +o͡u +o͡uː +p +pʰ +pʰː +pʲ +pʷʼ +pʼ +pː +p̚ +p̚ʲ +p͈ +p͜f +p͡f +q +qʷ +qʼ +r +rʲ +rː +r̂ +r̂ː +r̥ +r̩ +s +sʰ +sʲ +sʼ +sː +s͈ +t +tʰ +tʰː +tʲ +tʷʼ +tʼ +tː +t̚ +t̪ +t̪ʰ +t͈ +t͜s +t͡s +t͡sʰ +t͡sʰː +t͡sʲ +t͡sʷ +t͡sʼ +t͡sː +t͡ɕ +t͡ɕʰ +t͡ɕ͈ +t͡ʂ +t͡ʂʼ +t͡ʃ +t͡ʃʰ +t͡ʃʰː +t͡ʃʲ +t͡ʃʷ +t͡ʃʼ +t͡ʃː +u +uəj +uʲ +uː +uːʲ +ũː +ũ͡j̃ +u̯ +u͡e +u͡i +u͡j +u͡ɔ +u͡ə +v +vʲ +vː +w +w̃ +x +xʷ +xː +y +yː +yːʲ +y̯ +z +zʲ +zː +z̥ +à +àː +á +áː +â +âː +ã +ã̠ +æ +æː +æ̀ +æ̀ː +æ̂ +æ̂ː +æ͡ɪ +æ͡ʉ +ç +è +èː +é +éː +ê +êː +ì +ìː +í +íː +î +îː +ï +ð +ò +òː +ó +óː +ô +ôː +õ +õː +õ̞ +ø +øː +øːʲ +ø̯ +ù +ùː +ú +úː +û +ûː +ā +āː +ē +ēː +ĕ +ĕ͡ə +ě +ěː +ħ +ĩ +ĩː +ī +īː +ŋ +ŋʲ +ŋ̊ +ŋ̍ +ŋ̟ +ŋ̩ +ŋ͡m +ō +ŏ 
+ŏ͡ə +œ +œː +œ̃ +œ͡i +œ͡iː +œ͡ʏ +ř +řː +ũ +ũː +ū +ūː +ŭ +ŭ͡ə +ǎ +ǎː +ǐ +ǐː +ǒ +ǒː +ǔ +ǔː +ǣ +ǣː +ɐ +ɐː +ɐ̃ +ɐ̃͡j̃ +ɐ̃͡w̃ +ɐ̯ +ɐ̯̯ +ɑ +ɑː +ɑ̃ +ɑ̃ː +ɒ +ɒʲ +ɒː +ɓ +ɔ +ɔː +ɔˤː +ɔ̀ +ɔ̀ː +ɔ́ +ɔ́ː +ɔ̃ +ɔ̃ː +ɔ̰ +ɔ͡i̯ +ɔ͡ə +ɔ͡ɨ̯ +ɔ͡ɪ +ɔ͡ʊ +ɕ +ɕʰ +ɕː +ɕ͈ +ɖ +ɖʱ +ɗ +ɘ +ɘː +ə +əː +əˤ +ə̀ +ə́ +ə̃ +ə̯ +ə͡u̯ +ə͡w +ə͡ɨ +ə͡ɨ̯ +ɚ +ɛ +ɛʲ +ɛː +ɛˤː +ɛ̀ +ɛ̀ː +ɛ́ +ɛ́ː +ɛ̂ +ɛ̂ː +ɛ̃ +ɛ̃ː +ɛ̄ +ɛ̄ː +ɛ̰ +ɛ͡i +ɛ͡i̯ +ɛ͡u +ɛ͡u̯ +ɛ͡ɪ +ɛ͡ʊ +ɜ +ɜː +ɝ +ɝː +ɟ +ɟː +ɟ͡ʝ +ɡ +ɡʱ +ɡʲ +ɡʷ +ɡː +ɡ̊ +ɣ +ɤ +ɥ +ɦ +ɨ +ɨəj +ɨː +ɨ̃ᵝ +ɨ̞ +ɨ̥ᵝ +ɨ̯ +ɨ͡u̯ +ɨ͡w +ɨ͡ə +ɨᵝ +ɨᵝː +ɪ +ɪː +ɪ̀ +ɪ́ +ɪ̃ +ɪ̯ +ɪ̰ +ɪ͡u̯ +ɪ͡ʊ +ɫ +ɫː +ɬ +ɬʼ +ɭ +ɮ +ɯ +ɯː +ɯ̟̃ᵝ +ɯ̟̊ᵝ +ɯ̟ᵝ +ɯ̟ᵝː +ɰ +ɰ̃ +ɰᵝ +ɱ +ɱ̩ +ɲ +ɲː +ɲ̊ +ɲ̟ +ɳ +ɴ +ɸ +ɸʷ +ɹ +ɻ +ɽ +ɽʱ +ɾ +ɾʲ +ɾː +ɾ̝̊ +ʀ +ʁ +ʁʷ +ʁː +ʂ +ʂʷ +ʃ +ʃʰ +ʃʲ +ʃʷ +ʃʷʼ +ʃʼ +ʃː +ʈ +ʈʰ +ʉ +ʉː +ʊ +ʊ̀ +ʊ́ +ʊ̃ +ʊ̯ +ʊ̯͡i +ʊ̯͡ɨ +ʊ̰ +ʋ +ʌ +ʌ̹ +ʍ +ʎ +ʏ +ʏː +ʏ̯ +ʐ +ʐʷ +ʑ +ʒ +ʒʲ +ʒʷ +ʒː +ʔ +ʔʲ +ʔʷ +ʝ +˦ˀ˥ +˦˥ +˦˧˥ +˦˩ +˧ˀ˨ +˧˦ +˧˧ +˧˨ +˧˩ +˨˩ +˨˩˦ +˨˩˨ +β +θ +χ +χʷ +χː +ḛ +ḭ +ṵ +ẽ +ẽː +ẽ̞ +‿ \ No newline at end of file diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv new file mode 100644 index 0000000..a55a04b --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv @@ -0,0 +1,21 @@ +Mr. mister +Mrs. misses +Dr. doctor +Drs. doctors +Co. company +Lt. lieutenant +Sgt. sergeant +St. saint +Jr. junior +Maj. major +Hon. honorable +Gov. governor +Capt. captain +Esq. esquire +Gen. general +Ltd. limited +Rev. reverend +Col. colonel +Mt. mount +Ft. fort +etc. 
et cetera diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/symbol.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/symbol.tsv new file mode 100644 index 0000000..6f2f8c6 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/symbol.tsv @@ -0,0 +1,23 @@ +& and +# hash +@ at +§ section +™ trademark +® registered trademark +© copyright +_ underscore +% percent sign +* asterisk ++ plus +/ slash += equal sign +^ circumflex +| vertical bar +~ tilde +$ dollar +£ pound +€ euro +₩ won +¥ yen +° degree +º degree diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/tts.tsv b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/tts.tsv new file mode 100644 index 0000000..e819247 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/data/whitelist/tts.tsv @@ -0,0 +1,3851 @@ +Ph.D. PHD +Hon. honorable +Mt. Mount +Maj. Major +Rev. Reverend +w/o without +a/c number account number +c/o care of +Gov. governor +vs. versus +vs versus +dept. department +prof. professor +est. estimated +vol volume +vol. volume +bldg. building +Bldg. Building +apt. apartment +Apt. Apartment +World War I World War one +World War II World War two +etc. etcetera. +SnO2 tin four oxide +dept department +HVAC H-vac +SPDR spider +ZIP zip +~ approximately +κ kappa +ω omega +α alpha +ν nu +δ delta +Δ delta +Α alpha +β beta +Β beta +χ chi +Χ chi +ε epsilon +Ε epsilon +φ phi +Φ phi +γ gamma +Γ gamma +η eta +Η eta +ι iota +Ι iota +κ kappa +Κ kappa +λ lambda +Λ lambda +μ mu +Μ mu +ν nu +Ν nu +ο omicron +Ο omicron +π pi +Π pi +θ theta +Θ theta +ρ rho +Ρ rho +σ sigma +Σ sigma +τ tau +Τ tau +υ upsilon +Υ upsilon +ω omega +Ω omega +ξ xi +Ξ xi +ψ psi +Ψ psi +ζ zeta +Ζ zeta +ltd limited +int'l international +A. D AD +A.D AD +a. d AD +a.d AD +a. d. AD +a.d. AD +B. C BC +B.C BC +b. c BC +b.c BC +A. D. AD +A.D. AD +B. C. BC +B.C. BC +b. c. 
BC +b.c. BC +A. A. a a +A.A. AA +A&A A and A +AAAs AAA's +AaB AAB +Aabo AABO +aac AAC +AACNo AACNO +AADTs AADT's +Aadu AADU +AAFld AAFLD +Aap AAP +A. B. AB +A.B. AB +Abhi ABHI +abl ABL +Abl ABL +Abp ABP +Abr ABR +abv ABV +Abz ABZ +A&C A and C +A. C. AC +A.C. AC +A/C AC +acac ACAC +Acad ACAD +ACC&S ACC and S +Achs ACH's +Acis ACI's +acls ACLS +ACMs ACM's +AcpA ACPA +AcpB ACPB +acq ACQ +A&CR A and CR +AcSOC ACSOC +acu ACU +AC&W AC and W +Acy ACY +A&D A and D +Adab ADAB +A. D. AD +A.D. AD +A.D.A.M. adam +AdaSL ADASL +AdC ADC +ADCs ADC's +AD&D AD and D +AdK ADK +Adl ADL +ADLs ADL's +adm ADM +Adm ADM +admn ADMN +ADNs ADN's +AdP ADP +ADRs ADR's +ADSRs ADSR's +ADSs ADS's +Adwa ADWA +A&E A and E +A. E. AE +A.E. AE +Aed AED +aegte AEGTE +Aeka AEKA +ael AEL +Aema AEMA +aen AEN +aere AERE +AEre AERE +AErn AERN +Aert AERT +Aes AE's +aet AET +Afa AFA +af AF +Af AF +A. F. AF +A.F. AF +Afaf AFAF +afc AFC +AFCSThe AFCSTHE +Afd AFD +AfD AFD +AfDB AFDB +Afe AFE +Aff AFF +Afi AFI +Afiq AFIQ +AFMs AFM's +AfNS AFNS +Afo AFO +Afr AFR +A. G. AG +A.G. AG +Agco AGCO +Agda AGDA +AG&G AG and G +agg AGG +Agi AGI +Agis AGI's +agli AGLI +Agn AGN +AgN AGN +AGNs AGN's +AGRs AGR's +Agsu AGSU +agt AGT +Agte AGTE +Agu AGU +A. H. AH +A.H. AH +Ahdhra AHDHRA +ahe AHE +Ahe AHE +Ahirs AHIR's +Ahk AHK +Ahsa'i AHSAI +Ahta AHTA +aht AHT +Ahu AHU +Ahva AHVA +A&I A and I +A. I. AI +A.I. AI +Aias AIA's +aici AICI +Aicme AICME +AICs AIC's +Aiel AIEL +Aife AIFE +AiG AIG +Aigis AIGI's +Aija AIJA +Aik AIK +AIs AI's +Aist AIST +Aitu AITU +Aiud AIUD +aius AIUS +A. J. AJ +A.J. AJ +Ajba AJBA +Ajmi AJMI +Ajok AJOK +Ajsa AJSA +Aju AJU +Akaa AKAA +a.k.a. AKA +aka. AKA +aka AKA +Aka. AKA +Aka AKA +A&K A and K +A. K. AK +A.K. AK +AKAPs AKAP's +akas AKA's +AKAs AKA's +akh AKH +Akpa AKPA +Aku AKU +A&L A and L +A. L. AL +A.L. AL +Alh ALH +ALUs ALU's +A'ma AMA +A&M A and M +a.m. AM +a.m AM +A. M. AM +A.M. 
AM +AMAs AMA's +amb AMB +Amb AMB +AmBX AMBX +AMCs AMC's +Amda AMDA +Amd AMD +AM&FM AM and FM +Amha AMHA +aml AML +Amli AMLI +Amlwch AMLWCH +Amm AMM +AM&O AM and O +Ampt AMPT +amr AMR +A&M's A and M's +Ams' AM's +amsl AMSL +Amta AMTA +amt AMT +Amt AMT +Amts AMT's +Amu AMU +A. N. AN +A.N. AN +andFHL ANDFHL +andPPP ANDPPP +Anej ANEJ +Angti ANGTI +Angu ANGU +Anhe ANHE +Anr ANR +Anrep ANREP +A&O A and O +A. O. AO +A.O. AO +AOCCs AOCC's +Aodh AODH +Aodla AODLA +Aogo AOGO +Aoke AOKE +Aoko AOKO +Aola AOLA +Aol AOL +Aone AONE +Aoni AONI +Aoos AOO's +aor AOR +AORs AOR's +aos AOS +AoS AOS +Aotus AOTU's +aov AOV +aovf AOVF +A&P A and P +A. P. AP +A.P. AP +Apc APC +APCh APCH +APCs APC's +apg APG +Aph APH +Apiao APIAO +Api API +APi API +Apic APIC +APICv APICV +Apl APL +Aplu APLU +Apphttp APPHTTP +Appl APPL +appr APPR +Appts APPT's +appu APPU +Appu APPU +A&P's A and P's +Aps AP's +APs AP's +AP&T AP and T +Aqa AQA +Aql AQL +Aqr AQR +A&R A and R +A. R. AR +A.R. AR +ArgR ARGR +ARGs ARG's +ArH ARH +ARMv ARMV +Arnd ARND +arv ARV +Arv ARV +Arwi ARWI +As'ad ASAD +Asai ASAI +Asao ASAO +A.s A's +A. S. AS +A.S. AS +Asasp ASASP +asbl ASBL +asci ASCI +Asci ASCI +Asdis ASDI's +Asfi ASFI +Asn ASN +Aso ASO +Aspe ASPE +Asr ASR +Assn ASSN +assoc ASSOC +Assoc ASSOC +ASSPs ASSP's +Asst ASST +Aste ASTE +Astt ASTT +Aswa ASWA +A&T A and T +atac ATAC +Atac ATAC +Atad ATAD +Ata's ATA's +Atas ATA's +A. T. AT +A.T. AT +ATBs ATB's +atc ATC +atcc ATCC +Atg ATG +ath ATH +Ath ATH +Atia's ATIA's +Ati ATI +ATi ATI +Atid ATID +Atiiq ATIIQ +Atil ATIL +Atiq ATIQ +Ativ ATIV +Atka ATKA +Atl ATL +Atli ATLI +atm ATM +Atm ATM +ATMs ATM's +Atos ATO's +ATOs ATO's +atpB ATPB +atri ATRI +Atri ATRI +A&T's A and T's +A.T.s AT's +AT&SF AT and SF +AT&T AT and T +attd ATTD +AT&T's AT and T's +atv ATV +ATVs ATV's +AtxA ATXA +A. U. AU +A.U. AU +Aub AUB +Aubl AUBL +AUVs AUV's +Auw AUW +avab AVAB +A. V. AV +A.V. AV +AVCs AVC's +avg AVG +Avn AVN +Avo AVO +AvP AVP +AVRs AVR's +Avs' AV's +Avs AV's +avvo AVVO +Awa AWA +AWAs AWA's +A. 
W. AW +A.W. AW +Awo AWO +Awwa AWWA +A. X. AX +A. Y. AY +A.Y. AY +A. Z. AZ +A.Z. AZ +Azg AZG +Azi AZI +azm AZM +AzTV AZTV +Baad BAAD +Ba'al BAAL +Baal BAAL +Baam BAAM +Baap BAAP +ba'as BAA's +Baat BAAT +Ba'ath BAATH +B. A. BA +B.A. BA +B&A B and A +Bac BAC +bae BAE +Bae BAE +BAe BAE +Bauw BAUW +B'Av BAV +B&B B and B +bb BB +B. B. BB +B.B. BB +bbc BBC +BB&CI BB and CI +BBC&PJR BBC and PJR +BBQ'er BBQER +B&Bs B and B's +BB&S BB and S +BBSes BBSE's +BB&T BB and T +BbvCI BBVCI +bc BC +B. C. BC +B.C. BC +Bcci BCCI +B.C.'s BC's +BCs BC's +BCSic BCSIC +BdA BDA +bd BD +B. D. BD +B.D. BD +Bde BDE +bds BDS +BdU BDU +B. E. BE +B.E. BE +Bedw BEDW +bef BEF +bei BEI +Bei BEI +Beih BEIH +Beis BEI's +bej BEJ +BEMs BEM's +Ber BER +Bes BE's +bf BF +B. F. BF +B.F. BF +BFFs BFF's +bfy BFY +B. G. BG +B.G. BG +BglII BGLII +bgs BGS +Bgy BGY +Bha BHA +B&H B and H +B. H. BH +B.H. BH +BH&E BH and E +Bhe BHE +BHMs BHM's +bhttp BHTTP +B. I. BI +B.I. BI +Bie BIE +biedt BIEDT +Biem BIEM +B.I.G.'s BIG's +BiH BIH +B. J. BJ +B.J. BJ +BJPs BJP's +B.J.'s BJ's +B&K B and K +B. K. BK +B.K. BK +BKCa BKCA +B. L. BL +B.L. BL +ble BLE +bleg BLEG +blev BLEV +bli BLI +Blla BLLA +BLR&D BLR and D +BLTs BLT's +B&M B and M +B. M. BM +B.M. BM +bmd BMD +bmi BMI +BMPs BMP's +BMRs BMR's +bmt BMT +B&N B and N +bnb BNB +B. N. BN +B.N. BN +Boac BOAC +B&O B and O +B. O. BO +B.O. BO +Boc BOC +BoC BOC +BOCs BOC's +boj BOJ +Boj BOJ +B&O's B and O's +bp BP +B. P. BP +B.P. BP +bpl BPL +B&Q B and Q +B. Q. BQ +Brbic BRBIC +B. R. BR +B.R. BR +BRBs BRB's +Brne BRNE +Brno BRNO +BRTs BRT's +Brza BRZA +brzu BRZU +brzy BRZY +Brzyk BRZYK +BsaL BSAL +B. s B's +B.'s B's +Bs B's +B. S. BS +B.S. BS +bsd BSD +bt BT +B. T. BT +B.T. BT +Btry BTRY +btsan BTSAN +btus BTUS +BTUs BTU's +B.U. BU +B. V. BV +B.V. BV +Bwa BWA +B&W B and W +B. W. BW +B.W. BW +BWTs BWT's +bwwtv BWWTV +Bxa BXA +Bxe BXE +B. Y. BY +byc BYC +bygd BYGD +Byk BYK +byn BYN +Byo BYO +Byou BYOU +Byrl BYRL +bySLC BYSLC +byt BYT +B.Z. 
BZ +bzhed BZHED +Bzik BZIK +Bzyb BZYB +Caat CAAT +C. A. CA +C.A. CA +C&A C and A +CA&CC CA and CC +Cadw CADW +caeca CAECA +Cae CAE +C.A.M.'s CAM's +Capt. captain +cas CAS +Cas CA's +Casc CASC +cb CB +C. B. CB +C.B. CB +cbc CBC +CB&CNS CB and CNS +CBDs CBD's +CBGBs CBGB's +CbiXS CBIXS +CB&Q CB and Q +CBs CB's +'c C +CcaA CCAA +cca CCA +Cca CCA +C&C C and C +C. C. CC +C.C. CC +ccd CCD +CCDev CCDEV +CCDs CCD's +CCEd CCED +CCi CCI +ccm CCM +CCRCs CCRC's +cct CCT +ccus CCUS +C&D C and D +cdc CDC +cd CD +C. D. CD +C.D. CD +CD&DR CD and DR +CDi CDI +cDNA CDNA +cdnas CDNAS +cDNAs CDNA's +CDOs CDO's +CDPs CDP's +CD&R CD and R +cds CDS +CDs CD's +CDSPCo CDSPCO +CdtA CDTA +CdTe CDTE +CDTi CDTI +CD&V CD and V +Cec CEC +c'e CE +ce CE +C'e CE +Ce CE +C. E. CE +C.E. CE +Ceel CEEL +Cefn CEFN +C&EI's C and EI's +C&EN C and EN +cen CEN +Cen CEN +ceo CEO +Ceol CEOL +Ceorl CEORL +Ceos CEO's +CEOs' CEO's +CEOs CEO's +cer CER +Cer CER +ces CES +Ces CE's +Cex CEX +CeX CEX +Cfa CFA +C&F C and F +cf CF +C. F. CF +C.F. CF +CFOs CFO's +CFTs CFT's +C&G C and G +C. G. CG +C.G. CG +cgl CGL +C&H C and H +C. H. CH +C.H. CH +Chhu CHHU +Chirs CHIR's +CHP'den CHPDEN +CHPs CHP's +Chy CHY +Cia CIA +ci CI +Ci CI +C. I. CI +C.I. CI +Cicic CICIC +cii CII +cim CIM +Cim CIM +Cio CIO +CIOs CIO's +Cip CIP +cis CIS +CIs CI's +cit CIT +CiTD CITD +citS CITS +CitX CITX +Ciuc CIUC +Ciu CIU +CiU CIU +Cixi CIXI +C. J. CJ +C.J. CJ +C. K. CK +C.K. CK +ckx CKX +CLBs CLB's +clc CLC +cl CL +C. L. CL +C.L. CL +Cle CLE +cllr CLLR +CL&N CL and N +Clo CLO +CL&P CL and P +CLs CL's +CLTs CLT's +Clu CLU +Clwyd CLWYD +C&MA C and MA +cm CM +C. M. CM +C.M. CM +CMCs CMC's +Cmde CMDE +cmdr CMDR +cml CML +CMMs CMM's +cmn CMN +Cmte CMTE +cn CN +C. N. CN +C.N. CN +CNMs CNM's +cnr CNR +CNs CN's +CNTs CNT's +Cnut's CNUT's +CNVs CNV's +C&O C and O +C. O. CO +C.O. CO +Co. company +Col. colonel +C&P C and P +cp CP +C. P. CP +C.P. CP +CPCs CPC's +cpo CPO +CPUs CPU's +cq CQ +CQDs CQD's +cr CR +C. R. CR +C.R. 
CR +Cre CRE +crk CRK +Crkva CRKVA +crkve CRKVE +CRLs CRL's +crm CRM +Crne CRNE +Crni CRNI +Crno CRNO +CRs CR's +csa CSA +Csa CSA +Csak CSAK +Csaky CSAKY +C&S C and S +cs CS +C.s C's +Cs C's +C. S. CS +C.S. CS +C&SF C and SF +csg CSG +CsI CSI +CSis CSI's +CSIs CSI's +CSOs CSO's +cspA CSPA +csp CSP +CSPs CSP's +CSSNCs CSSNC's +Csuz CSUZ +CTAs CTA's +C&T C and T +cte CTE +CTLs CTL's +ctr CTR +Ctrip CTRIP +ctv CTV +cu CU +C. U. CU +Cuyp CUYP +C&V C and V +cvcp CVCP +'cv CV +cv CV +C. V. CV +C.V. CV +CVEs CVE's +CVOs CVO's +CVs CV's +CVTs CVT's +C. W. CW +C.W. CW +Cwej CWEJ +Cwele CWELE +cwm CWM +cwrt CWRT +C.X. CX +C. Y. CY +C.Y. CY +cyl CYL +Cyn CYN +CyP CYP +Cyrl CYRL +Cys CY's +czci CZCI +cz CZ +C.Z. CZ +czy CZY +Daai DAAI +D. A. DA +D.A. DA +D&AD D and AD +D.A.N.C.E. dance +Dav DAV +dBa DBA +db DB +D. B. DB +D.B. DB +DCCs DCC's +D&C D and C +dc DC +D. C. DC +D.C. DC +DCom DCOM +D.C.'s DC's +DCs DC's +DCUs DCU's +D&D D and D +D. D. DD +D.D. DD +DDi DDI +Ddoc DDOC +Ddu DDU +Deah DEAH +D. E. DE +D.E. DE +deg DEG +dei DEI +Dei DEI +DeI DEI +Deijm DEIJM +Deip DEIP +dez DEZ +Dez DEZ +Dfa DFA +DFCs DFC's +D. F. DF +D.F. DF +DfE DFE +DfES DFES +DFPs DFP's +DFs DF's +dfx DFX +D&G D and G +D. G. DG +D.G. DG +Dha DHA +DHBs DHB's +D&H D and H +dh DH +D. H. DH +D.H. DH +d'HEC DHEC +dhe DHE +Dheku DHEKU +Dhod DHOD +dhol DHOL +Dhol DHOL +Dhoo DHOO +Dhor DHOR +Dhou DHOU +dhr DHR +DHSs DHS's +D.I. DI +diy DIY +DiY DIY +Diyn DIYN +DIYs DIY's +Djam DJAM +dj DJ +D. J. DJ +D.J. DJ +DJ'ed DJED +Djem DJEM +Dji DJI +Djo DJO +D'Jok DJOK +djr DJR +D.J.s DJ's +DJs DJ's +DjVu DJVU +DjVus DJVU's +D. K. DK +D.K. DK +dla DLA +dl DL +D. L. DL +D.L. DL +dlia DLIA +DL&LR DL and LR +DLLs DLL's +dlo DLO +DL&W DL and W +D.M.C.'s DMC's +D&M D and M +D. M. DM +D.M. DM +DMed DMED +DM&E DM and E +DMs DM's +DMUs DMU's +Dna DNA +DnA DNA +DnaJ DNAJ +D. N. DN +D.N. DN +Dnepr DNEPR +dnes DNES +Dnes DNE's +DNFs DNF's +DNSBLs DNSBL's +dns DNS +D.O.A.'s DOA's +dod DOD +Dod DOD +DoD DOD +D. O. DO +D.O. 
DO +D'oh DOH +dok DOK +Dok DOK +domt DOMT +Domt DOMT +Dop DOP +dotCMS DOTCMS +DotSVN DOTSVN +Douw DOUW +dozd DOZD +d'OZ DOZ +Dozhd DOZHD +dp DP +D. P. DP +D.P. DP +dpon DPON +DProf DPROF +DPs DP's +D.Q. DQ +drc DRC +Dr. doctor +D. R. DR +D.R. DR +dri DRI +Dri DRI +DRIs DRI's +Driu DRIU +Driv DRIV +Drnis DRNI's +Dro DRO +Drs. doctors +Drska DRSKA +druj DRUJ +DsbA DSBA +DS&BB DS and BB +dsDNA DSDNA +ds DS +Ds D's +D. S. DS +D.S. DS +DSGi DSGI +dsi DSI +DSi DSI +dsl DSL +DSMs DSM's +dsn DSN +D&SNGRR D and SNGRR +dsu' DSU +DTCs DTC's +DTDs DTD's +D. T. DT +D.T. DT +DT&E DT and E +DT&I DT and I +dtl DTL +DTs DT's +dty DTY +D.U. DU +dva DVA +Dva DVA +DVCs DVC's +dvd DVD +DVDs DVD's +dv DV +D. V. DV +D.V. DV +dve DVE +dvfb DVFB +dvi DVI +dvije DVIJE +Dvin DVIN +Dvir DVIR +Dwa DWA +D&W D and W +D. W. DW +D.W. DW +Dwedw DWEDW +Dwi DWI +dwr DWR +dwur DWUR +dxa DXA +dx DX +D.X. DX +DXers DXER's +Dydd DYDD +dy DY +Dy DY +D. Y. DY +dypl DYPL +dyr DYR +dz DZ +dzis DZIS +Dzog DZOG +Dzor DZOR +Dzus DZU's +E. A. EA +E.A. EA +EAMs EAM's +eas EAS +Eas EA's +Eav EAV +E. B. EB +E.B. EB +Ebn EBN +E&BR E and BR +Ebru EBRU +EBWs EBW's +ecc ECC +Eccl ECCL +ec EC +Ec EC +E. C. EC +E.C. EC +ece ECE +Ece ECE +Ecem ECEM +ecg ECG +ecma ECMA +ECNs ECN's +ECP&DA's ECP and DA's +eCRM ECRM +ect ECT +ECVs ECV's +edb EDB +E. D. ED +E.D. ED +Edh EDH +edhe EDHE +EDMs EDM's +Edmx EDMX +edn EDN +Edn EDN +eds EDS +Eds ED's +Edw EDW +Eeb EEB +E. E. EE +E.E. EE +Eef EEF +Eega EEGA +eene EENE +eep EEP +Ees EE's +eeuw EEUW +Eeuw EEUW +EEZs EEZ's +efc EFC +ef EF +Ef EF +E. F. EF +E.F. EF +eFS EFS +Efs EF's +Efu EFU +Egba EGBA +egfl EGFL +e.g. for example +e. g. for example +EG&G EG and G +Egi EGI +E&GR E and GR +egy EGY +Egyl EGYL +EHAs EHA's +E. H. EH +E.H. EH +ehf EHF +Ehttp EHTTP +EiCs EIC's +E. I. EI +E.I. EI +eIF EIF +Eitr EITR +EITs EIT's +EJBs EJB's +ej EJ +E. J. EJ +E.J. EJ +E. K. EK +E.K. EK +eki EKI +Ekma EKMA +Ekow EKOW +eks EKS +Eks EK's +E. L. EL +E.L. 
EL +ELTs ELT's +ELUs ELU's +Embd EMBD +eMC EMC +EMDs EMD's +Emea EMEA +E&M E and M +E. M. EM +E.M. EM +Emge EMGE +emra EMRA +E&NA E and NA +Enas ENA's +EnBW ENBW +EncFS ENCFS +Encyc ENCYC +E. N. EN +E.N. EN +Enes ENE's +engl ENGL +Engl ENGL +Engr ENGR +Eni ENI +enn ENN +Enn ENN +ens ENS +Ens EN's +Enso ENSO +ENTJs ENTJ's +EoD EOD +E. O. EO +E.O. EO +Eois EOI's +eok EOK +Eole EOLE +Eol EOL +Eolss EOLS's +Eom EOM +eop EOP +eoptta EOPTTA +Eora EORA +Eorl EORL +Eors EOR's +E&P E and P +Epe EPE +ep EP +Ep EP +E. P. EP +E.P. EP +Eph EPH +Ephs EPH's +epi EPI +Epi EPI +ePO EPO +epos' EPO's +epos EPOS +Epos EPO's +E.P.s EP's +eq EQ +Eq EQ +E. R. ER +E.R. ER +ERJs ERJ's +Eru ERU +Eruv ERUV +Erv ERV +ES3 ES three +Esa ESA +Esam ESAM +Esa's ESA's +Esat ESAT +ESCs ESC's +Esd ESD +E. S. ES +E.S. ES +esi ESI +espn ESPN +ESPNhttp ESPNHTTP +esq ESQ +Esq ESQ +Esq. esquire +Esraa ESRAA +Esra ESRA +Esref ESREF +Esri ESRI +estd ESTD +Estd ESTD +esu ESU +Esva ESVA +Etad ETAD +ETBs ETB's +E. T. ET +E.T. ET +ETFs ETF's +Et'hem ETHEM +Eti ETI +etj ETJ +Eto ETO +ets ETS +Ets ET's +ETs ET's +ett ETT +Ett ETT +Ettre ETTRE +Etts ETT's +Etz ETZ +EUBs EUB's +E. U. EU +E.U. EU +eup EUP +Eurwg EURWG +Euse EUSE +Eusi EUSI +EvaGT EVAGT +E. V. EV +E.V. EV +evl EVL +Evna EVNA +evnt EVNT +EVs EV's +evv EVV +Ewa's EWA's +E. W. EW +E.W. EW +Ewha EWHA +EWOs EWO's +Ewu EWU +excl EXCL +exd EXD +E.X. EX +exhb EXHB +exh EXH +Exptl EXPTL +exsul EXSUL +E&Y E and Y +E. Y. EY +Eyk EYK +eyu EYU +Ezaa EZAA +ez EZ +Ez EZ +Ezh EZH +Ezu EZU +Ezy EZY +Faaa FAAA +faa FAA +Faa FAA +faama FAAMA +FabH FABH +FabR FABR +F.A.B.'s FAB's +Fadl FADL +fa FA +Fa FA +F. A. FA +F.A. FA +Faf FAF +Fafhrd FAFHRD +fai FAI +Fai FAI +FAIPs FAIP's +Faiq FAIQ +fajn FAJN +Fal FAL +farw FARW +fasc FASC +Fasc FASC +fas FAS +Fas FA's +FAs FA's +FasL FASL +F. B. FB +F.B. FB +F.B.G.s FBG's +FBOs FBO's +fcb FCB +FCBs FCB's +F&C F and C +fc FC +F. C. FC +F.C. FC +F.C.'s FC's +FD&C FD and C +FDCPAs FDCPA's +F. D. FD +F.D. 
FD +FDICs FDIC's +FdI FDI +F. E. FE +F.E. FE +FEGs FEG's +Fes FE's +ffc FFC +ff FF +F. F. FF +F.F. FF +ffm FFM +FFs FF's +F. G. FG +F.G. FG +fgk FGK +FGs FG's +F. H. FH +F.H. FH +fhm FHM +fiadh FIADH +fia FIA +FiBL FIBL +Fi'd FID +F.I F +F&I F and I +F. I. FI +F.I. FI +Figl FIGL +fija FIJA +Fio FIO +FiO FIO +F. J. FJ +F.J. FJ +F. K. FK +F.K. FK +Fla FLA +Fles FLE's +fl FL +F. L. FL +F.L. FL +fli FLI +flyr FLYR +fm FM +F. M. FM +F.M. FM +fMRI FMRI +FMs FM's +F. N. FN +F.N. FN +F. O. FO +FPDs FPD's +fp FP +F. P. FP +F.P. FP +FPGAs FPGA's +FPM&SA FPM and SA +fps FPS +FPSs FPS's +F.Q. FQ +fr FR +F. R. FR +F.R. FR +frs FRS +Fru FRU +Frwydr FRWYDR +fsf FSF +fs FS +F. s F's +Fs F's +F. S. FS +F.S. FS +fsn FSN +FSSs FSS's +FTAs FTA's +Ft. Fort +F. T. FT +F.T. FT +ftp FTP +FtsA FTSA +Ftuh FTUH +F. V. FV +F.V. FV +F&W F and W +F. W. FW +F.W. FW +fwr FWR +fx FX +Fyb FYB +F.Y. FY +Fyn FYN +fyi FYI +fyr FYR +Fyw FYW +F.Z. FZ +Gaac GAAC +G. A. GA +G.A. GA +Gbe GBE +GbE GBE +G. B. GB +G.B. GB +GB&NDR GB and NDR +G&C G and C +G. C. GC +G.C. GC +GCSEs GCSE's +GC&SF GC and SF +Gdal GDAL +Gde GDE +gd GD +G. D. GD +G.D. GD +Gdow GDOW +GDPs GDP's +gdr GDR +Gebr GEBR +Gebt GEBT +Ged GED +ge GE +G. E. GE +G.E. GE +Gek GEK +Gen. general +geq GEQ +Geu GEU +Gev GEV +GeV GEV +Gfa GFA +GF&A GF and A +G. F. GF +G.F. GF +G. G. GG +G.G. GG +Ghe GHE +G. H. GH +G.H. GH +GHGs GHG's +Ghir GHIR +ghra GHRA +GH&SA GH and SA +Giei GIEI +G. I. GI +G.I. GI +G. J. GJ +G.J. GJ +G. K. GK +G.K. GK +Gla GLA +G&L G and L +G. L. GL +G.L. GL +gli GLI +Gli GLI +Glis GLI's +glnA GLNA +Glos GLO's +gma GMA +GmbH GMBH +Gmel GMEL +gm GM +G. M. GM +G.M. GM +gmin GMIN +GMOs GMO's +GMs GM's +gmt GMT +Gmul GMUL +gnb GNB +G. N. GN +G.N. GN +GNP&BR GNP and BR +gnp GNP +GNPs GNP's +G.O. GO +GPCRs GPCR's +GPdI GPDI +G&P G and P +gp GP +G. P. GP +G.P. GP +GpIIb GPIIB +GPMGs GPMG's +GPRs GPR's +GPUs GPU's +G. Q. GQ +grac GRAC +gra GRA +Gra GRA +Grbac GRBAC +GRBs GRB's +grc GRC +GRCs GRC's +gre GRE +Gre GRE +gr GR +G. R. GR +G.R. 
GR +grrl GRRL +Gryf GRYF +gry GRY +Gry GRY +grz GRZ +G&S G and S +Gs G's +G. S. GS +G.S. GS +GSIs GSI's +GSOp GSOP +GTAs GTA's +gt GT +G. T. GT +G.T. GT +GTi GTI +GTs GT's +G. U. GU +G.U. GU +GUIs GUI's +G. V. GV +G.V. GV +GvpA GVPA +GWe GWE +Gwet GWET +gw GW +G. W. GW +G.W. GW +Gwi GWI +gwr GWR +Gy GY +G. Y. GY +Gyn GYN +Gyps GYP's +gyu GYU +Gyu GYU +H. A. HA +H.A. HA +H&A H and A +HBCo HBCO +HBGAs HBGA's +H. B. HB +H.B. HB +H&BR H and BR +HCEs HCE's +H. C. HC +H.C. HC +hcl HCL +HClO HCLO +HCoV HCOV +HCPs HCP's +Hcy HCY +Hdad HDAD +hDAF HDAF +HDDs HDD's +hd HD +H. D. HD +H.D. HD +hdh HDH +HDi HDI +H. E. HE +H.E. HE +hezb HEZB +Hezb HEZB +H&F H and F +H. F. HF +H.F. HF +hgcA HGCA +hg HG +H. G. HG +H.G. HG +HgO HGO +HGPs HGP's +HgU HGU +HGVs HGV's +H&H H and H +H. H. HH +H.H. HH +H. I. HI +H.I. HI +Hizb HIZB +hja HJA +H. J. HJ +H.J. HJ +Hkam HKAM +H&K H and K +H. K. HK +H.K. HK +Hla HLA +H. L. HL +H.L. HL +Hluk HLUK +hlutr HLUTR +Hly HLY +Hman HMAN +H&M H and M +hm HM +H. M. HM +H.M. HM +HMOs HMO's +hMRE HMRE +H&M's H and M's +H. N. HN +H.N. HN +H. O. HO +H.O. HO +hpc HPC +hp HP +H. P. HP +H.P. HP +H.Q. HQ +H&R H and R +H. R. HR +H.R. HR +hroa HROA +HRo HRO +hrs HRS +HRs HR's +hscy HSCY +Hsee HSEE +hs HS +Hs H's +H. S. HS +H.S. HS +hsi HSI +hSlo HSLO +HSPs HSP's +HSTs HST's +hsv HSV +hTAS HTAS +HTAs HTA's +htc HTC +ht HT +H. T. HT +H.T. HT +hti HTI +Htin HTIN +Htoo HTOO +HtrA HTRA +HTRs HTR's +http HTTP +https HTTPS +Htwa HTWA +H. U. HU +HVCs HVC's +hvcv HVCV +hvem HVEM +Hvem HVEM +H. V. HV +H.V. HV +H. W. HW +H.W. HW +hwy HWY +Hwy HWY +hxt HXT +H. Y. HY +H.Y. HY +H. Z. HZ +HZ&PC HZ and PC +iaaf IAAF +IaaS IAAS +Iaca IACA +Iacob IACOB +IACTs IACT's +Iacub IACUB +iagt IAGT +I. A. IA +I.A. IA +Iapa IAPA +iar IAR +Iar IAR +IAUCs IAUC's +Iax IAX +Ibac IBAC +Ibaes IBAE's +IBCs IBC's +IBDA'A IBDAA +Ibda IBDA +ibdal IBDAL +I. B. IB +I.B. 
IB +Ibm IBM +Ibne IBNE +ibn IBN +Ibra IBRA +Ibsa IBSA +Ibs IB's +iBT IBT +Ibu IBU +iby IBY +ICBMs ICBM's +Icche ICCHE +icf ICF +I&C I and C +ic IC +Ic IC +I. C. IC +I.C. IC +Ici ICI +iCN ICN +Ico ICO +IcRn ICRN +ics ICS +ICs IC's +ICTs ICT's +ICTVdB ICTVDB +ICv ICV +iCyt ICYT +Iddaa IDDAA +Idd IDD +ID'd IDD +id ID +I. D. ID +I.D. ID +Idi IDI +Idir IDIR +IDLHs IDLH's +Idm IDM +IDPs IDP's +ids IDS +IDs ID's +IDx IDX +i.e. that is +Ifa IFA +ifc IFC +IfF IFF +Ifft IFFT +ifi IFI +Ifi IFI +IFIs IFI's +IfM IFM +Ifni IFNI +IFNs IFN's +Ifop IFOP +IFRSs IFRS's +Iga IGA +IgA IGA +IGFs IGF's +'ig IG +Ig IG +I.G. IG +igi IGI +IgM IGM +ign IGN +Ign IGN +Ih IH +I. H. IH +ihi IHI +ihm IHM +ihn IHN +Ihn IHN +Ihor IHOR +Iht IHT +IIb IIB +IIc IIC +IIfx IIFX +Iiga IIGA +IIga IIGA +IIgs IIG's +IIGs IIG's +IIIb IIIB +IIIBy IIIBY +IIIc IIIC +IIId IIID +I. I. II +I.I. II +ija IJA +Ija IJA +ij IJ +I. J. IJ +Ijok IJOK +Ijui IJUI +Ikh IKH +I.K. IK +IKr IKR +Iksa IKSA +Iku IKU +Ilbe ILBE +IL&FS IL and FS +ili ILI +Ili ILI +I. L. IL +I.L. IL +Ilm ILM +ilu ILU +Ilu ILU +IMbd IMBD +imc IMC +imdb IMDB +IMDb. IMDB +IMDb IMDB +ime IME +I&M I and M +I. M. IM +I.M. IM +Iml IML +Imm IMM +Imms' IMM's +imoa IMOA +impr IMPR +Impr IMPR +iMSNs IMSN's +Imst IMST +Inba INBA +inb INB +incl INCL +ind IND +Ind IND +Infs INF's +inHg INHG +I. N. IN +I.N. IN +InlB INLB +InP INP +Inre INRE +Inspx INSPX +Ints INT's +INTs INT's +Intu INTU +inv INV +Ioba IOBA +IODs IOD's +io. IO +io IO +.I.o. IO +Io. IO +Io IO +.IO IO +I/O IO +Ioka IOKA +Iok IOK +IoM IOM +IOPs IOP's +iOS IOS +Io's IO's +I/Os IO's +I.O.'s IO's +Iosu IOSU +IoT IOT +IoW IOW +IPCs IPC's +ip IP +Ip IP +I.P. IP +Ipo IPO +IPPs IPP's +Ippu IPPU +iPPV IPPV +iPSC IPSC +iPSCs IPSC's +ipse IPSE +iPS IPS +IPs IP's +IPv IPV +iQ IQ +I.Q. IQ +Irgm IRGM +Irig IRIG +Iril IRIL +i'r IR +ir IR +Ir IR +I. R. IR +I.R. 
IR +Irla IRLA +Irmis IRMI's +iro IRO +Iro IRO +Irra IRRA +IRs IR's +iru IRU +Iru IRU +ISAv ISAV +Isba ISBA +isbn ISBN +ISBNs ISBN's +isCf ISCF +iSC ISC +Isc ISC +Isd ISD +isdn ISDN +Isgec ISGEC +ishq ISHQ +Ishq ISHQ +I. S. IS +I.S. IS +Isl ISL +ISMNs ISMN's +ISPs ISP's +Isra'il ISRAIL +Isra ISRA +iss ISS +Iss IS's +ISSNs ISSN's +Isu ISU +Ite ITE +ITHs ITH's +iti ITI +Iti ITI +I.T. IT +itk ITK +Itk ITK +ITNNs ITNN's +Itoi ITOI +iTP ITP +Itse ITSE +itt ITT +Itu ITU +Itz ITZ +Iucn IUCN +IUDs IUD's +Iuz IUZ +I. V. IV +I.V. IV +IVs IV's +Ivu IVU +Iwas IWA's +Iwaz IWAZ +IWBs IWB's +iwi IWI +iw IW +I. W. IW +iwrg IWRG +iwspy IWSPY +Iwuh IWUH +IXb IXB +IXBs IXB's +IXCs IXC's +IXe IXE +iXL IXL +ixtle IXTLE +Iya IYA +Iyar IYAR +iyem IYEM +Iyi IYI +iy IY +I. Y. IY +I.Y. IY +Jaf JAF +J. A. JA +J&B J and B +jb JB +J. B. JB +J.B. JB +jcis JCIS +jc JC +J. C. JC +J.C. JC +JCRs JCR's +JdeBP JDEBP +jdi JDI +jd JD +J. D. JD +J.D. JD +jdk JDK +J&D's J and D's +J.D.s JD's +J. E. JE +J.E. JE +jf JF +J. F. JF +J.F. JF +J. G. JG +J.G. JG +jh JH +J. H. JH +J.H. JH +JHs JH's +JHSVs JHSV's +JHud JHUD +J. I. JI +J.I. JI +J&J J and J +J. J. JJ +J.J. JJ +Jka JKA +jkd JKD +J&K J and K +J. K. JK +J.K. JK +jkx JKX +J. L. JL +J.L. JL +J. M. JM +J.M. JM +jnb JNB +J. N. JN +J.N. JN +Jno JNO +jnr JNR +J. O. JO +J.O. JO +jpg JPG +J&P J and P +jp JP +J.P. JP +J.Q. JQ +J&R J and R +J. R. JR +J.R. JR +Jr. junior +jr. junior +Jr junior +jr junior +J.R.'s JR's +jsb JSB +Js J's +J. S. JS +J.S. JS +J&T J and T +J. T. JT +J.T. JT +JT&KW JT and KW +J. U. JU +J&V J and V +J. V. JV +J.V. JV +Jwa JWA +J. W. JW +J.W. JW +jx JX +J. Y. JY +J.Y. JY +Jym JYM +Jymn JYMN +J. Z. JZ +J.Z. JZ +K. A. KA +K.A. KA +kbi KBI +K&B K and B +K. B. KB +K.B. KB +K. C. KC +K.C. KC +K.C.'s KC's +kDa KDA +kdal KDAL +K&D K and D +kd KD +K. D. KD +K.D. KD +K. D.'s KD's +K.D.'s KD's +ke KE +Ke KE +K. E. KE +K.E. KE +kfc KFC +K.F. KF +kgr KGR +kgt KGT +KgU KGU +kgv KGV +Khizr KHIZR +K. H. KH +K.H. 
KH +Khlav KHLAV +Khmu KHMU +Khri KHRI +Khru KHRU +K. I. KI +K.I. KI +Kjer KJER +kj KJ +K. J. KJ +K.J. KJ +Kjop KJOP +KJo's KJO's +K. K. KK +K.K. KK +Kle KLE +kl KL +K. L. KL +K.L. KL +K&M K and M +km KM +K. M. KM +K.M. KM +K&N K and N +kn KN +K. N. KN +K.N. KN +k'o KO +K. O. KO +K.O. KO +KPIs KPI's +K&P K and P +K. P. KP +K.P. KP +Kppen KPPEN +kptm KPTM +krc KRC +KRCs KRC's +K.R.I.T.'s KRIT's +Krka KRKA +kr KR +K. R. KR +K.R. KR +Krne KRNE +kroz KROZ +Kroz KROZ +Kru KRU +ksa KSA +kset KSET +Kseur KSEUR +Ksevt KSEVT +ks KS +K. S. KS +K.S. KS +Ktav KTAV +KT&K KT and K +kt KT +K. T. KT +K.T. KT +Kuaa KUAA +Kud KUD +kuih KUIH +K. U. KU +K.U. KU +Kutb KUTB +Kvik KVIK +Kvit KVIT +K. V. KV +K.V. KV +Kvyat KVYAT +kwa KWA +kwe KWE +Kyse KYSE +kyt KYT +Kyt KYT +kz KZ +K. Z. KZ +L. A. LA +L.A.'s LA's +Lay's LAY's +LBi LBI +L. B. LB +L.B. LB +LBPs LBP's +LCAs LCA's +lcc LCC +LCCs LCC's +LC&DR LC and DR +LCDs LCD's +LCIs LCI's +L. C. LC +L.C. LC +lcn LCN +LCSs LCS's +LCTs LCT's +LCVPs LCVP's +Lda LDA +LDCs LDC's +L. D. LD +L.D. LD +L. E. LE +L.E. LE +LFFCs LFFC's +L. F. LF +L.F. LF +LFs LF's +LFSRs LFSR's +L. G. LG +L.G. LG +LGMs LGM's +LG&RDD LG and RDD +LGs LG's +LGUs LGU's +LGVs LGV's +Lha LHA +L. H. LH +L.H. LH +L. I. LI +L.I. LI +Lje LJE +L. J. LJ +L.J. LJ +L. K. LK +lks LKS +Llapi LLAPI +lle LLE +lli LLI +L&L L and L +L. L. LL +L.L. LL +llp LLP +llu LLU +LMGs LMG's +lm LM +L. M. LM +L.M. LM +LMPs LMP's +LMQs LMQ's +LnAIB LNAIB +L&N L and N +L. N. LN +L.N. LN +L.O.C.'s LOC's +Lokk LOKK +l'OL LOL +L. O. LO +L.O. LO +LPARs LPAR's +L&P L and P +lp LP +L. P. LP +L.P. LP +L&PM L and PM +LPMud LPMUD +LPs LP's +LPThe LPTHE +lr LR +L. R. LR +L.R. LR +LRTs LRT's +LRVs LRV's +LSDs LSD's +LSi LSI +ls LS +Ls L's +L. S. LS +L.S. LS +LSTs LST's +LSVCCs LSVCC's +Ltda LTDA +Ltd. limited +L&T L and T +Lt. lieutenant +lt LT +L. T. LT +L.T. LT +ltoh LTOH +L. U. LU +lv LV +L. V. LV +L.V. LV +L. W. LW +L.W. LW +L.Y. LY +L&YR L and YR +M. A. MA +M.A. MA +M&A M and A +Mbewu MBEWU +mbi MBI +M. B. 
MB +M.B. MB +mBo MBO +Mbre MBRE +mbr MBR +MCCs MCC's +M. C. MC +M.C. MC +MCs MC's +mcyG MCYG +mdb MDB +MDCs MDC's +mdDA MDDA +MD&DI MD and DI +Mde MDE +MDGs MDG's +MDHUs MDHU's +mDia MDIA +MDic MDIC +mdla MDLA +md MD +M. D. MD +M.D. MD +mdr MDR +M. E. ME +M.E. ME +MFDs MFD's +MFe MFE +M. F. MF +M.F. MF +MFTs MFT's +Mgadla MGADLA +Mgal MGAL +mga MGA +Mga MGA +Mgbo MGBO +MGen MGEN +M&G M and G +M. G. MG +M.G. MG +M&GN M and GN +M&GR's M and GR's +Mha MHA +MHCs MHC's +MH&L MH and L +M&H M and H +M. H. MH +M.H. MH +Mhor MHOR +Mhow MHOW +M.I.A.'s MIA's +M&I M and I +M. I. MI +M.I. MI +M&J M and J +M. J. MJ +M.J. MJ +M. K. MK +M.K. MK +MKs MK's +MLAs MLA's +mlc MLC +MLCs MLC's +mlg MLG +mli MLI +Mlle MLLE +M&L M and L +M. L. ML +M.L. ML +MLPs MLP's +MM&A MM and A +M&M M and M +M. M. MM +M.M. MM +M&M's M and M's +MNCs MNC's +M&NF M and NF +M. N. MN +M.N. MN +M.O.G.U.E.R.A.'s MOGUERA's +M. O. MO +M.O. MO +MpA MPA +MPAs MPA's +MPBu MPBU +MP&I MP and I +MPi MPI +MPLMs MPLM's +M&P M and P +M. P. MP +M.P. MP +MPPs MPP's +M.P.'s MP's +MPs MP's +MPThe MPTHE +Mpu MPU +M. Q. MQ +Mra MRA +MRBs MRB's +mre MRE +MR&LE MR and LE +Mr. mister +M. R. MR +M.R. MR +mRNA MRNA +mRNAs MRNA's +Mrs. misses +MRTs MRT's +msd MSD +Mse MSE +M&S M and S +Ms. miss +M. S. MS +M.S. MS +MSPs MSP's +mst MST +Mta MTA +MTAs MTA's +M&T M and T +M. T. MT +M.T. MT +Mtor MTOR +mtvU MTVU +MTVu MTVU +M. U. MU +M.U.s MU's +MVMs MVM's +M. V. MV +M.V. MV +MVPs MVP's +MVs MV's +MWe MWE +M. W. MW +M.W. MW +M. X. MX +myb MYB +myc MYC +Myc MYC +MyDD MYDD +M. Y. MY +M.Y. MY +MySQL MYSQL +M. Z. MZ +N. A. NA +N.A. NA +N.B.A.'s NBA's +NBAs NBA's +N. B. NB +N.B. NB +nbs NBS +NBTwo NBTWO +NCAAs NCAA's +NCCs NCC's +N. C. NC +N.C. NC +NCOs NCO's +N. D. ND +N.D. ND +N. E. NE +N.E. NE +ner NER +NFATc NFATC +nfed NFED +N. F. NF +N.F. NF +Nge NGE +N. G. NG +N.G. NG +NGOs NGO's +Nha NHA +NHCEs NHCE's +NHCs NHC's +nhi NHI +NHM&W NHM and W +N. H. NH +N.H. NH +NHPs NHP's +N. I. NI +N.I. NI +N'I NI +N. J. NJ +N.J. 
NJ +Njoo NJOO +N.J.'s NJ's +N. K. NK +N.K. NK +nkvd NKVD +nkv NKV +N. L. NL +N.L. NL +NLRs NLR's +N. M. NM +N.M. NM +N. N. NN +N.N. NN +NPCs NPC's +NPMs NPM's +N. P. NP +N.P. NP +NPs NP's +N. Q. NQ +nri NRI +Nri NRI +NRIs NRI's +NRJs NRJ's +nr NR +N. R. NR +N.R. NR +N. S. NS +N.S. NS +NSOs NSO's +Nta NTA +ntb NTB +N. T. NT +N.T. NT +ntw NTW +N.U. NU +NvDA's NVDA's +N. V. NV +N.V. NV +nwa NWA +N&W N and W +N. W. NW +N.W. NW +nyc NYC +N. Y. NY +N.Y. NY +N'Zif NZIF +NZiK NZIK +N'Zi NZI +N.Z. NZ +O. A. OA +O.A. OA +obl OBL +Oblt OBLT +O.B. OB +O. C. OC +O.C. OC +Octl OCTL +O. D. OD +O.E. OE +O.F. OF +ofr OFR +O&G O and G +O. G. OG +O.G. OG +O. H. OH +O.H. OH +O. I. OI +O.I. OI +O. J. OJ +O.J. OJ +OK'd OKD +OKd OKD +oke OKE +Oke OKE +O&K O and K +O. K. OK +O.K. OK +O. L. OL +O.L. OL +Olo OLO +Olov OLOV +ols OLS +OLs OL's +olvwm OLVWM +olwm OLWM +O&MFL O and MFL +omg OMG +OMGs OMG's +O. M. OM +O.M. OM +OmOm OMOM +Om's OM's +oncu ONCU +ond OND +onf ONF +O. N. ON +O.N. ON +O&O O and O +O. O. OO +O.O. OO +Oop OOP +oor OOR +oose OOSE +Oo's OO's +Ootw OOTW +OPMs OPM's +O. P. OP +O.P. OP +opr OPR +Opr OPR +ORFs ORF's +OR&N OR and N +O. R. OR +osaa OSAA +osa OSA +Osa OSA +OSBs OSB's +O. S. OS +O.S. OS +OSTs OST's +osv OSV +O.T. OT +OTs OT's +Otu OTU +otv OTV +O.U. OU +ovca OVCA +Ovca OVCA +Ovda OVDA +Ovo OVO +ov OV +Ov OV +O. V. OV +O.V. OV +OWCs OWC's +O. W. OW +O.W. OW +pa PA +P. A. PA +P.A. PA +pBCE PBCE +pbc PBC +pb PB +P. B. PB +P.B. PB +PCBs PCB's +PCDDs PCDD's +PCeU PCEU +PCIe PCIE +pci PCI +pcl PCL +PcoA PCOA +pc PC +P. C. PC +P.C. PC +PCRev PCREV +pcs PCS +P.C.s PC's +pDAB PDAB +PDAs PDA's +pdbp PDBP +PDBsum PDBSUM +PDCs PDC's +PD&D PD and D +PDEs PDE's +pdf PDF +PDFs PDF's +P&D P and D +P. D. PD +P.D. PD +PDPs PDP's +PDs PD's +P.E.I.'s PEI's +P. E. PE +P.E. PE +PFs PF's +PFW&C PFW and C +PGMs PGM's +PG&N PG and N +P&G P and G +P. G. PG +P.G. PG +Phlo PHLO +P&H P and H +P. H. PH +P.H. PH +php PHP +PHPs PHP's +P&I P and I +P. I. PI +P.I. PI +Piz PIZ +pj PJ +P. J. 
PJ +P.J. PJ +PJs PJ's +P. K. PK +P.K. PK +plc PLC +PLCs PLC's +P. L. PL +P.L. PL +PMMoV PMMOV +p.m. PM +p.m PM +P. M. PM +P.M. PM +PMs PM's +PNaCl PNACL +pna PNA +Pnau PNAU +pnb PNB +PNEs PNE's +PNETs PNET's +pneus PNEUS +png PNG +PNNs PNN's +PNoy PNOY +pn PN +P. N. PN +P.N. PN +P.O.D.'s POD's +P&O P and O +P. O. PO +P.O. PO +P.O.W.'s POW's +P&PH P and PH +PPi PPI +P. P. PP +P.P. PP +PPVs PPV's +PPy PPY +pr PR +P. R. PR +P.R. PR +P. s P's +P.'s P's +Ps P's +P. S. PS +P.S. PS +PSSAs PSSA's +pTA PTA +PTAs PTA's +Pte PTE +PTEs PTE's +ptf PTF +Ptie PTIE +PTLs PTL's +Ptol PTOL +pt PT +P. T. PT +P.T. PT +PTTs PTT's +P'Twa PTWA +Puiu PUIU +Pul PUL +PVAs PVA's +PvdA PVDA +pve PVE +P. V. PV +P.V. PV +pvr PVR +PVs PV's +pwll PWLL +P&W P and W +P. W. PW +P.W. PW +Pyi PYI +Pyk PYK +Pyl PYL +PyL PYL +P. Y. PY +Pyu PYU +Pyw PYW +Pyx PYX +P. Z. PZ +Q. A. QA +Q&A Q and A +Q&A's Q and A's +Q&As Q and A's +QbA QBA +Q.B. QB +Q.C. QC +Q. E. QE +Q.H. QH +Q.I. QI +Q. J. QJ +Q. N. QN +QPOs QPO's +Q.V. QV +R. A. RA +R.A. RA +R&A R and A +R&AW R and AW +RbAg RBAG +RBCs RBC's +RBIs RBI's +R&B R and B +R. B. RB +R.B. RB +rca RCA +R&C R and C +rc RC +R. C. RC +R.C. RC +RCTs RCT's +RdE RDE +R&D R and D +R. D. RD +R.D. RD +R.E.M.'s REM's +REPLs REPL's +R. E. RE +R.E. RE +R. F. RF +R.F. RF +R&G R and G +R. G. RG +R.G. RG +RHIBs RHIB's +Rhiw RHIW +rhl RHL +RhoG RHOG +Rho's RHO's +R. H. RH +R.H. RH +R&I R and I +R. I. RI +R.I. RI +riu RIU +Riu RIU +rivs RIVS +Rivu RIVU +Rixt RIXT +rjf RJF +R. J. RJ +R.J. RJ +R. K. RK +R.K. RK +R. L. RL +R.L. RL +rly RLY +RMDs RMD's +RMLs RML's +rm RM +R. M. RM +R.M. RM +RMSDs RMSD's +RMs RM's +rna RNA +RNAs RNA's +Rnet RNET +Rnic RNIC +RNNs RNN's +RNPs RNP's +R. N. RN +R.N. RN +rOmpB ROMPB +Rooi ROOI +R. O. RO +R.O. RO +ROVs ROV's +Roxb ROXB +Roxx ROXX +RP&C RP and C +rpc RPC +Rpe RPE +rpg RPG +RPGs RPG's +rpm RPM +R. P. RP +R.P. RP +RPs RP's +R. Q. RQ +R.Q. RQ +rra RRA +rrd RRD +Rreli RRELI +rre RRE +rRNA RRNA +rRNAs RRNA's +R&R R and R +rr RR +R. R. RR +R.R. 
RR +rsh RSH +Rsis RSI's +rsly RSLY +RsmA RSMA +RSpec RSPEC +rs RS +Rs R's +R. S. RS +R.S. RS +RSu RSU +rtb RTB +RTCs RTC's +RTEjr RTEJR +RTeOR RTEOR +RTOs RTO's +rtPA RTPA +rt RT +R. T. RT +R.T. RT +rts RTS +RTVFBiH RTVFBIH +Ruao RUAO +Rukn RUKN +Ruk RUK +Rupf RUPF +Rupr RUPR +rup RUP +Rup RUP +R. U. RU +R.U. RU +RutB RUTB +Ruu RUU +Ruwa RUWA +Ruy RUY +Ruyt RUYT +Rvat RVAT +Rvo RVO +R. V. RV +R.V. RV +rvs RVS +rwa RWA +Rwa RWA +rwb RWB +RWEs RWE's +rwjf RWJF +R. W. RW +R.W. RW +Rxa RXA +rz RZ +R. Z. RZ +Sa'id SAID +S. A. SA +S.A. SA +Saxl SAXL +Sa'yo SAYO +Sbai SBAI +sbc SBC +SBCs SBC's +sbk SBK +Sborz SBORZ +sb SB +S. B. SB +S.B. SB +Sbu SBU +scr SCR +SCRs SCR's +scry SCRY +sc SC +S. C. SC +S.C. SC +Scuf SCUF +Scymn SCYMN +SD&AE SD and AE +Sdei SDEI +sde SDE +Sde SDE +SDG&E SDG and E +sdk SDK +S. D. SD +S.D. SD +SDSM&T's SDSM and T's +Sejms SEJM's +sejr SEJR +Sekl SEKL +Sek SEK +S. E. SE +S.E. SE +ses SES +Ses SE's +SEs SE's +S.E.S.'s SES's +Seyh SEYH +sfadb SFADB +Sfax SFAX +SFFCo SFFCO +sfn SFN +'sf SF +sf SF +S. F. SF +S.F. SF +sfs SFS +S.F.'s SF's +SG&A SG and A +sgb SGB +Sgip SGIP +sgml SGML +sgra SGRA +S. G. SG +S.G. SG +SGSNs SGSN's +Sgt. sergeant +sgt SGT +S.H.I.E.L.D.'s SHIELD's +Shma SHMA +S. H. SH +S.H. SH +S. I. SI +S.I. SI +sjef SJEF +Sjon SJON +S. J. SJ +S.J. SJ +S. K. SK +S.K. SK +S.L.A.A.'s SLAA's +sla SLA +SLAs SLA's +S&L S and L +SLS&E SLS and E +S. L. SL +S.L. SL +SMe SME +SMEs SME's +SmI SMI +SMPSs SMPS's +smr SMR +S&M S and M +sm SM +S. M. SM +S.M. SM +S.M.'s SM's +SMs SMS +Smyl SMYL +SNESjr SNESJR +sngle SNGLE +SNPs SNP's +SnSe SNSE +S. N. SN +S.N. SN +S. O. SO +S.O. SO +Sos SO's +Sovn SOVN +Sov SOV +S&P 500 S and P five hundred +SPCAs SPCA's +Spe SPE +Spoa SPOA +SpPIn SPPIN +S&P S and P +S. P. SP +S.P. SP +SPs SP's +SPUs SPU's +sql SQL +sq SQ +S. Q. SQ +Sra SRA +SRAs SRA's +Srba SRBA +Srbi SRBI +SRBs SRB's +SRGs SRG's +Srhir SRHIR +SRLGs SRLG's +SRMs SRM's +Srni SRNI +srp SRP +sr SR +S. R. SR +S.R. 
SR +Srul SRUL +srx SRX +SSAs SSA's +SSDs SSD's +ssl SSL +SSoSV SSOSV +SSRIs SSRI's +S&S S and S +ss SS +S. s S's +Ss S's +S. S. SS +S.S. SS +Ssu SSU +ssw SSW +Ssy SSY +ST&AJ ST and AJ +STDs STD's +stfv STFV +STGs STG's +STIs STI's +STi STI +stl STL +Stryj STRYJ +S&T S and T +STScI STSCI +S. T. ST +S.T. ST +suo SUO +Suo SUO +S. U. SU +Susz SUSZ +SUTs SUT's +Suu SUU +SUVs SUV's +Suy SUY +Svac SVAC +svar SVAR +sve SVE +Sve SVE +SVMs SVM's +svn SVN +svom SVOM +sv SV +S. V. SV +S.V. SV +SVTs SVT's +Swe SWE +SwRI SWRI +S&W S and W +sw SW +S. W. SW +S.W. SW +sx SX +S.X. SX +SysML SYSML +SysRq SYSRQ +Sys SY's +SysV SYSV +S. Y. SY +S.Y. SY +SyT SYT +syv SYV +Syxx SYXX +Szasz SZASZ +Szer SZER +szkic SZKIC +Szklo SZKLO +Szlak SZLAK +SzMME SZMME +Szpir SZPIR +sz SZ +S. Z. SZ +Szu SZU +Szyk SZYK +taf TAF +Taf TAF +T. A. TA +T.A. TA +T&A T and A +TBCs TBC's +TBMs TBM's +tbh TBH +T. B. TB +T.B. TB +TCiAP TCIAP +TCKs TCK's +tc TC +T. C. TC +T.C. TC +TCUs TCU's +tcu TCU +tdb TDB +TDCi TDCI +TdIF TDIF +TDs TD's +T. D. TD +T.D. TD +Teatr TEATR +T. E. TE +T.E. TE +T&F T and F +tf TF +T. F. TF +T.F. TF +tge TGE +tgf TGF +tg TG +T. G. TG +T.G. TG +TGVs TGV's +tgv TGV +Thok THOK +ThSe THSE +T. H. TH +T.H. TH +T.I.'s TI's +T.I. TI +Tiu TIU +tiv TIV +Tiv TIV +Tiy's TIY's +T. J. TJ +T.J. TJ +t'ju TJU +tjz TJZ +tko TKO +Tko TKO +tk TK +T. K. TK +T.K. TK +Tlas TLA's +TlCu TLCU +TLDs TLD's +tli TLI +tlp TLP +TLRs TLR's +TLs TL's +tl TL +T. L. TL +T.L. TL +Tluk TLUK +Tluszcz TLUSZCZ +tmc TMC +TMGs TMG's +tmos TMOS +tmRNA TMRNA +TMSs TMS's +T.M.s TM's +tm TM +T. M. TM +T.M. TM +tna TNA +tnbc TNBC +TNTAs TNTA's +tn TN +T. N. TN +T.N. TN +T.O.'s TO's +T. O. TO +T.O. TO +tou TOU +Tou TOU +ToU TOU +tPA TPA +tpr TPR +tp TP +T. P. TP +T.P. TP +TPVs TPV's +T.Q. TQ +tra TRA +Tra TRA +Trbic TRBIC +Trcek TRCEK +TrkA TRKA +Trmcic TRMCIC +tRNA TRNA +TRPs TRP's +trs TRS +tr TR +T. R. TR +T.R. TR +truTV TRUTV +TruTV TRUTV +Trve TRVE +Tsa TSA +tsit TSIT +Tsiv TSIV +tso TSO +Tso TSO +tsr TSR +Ts T's +T. S. 
TS +T.S. TS +tsus TSUS +Tsvi TSVI +Tta TTA +tteok TTEOK +tte TTE +ttp TTP +T&T T and T +T. T. TT +T.T. TT +T'uqu TUQU +T.U. TU +Tuzk TUZK +tvaan TVAAN +Tvam TVAM +tva TVA +TVETs TVET's +TVii TVII +TVIn TVIN +TVi TVI +tvo TVO +Tvo TVO +TVRi TVRI +tvr TVR +TVSpy TVSPY +tvs TVS +TVs TV's +tVTA TVTA +tv TV +T. V. TV +T.V. TV +Twa TWA +Twi TWI +TWTs TWT's +T. W. TW +T.W. TW +TxDOT TXDOT +T.X. TX +tya TYA +Tyk TYK +Tza'ar TZAAR +Tze TZE +Tzrif TZRIF +Tzuh TZUH +Tzvi TZVI +Ua's UA's +UAs UA's +uat UAT +U.A. UA +UAVs UAV's +Uba UBA +UbcM UBCM +Ube UBE +ubi UBI +Ubi UBI +Ucar UCAR +Uca UCA +Ucmak UCMAK +Ucn UCN +uCs UC's +Uc UC +U.C. UC +Ucuncu UCUNCU +Uczta UCZTA +Uda UDA +Udit UDIT +Udny UDNY +UDTs UDT's +ud UD +Ud UD +U. D. UD +udu UDU +Udu UDU +Ueda's UEDA's +Uéda UE acute DA +U. E. UE +UFOs UFO's +Ufot UFOT +ufo UFO +Ufo UFO +U. F. UF +Uga UGA +Uge UGE +Ugni UGNI +Ugra UGRA +Ugrszke UGRSZKE +Ug UG +Uhha UHHA +uhn UHN +UHTCs UHTC's +Uhud UHUD +U.H. UH +uhur UHUR +Uiam UIAM +Uibh UIBH +Uible UIBLE +Uig UIG +uisae UISAE +UiS UIS +UiTM UITM +uit UIT +Uit UIT +Uiy UIY +Ujed UJED +Ujsag UJSAG +uj UJ +U.J. UJ +Uka UKA +Ukhra UKHRA +Ukic UKIC +Uki UKI +Ukoh UKOH +Uko UKO +ukr UKR +Ukr UKR +Ukui UKUI +u'k UK +uk UK +Uk UK +U.K. UK +Ukwu UKWU +Ulic ULIC +Ull ULL +Ulms ULM's +UlSU ULSU +Uluj ULUJ +U.L. UL +ulus ULUS +Ulus ULU's +Ulwa ULWA +Ulwe ULWE +Umbr UMBR +umelcu UMELCU +UMe UME +Umla UMLA +Umme UMME +Umno UMNO +Umri UMRI +U. M. UM +U.M. UM +Unli UNLI +Unlu UNLU +unm UNM +unnd UNND +UNSh UNSH +Unst UNST +Uns UN's +U.N.'s UN's +Unt UNT +U.N. UN +Unz UNZ +UOCl UOCL +UofM UOFM +Uoho UOHO +UoMs UOM's +uORF UORF +UoW UOW +UPBs UPB's +Upd UPD +Upf UPF +Upir UPIR +UPnP UPNP +Uppu UPPU +U.P. 
UP +UPyD UPYD +uq UQ +Urbz URBZ +URCs URC's +Urdd URDD +Urei UREI +urf URF +Urla URLA +URLhttp URLHTTP +URLs URL's +Usal USAL +usan USAN +Usan USAN +usao USAO +usata USATA +usa USA +.USA USA +USAya USAYA +USBs USB's +Uscie USCIE +U.S.C.'s USC's +USDoE USDOE +usd USD +USFbA USFBA +USF&WS USF and WS +UShs USH's +Usia USIA +usih USIH +Uslu USLU +Usmar USMAR +Usna USNA +Usnic USNIC +Usoi USOI +Usos USO's +Uso USO +USPs USP's +U.S.'s US's +Usti USTI +Ustka USTKA +Usui USUI +usum USUM +U. S. US +U.S. US +Usut USUT +Usvit USVIT +Uta's UTA's +uta UTA +Uta UTA +Utca UTCA +Utd's UTD's +Utd UTD +utea UTEA +Utes UTE's +Uth UTH +uti UTI +Uti UTI +Utne UTNE +Utnur UTNUR +Uto UTO +utrci UTRCI +utsav UTSAV +Utsav UTSAV +Utsu UTSU +U.T. UT +Utu UTU +Utva UTVA +Uuh UUH +Uul UUL +Uulu UULU +Uusi UUSI +UUs UU's +Uuto UUTO +Uvac UVAC +Uvea UVEA +uvnitr UVNITR +Uvo UVO +U.V. UV +Uwasa UWASA +UWFi UWFI +V. A. VA +V.A. VA +V&A V and A +Vav VAV +Vay VAY +vb VB +V. B. VB +V.B. VB +VCDs VCD's +VCRs VCR's +VCSELs VCSEL's +VCs VC's +VCTs VCT's +vc VC +V. C. VC +V.C. VC +vda VDA +Vda VDA +VDCs VDC's +vdiq VDIQ +vdm VDM +V. D. VD +V.D. VD +vez VEZ +Vez VEZ +vfp VFP +vfr VFR +V. F. VF +V.F. VF +V'Ger VGER +vgmdb VGMDB +VGo VGO +VGSCs VGSC's +VGSoM VGSOM +V. G. VG +V.G. VG +V. H. VH +V.I.C.'s VIC's +Vict VICT +viita VIITA +vijf VIJF +vij VIJ +Vij VIJ +V.I.P.s VIP's +VIPs VIP's +V. I. VI +V.I. VI +VJs VJ's +V. J. VJ +V.J. VJ +V. K. VK +V.K. VK +Vlah VLAH +VLCCs VLCC's +vlei VLEI +Vlijt VLIJT +V. L. VL +V.L. VL +VMAs VMA's +vm VM +V. M. VM +V.M. VM +vner VNER +V. N. VN +V.N. VN +V.O. VO +Vov VOV +Voz VOZ +vpis VPIS +VPNs VPN's +vpu VPU +V. P. VP +V.P. VP +vq VQ +vrak VRAK +Vrba VRBA +Vrbuv VRBUV +Vrej VREJ +vrem VREM +Vrin VRIN +vrj VRJ +vrn VRN +vroee VROEE +vrou VROU +vrouw VROUW +Vrouw VROUW +Vrsac VRSAC +Vrtis VRTI's +V&R V and R +vr VR +V. R. VR +V.R. VR +VSANs VSAN's +VSATs VSAT's +Vsekh VSEKH +vse VSE +vso VSO +VSPs VSP's +vs. 
versus +_vs._ versus +vsyo VSYO +VTE VT eL +Vtic VTIC +VTi VTI +V&T's V and T's +V&T V and T +V.T. VT +Vuur VUUR +VVIPs VVIP's +V. V. VV +V.V. VV +VWs VW's +Vyg VYG +vyr VYR +vy VY +vz VZ +V. Z. VZ +Waay WAAY +Wa'il WAIL +Wakf WAKF +wa'l WAL +waqf WAQF +waqt WAQT +Waqt WAQT +Wasl WASL +Watfa WATFA +wau WAU +Wau WAU +W. A. WA +W.A. WA +waw WAW +waza WAZA +Waza WAZA +WBs WB's +W. B. WB +W.B. WB +W&CBR W and CBR +wc WC +W. C. WC +W.C. WC +W. D. WD +W.D. WD +W. E. WE +W.E. WE +WF&NW WF and NW +W&F W and F +W. F. WF +W.F. WF +wga WGA +wgn WGN +wg WG +W. G. WG +W.G. WG +W&H W and H +W. H. WH +W.H. WH +Whyld WHYLD +Wica WICA +wici WICI +Wif WIF +Wijk WIJK +Wiwa WIWA +W. I. WI +wjaz WJAZ +W&J's W and J's +W&J W and J +W. J. WJ +W.J. WJ +W. K. WK +W.K. WK +W&LE W and LE +Wley WLEY +wlrs WLRS +W. L. WL +W.L. WL +wml WML +W&M W and M +W. M. WM +W.M. WM +W. N. WN +W.N. WN +wnzaa WNZAA +W&OD W and OD +Worh WORH +W. O. WO +W.O. WO +wpt WPT +wp WP +W. P. WP +W.P. WP +Wrec WREC +W. R. WR +W.R. WR +ws WS +W. S. WS +W.S. WS +WTs WT's +WTTs WTT's +W. T. WT +W.T. WT +WUAs WUA's +Wudl WUDL +Wuhr WUHR +Wuhu WUHU +Wu's WU's +wuv WUV +W. U. WU +Wuz WUZ +WVa WVA +W. V. WV +W.V. WV +Wwe WWE +W. W. WW +W.W. WW +wx WX +W.Y. WY +wyzc WYZC +W. Z. WZ +xbg XBG +Xbra XBRA +xCo XCO +X. C. XC +xda XDA +XDRs XDR's +X. D. XD +Xfce XFCE +xf XF +X.F. XF +xh XH +X. H. XH +xk XK +xmc XMC +xml XML +X.M. XM +xO XO +X.O. XO +XPe XPE +X.Q. XQ +XSi XSI +xsr XSR +XTwas XTWA's +xt XT +X. W. XW +xy XY +xyz XYZ +xyZ XYZ +Y. A. YA +Y.A. YA +Y. B. YB +Y.B. YB +ycia YCIA +ycie YCIE +yc YC +Y. C. YC +Y.C. YC +Ydby YDBY +Yde YDE +YdiB YDIB +yd YD +Y. D. YD +Y.D. YD +Y.E. YE +YFCs YFC's +yfle YFLE +YF&R YF and R +yg YG +Y. G. YG +Y.G. YG +Y. H. YH +Y. I. YI +Y.I. YI +yj YJ +Y. J. YJ +Ykt YKT +Y. K. YK +Y.K. YK +Yle YLE +yl YL +Y. L. YL +Ymke YMKE +ym YM +Y. M. YM +yndi YNDI +Yndi YNDI +yne YNE +Yngve YNGVE +Ynis YNI's +yn YN +Yn YN +Y. N. YN +Y.N. YN +Y.O. YO +ypa YPA +Y.P. YP +Y. Q. 
YQ +yra YRA +Yra YRA +Y&R Y and R +YSK'da YSKDA +YSK'ya YSKYA +Ys Y's +Y.S. YS +yta YTA +Ytre YTRE +Y&T Y and T +Y. T. YT +Y.T. YT +Y. V. YV +Y. W. YW +Y.Y. YY +Z. A. ZA +Z.A. ZA +Zdar ZDAR +zda ZDA +zg ZG +Z.G. ZG +zh ZH +Z. H. ZH +Z.H. ZH +zijn ZIJN +zij ZIJ +Zij ZIJ +Z. I. ZI +Z. J. ZJ +Z.K. ZK +Z.L. ZL +Z.M. ZM +ZnO ZNO +Zpav ZPAV +Z.P. ZP +ZrI ZRI +Zsuzsa ZSUZSA +Z.W. ZW +Z.X. ZX +Z. Y. ZY +Z.Y. ZY +Z. Z. ZZ +Z.Z. ZZ diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/graph_utils.py b/utils/speechio/nemo_text_processing/text_normalization/en/graph_utils.py new file mode 100644 index 0000000..6eca6f6 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/graph_utils.py @@ -0,0 +1,196 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import string +from pathlib import Path +from typing import Dict + +import pynini +from nemo_text_processing.text_normalization.en.utils import get_abs_path +from pynini import Far +from pynini.examples import plurals +from pynini.export import export +from pynini.lib import byte, pynutil, utf8 + +NEMO_CHAR = utf8.VALID_UTF8_CHAR + +NEMO_DIGIT = byte.DIGIT +NEMO_LOWER = pynini.union(*string.ascii_lowercase).optimize() +NEMO_UPPER = pynini.union(*string.ascii_uppercase).optimize() +NEMO_ALPHA = pynini.union(NEMO_LOWER, NEMO_UPPER).optimize() +NEMO_ALNUM = pynini.union(NEMO_DIGIT, NEMO_ALPHA).optimize() +NEMO_HEX = pynini.union(*string.hexdigits).optimize() +NEMO_NON_BREAKING_SPACE = u"\u00A0" +NEMO_SPACE = " " +NEMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", u"\u00A0").optimize() +NEMO_NOT_SPACE = pynini.difference(NEMO_CHAR, NEMO_WHITE_SPACE).optimize() +NEMO_NOT_QUOTE = pynini.difference(NEMO_CHAR, r'"').optimize() + +NEMO_PUNCT = pynini.union(*map(pynini.escape, string.punctuation)).optimize() +NEMO_GRAPH = pynini.union(NEMO_ALNUM, NEMO_PUNCT).optimize() + +NEMO_SIGMA = pynini.closure(NEMO_CHAR) + +delete_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE)) +delete_zero_or_one_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE, 0, 1)) +insert_space = pynutil.insert(" ") +delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ") +delete_preserve_order = pynini.closure( + pynutil.delete(" preserve_order: true") + | (pynutil.delete(" field_order: \"") + NEMO_NOT_QUOTE + pynutil.delete("\"")) +) + +suppletive = pynini.string_file(get_abs_path("data/suppletive.tsv")) +# _v = pynini.union("a", "e", "i", "o", "u") +_c = pynini.union( + "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "y", "z" +) +_ies = NEMO_SIGMA + _c + pynini.cross("y", "ies") +_es = NEMO_SIGMA + pynini.union("s", "sh", "ch", "x", "z") + pynutil.insert("es") +_s = NEMO_SIGMA + pynutil.insert("s") + 
+graph_plural = plurals._priority_union( + suppletive, plurals._priority_union(_ies, plurals._priority_union(_es, _s, NEMO_SIGMA), NEMO_SIGMA), NEMO_SIGMA +).optimize() + +SINGULAR_TO_PLURAL = graph_plural +PLURAL_TO_SINGULAR = pynini.invert(graph_plural) +TO_LOWER = pynini.union(*[pynini.cross(x, y) for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)]) +TO_UPPER = pynini.invert(TO_LOWER) +MIN_NEG_WEIGHT = -0.0001 +MIN_POS_WEIGHT = 0.0001 + + +def generator_main(file_name: str, graphs: Dict[str, 'pynini.FstLike']): + """ + Exports graph as OpenFst finite state archive (FAR) file with given file name and rule name. + + Args: + file_name: exported file name + graphs: Mapping of a rule name and Pynini WFST graph to be exported + """ + exporter = export.Exporter(file_name) + for rule, graph in graphs.items(): + exporter[rule] = graph.optimize() + exporter.close() + print(f'Created {file_name}') + + +def get_plurals(fst): + """ + Given singular returns plurals + + Args: + fst: Fst + + Returns plurals to given singular forms + """ + return SINGULAR_TO_PLURAL @ fst + + +def get_singulars(fst): + """ + Given plural returns singulars + + Args: + fst: Fst + + Returns singulars to given plural forms + """ + return PLURAL_TO_SINGULAR @ fst + + +def convert_space(fst) -> 'pynini.FstLike': + """ + Converts space to nonbreaking space. + Used only in tagger grammars for transducing token values within quotes, e.g. name: "hello kitty" + This is making transducer significantly slower, so only use when there could be potential spaces within quotes, otherwise leave it. + + Args: + fst: input fst + + Returns output fst where breaking spaces are converted to non breaking spaces + """ + return fst @ pynini.cdrewrite(pynini.cross(NEMO_SPACE, NEMO_NON_BREAKING_SPACE), "", "", NEMO_SIGMA) + + +class GraphFst: + """ + Base class for all grammar fsts. 
+ + Args: + name: name of grammar class + kind: either 'classify' or 'verbalize' + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, name: str, kind: str, deterministic: bool = True): + self.name = name + self.kind = str + self._fst = None + self.deterministic = deterministic + + self.far_path = Path(os.path.dirname(__file__) + '/grammars/' + kind + '/' + name + '.far') + if self.far_exist(): + self._fst = Far(self.far_path, mode="r", arc_type="standard", far_type="default").get_fst() + + def far_exist(self) -> bool: + """ + Returns true if FAR can be loaded + """ + return self.far_path.exists() + + @property + def fst(self) -> 'pynini.FstLike': + return self._fst + + @fst.setter + def fst(self, fst): + self._fst = fst + + def add_tokens(self, fst) -> 'pynini.FstLike': + """ + Wraps class name around to given fst + + Args: + fst: input fst + + Returns: + Fst: fst + """ + return pynutil.insert(f"{self.name} {{ ") + fst + pynutil.insert(" }") + + def delete_tokens(self, fst) -> 'pynini.FstLike': + """ + Deletes class name wrap around output of given fst + + Args: + fst: input fst + + Returns: + Fst: fst + """ + res = ( + pynutil.delete(f"{self.name}") + + delete_space + + pynutil.delete("{") + + delete_space + + fst + + delete_space + + pynutil.delete("}") + ) + return res @ pynini.cdrewrite(pynini.cross(u"\u00A0", " "), "", "", NEMO_SIGMA) diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/abbreviation.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/abbreviation.py new file mode 100644 index 0000000..640bb48 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/abbreviation.py @@ -0,0 +1,50 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_UPPER, GraphFst, insert_space +from pynini.lib import pynutil + + +class AbbreviationFst(GraphFst): + """ + Finite state transducer for classifying electronic: as URLs, email addresses, etc. + e.g. 
"ABC" -> tokens { abbreviation { value: "A B C" } } + + Args: + whitelist: whitelist FST + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, whitelist: 'pynini.FstLike', deterministic: bool = True): + super().__init__(name="abbreviation", kind="classify", deterministic=deterministic) + + dot = pynini.accep(".") + # A.B.C. -> A. B. C. + graph = NEMO_UPPER + dot + pynini.closure(insert_space + NEMO_UPPER + dot, 1) + # A.B.C. -> A.B.C. + graph |= NEMO_UPPER + dot + pynini.closure(NEMO_UPPER + dot, 1) + # ABC -> A B C + graph |= NEMO_UPPER + pynini.closure(insert_space + NEMO_UPPER, 1) + + # exclude words that are included in the whitelist + graph = pynini.compose( + pynini.difference(pynini.project(graph, "input"), pynini.project(whitelist.graph, "input")), graph + ) + + graph = pynutil.insert("value: \"") + graph.optimize() + pynutil.insert("\"") + graph = self.add_tokens(graph) + self.fst = graph.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/cardinal.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/cardinal.py new file mode 100644 index 0000000..9b94143 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/cardinal.py @@ -0,0 +1,138 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_DIGIT, + NEMO_NOT_QUOTE, + NEMO_SIGMA, + GraphFst, + insert_space, +) +from nemo_text_processing.text_normalization.en.taggers.date import get_four_digit_year_graph +from nemo_text_processing.text_normalization.en.utils import get_abs_path +from pynini.examples import plurals +from pynini.lib import pynutil + + +class CardinalFst(GraphFst): + """ + Finite state transducer for classifying cardinals, e.g. + -23 -> cardinal { negative: "true" integer: "twenty three" } } + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True, lm: bool = False): + super().__init__(name="cardinal", kind="classify", deterministic=deterministic) + + self.lm = lm + self.deterministic = deterministic + # TODO replace to have "oh" as a default for "0" + graph = pynini.Far(get_abs_path("data/number/cardinal_number_name.far")).get_fst() + self.graph_hundred_component_at_least_one_none_zero_digit = ( + pynini.closure(NEMO_DIGIT, 2, 3) | pynini.difference(NEMO_DIGIT, pynini.accep("0")) + ) @ graph + + graph_digit = pynini.string_file(get_abs_path("data/number/digit.tsv")) + graph_zero = pynini.string_file(get_abs_path("data/number/zero.tsv")) + + single_digits_graph = pynini.invert(graph_digit | graph_zero) + self.single_digits_graph = single_digits_graph + pynini.closure(insert_space + single_digits_graph) + + if not deterministic: + # for a single token allow only the same normalization + # "007" -> {"oh oh seven", "zero zero seven"} not {"oh zero seven"} + single_digits_graph_zero = pynini.invert(graph_digit | graph_zero) + single_digits_graph_oh = pynini.invert(graph_digit) | pynini.cross("0", "oh") + + self.single_digits_graph = single_digits_graph_zero + pynini.closure( + insert_space + single_digits_graph_zero + ) + 
self.single_digits_graph |= single_digits_graph_oh + pynini.closure(insert_space + single_digits_graph_oh) + + single_digits_graph_with_commas = pynini.closure( + self.single_digits_graph + insert_space, 1, 3 + ) + pynini.closure( + pynutil.delete(",") + + single_digits_graph + + insert_space + + single_digits_graph + + insert_space + + single_digits_graph, + 1, + ) + + optional_minus_graph = pynini.closure(pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0, 1) + + graph = ( + pynini.closure(NEMO_DIGIT, 1, 3) + + (pynini.closure(pynutil.delete(",") + NEMO_DIGIT ** 3) | pynini.closure(NEMO_DIGIT ** 3)) + ) @ graph + + self.graph = graph + self.graph_with_and = self.add_optional_and(graph) + + if deterministic: + long_numbers = pynini.compose(NEMO_DIGIT ** (5, ...), self.single_digits_graph).optimize() + final_graph = plurals._priority_union(long_numbers, self.graph_with_and, NEMO_SIGMA).optimize() + cardinal_with_leading_zeros = pynini.compose( + pynini.accep("0") + pynini.closure(NEMO_DIGIT), self.single_digits_graph + ) + final_graph |= cardinal_with_leading_zeros + else: + leading_zeros = pynini.compose(pynini.closure(pynini.accep("0"), 1), self.single_digits_graph) + cardinal_with_leading_zeros = ( + leading_zeros + pynutil.insert(" ") + pynini.compose(pynini.closure(NEMO_DIGIT), self.graph_with_and) + ) + + # add small weight to non-default graphs to make sure the deterministic option is listed first + final_graph = ( + self.graph_with_and + | pynutil.add_weight(self.single_digits_graph, 0.0001) + | get_four_digit_year_graph() # allows e.g. 
4567 be pronouced as forty five sixty seven + | pynutil.add_weight(single_digits_graph_with_commas, 0.0001) + | cardinal_with_leading_zeros + ) + + final_graph = optional_minus_graph + pynutil.insert("integer: \"") + final_graph + pynutil.insert("\"") + final_graph = self.add_tokens(final_graph) + self.fst = final_graph.optimize() + + def add_optional_and(self, graph): + graph_with_and = graph + + if not self.lm: + graph_with_and = pynutil.add_weight(graph, 0.00001) + not_quote = pynini.closure(NEMO_NOT_QUOTE) + no_thousand_million = pynini.difference( + not_quote, not_quote + pynini.union("thousand", "million") + not_quote + ).optimize() + integer = ( + not_quote + pynutil.add_weight(pynini.cross("hundred ", "hundred and ") + no_thousand_million, -0.0001) + ).optimize() + + no_hundred = pynini.difference(NEMO_SIGMA, not_quote + pynini.accep("hundred") + not_quote).optimize() + integer |= ( + not_quote + pynutil.add_weight(pynini.cross("thousand ", "thousand and ") + no_hundred, -0.0001) + ).optimize() + + optional_hundred = pynini.compose((NEMO_DIGIT - "0") ** 3, graph).optimize() + optional_hundred = pynini.compose(optional_hundred, NEMO_SIGMA + pynini.cross(" hundred", "") + NEMO_SIGMA) + graph_with_and |= pynini.compose(graph, integer).optimize() + graph_with_and |= optional_hundred + return graph_with_and diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/date.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/date.py new file mode 100644 index 0000000..2a580a8 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/date.py @@ -0,0 +1,370 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_CHAR, + NEMO_DIGIT, + NEMO_LOWER, + NEMO_SIGMA, + NEMO_NOT_QUOTE, + TO_LOWER, + GraphFst, + delete_extra_space, + delete_space, + insert_space, +) +from nemo_text_processing.text_normalization.en.utils import ( + augment_labels_with_punct_at_end, + get_abs_path, + load_labels, +) +from pynini.examples import plurals +from pynini.lib import pynutil + +graph_teen = pynini.invert(pynini.string_file(get_abs_path("data/number/teen.tsv"))).optimize() +graph_digit = pynini.invert(pynini.string_file(get_abs_path("data/number/digit.tsv"))).optimize() +ties_graph = pynini.invert(pynini.string_file(get_abs_path("data/number/ty.tsv"))).optimize() +year_suffix = load_labels(get_abs_path("data/date/year_suffix.tsv")) +year_suffix.extend(augment_labels_with_punct_at_end(year_suffix)) +year_suffix = pynini.string_map(year_suffix).optimize() + + +def get_ties_graph(deterministic: bool = True): + """ + Returns two digit transducer, e.g. 
+ 03 -> o three + 12 -> thirteen + 20 -> twenty + """ + graph = graph_teen | ties_graph + pynutil.delete("0") | ties_graph + insert_space + graph_digit + + if deterministic: + graph = graph | pynini.cross("0", "o") + insert_space + graph_digit + else: + graph = graph | (pynini.cross("0", "o") | pynini.cross("0", "zero")) + insert_space + graph_digit + + return graph.optimize() + + +def get_four_digit_year_graph(deterministic: bool = True): + """ + Returns a four digit transducer which is combination of ties/teen or digits + (using hundred instead of thousand format), e.g. + 1219 -> twelve nineteen + 3900 -> thirty nine hundred + """ + graph_ties = get_ties_graph(deterministic) + + graph_with_s = ( + (graph_ties + insert_space + graph_ties) + | (graph_teen + insert_space + (ties_graph | pynini.cross("1", "ten"))) + ) + pynutil.delete("0s") + + graph_with_s |= (graph_teen | graph_ties) + insert_space + pynini.cross("00", "hundred") + pynutil.delete("s") + graph_with_s = graph_with_s @ pynini.cdrewrite( + pynini.cross("y", "ies") | pynutil.insert("s"), "", "[EOS]", NEMO_SIGMA + ) + + graph = graph_ties + insert_space + graph_ties + graph |= (graph_teen | graph_ties) + insert_space + pynini.cross("00", "hundred") + + thousand_graph = ( + graph_digit + + insert_space + + pynini.cross("00", "thousand") + + (pynutil.delete("0") | insert_space + graph_digit) + ) + thousand_graph |= ( + graph_digit + + insert_space + + pynini.cross("000", "thousand") + + pynini.closure(pynutil.delete(" "), 0, 1) + + pynini.accep("s") + ) + + graph |= graph_with_s + if deterministic: + graph = plurals._priority_union(thousand_graph, graph, NEMO_SIGMA) + else: + graph |= thousand_graph + + return graph.optimize() + + +def _get_two_digit_year_with_s_graph(): + # to handle '70s -> seventies + graph = ( + pynini.closure(pynutil.delete("'"), 0, 1) + + pynini.compose( + ties_graph + pynutil.delete("0s"), pynini.cdrewrite(pynini.cross("y", "ies"), "", "[EOS]", NEMO_SIGMA) + ) + ).optimize() + 
return graph + + +def _get_year_graph(cardinal_graph, deterministic: bool = True): + """ + Transducer for year, only from 1000 - 2999 e.g. + 1290 -> twelve nineteen + 2000 - 2009 will be verbalized as two thousand. + + Transducer for 3 digit year, e.g. 123-> one twenty three + + Transducer for year with suffix + 123 A.D., 4200 B.C + """ + graph = get_four_digit_year_graph(deterministic) + graph = (pynini.union("1", "2") + (NEMO_DIGIT ** 3) + pynini.closure(pynini.cross(" s", "s") | "s", 0, 1)) @ graph + + graph |= _get_two_digit_year_with_s_graph() + + three_digit_year = (NEMO_DIGIT @ cardinal_graph) + insert_space + (NEMO_DIGIT ** 2) @ cardinal_graph + year_with_suffix = ( + (get_four_digit_year_graph(deterministic=True) | three_digit_year) + delete_space + insert_space + year_suffix + ) + graph |= year_with_suffix + return graph.optimize() + + +def _get_two_digit_year(cardinal_graph, single_digits_graph): + wo_digit_year = NEMO_DIGIT ** (2) @ plurals._priority_union(cardinal_graph, single_digits_graph, NEMO_SIGMA) + return wo_digit_year + + +class DateFst(GraphFst): + """ + Finite state transducer for classifying date, e.g. + jan. 5, 2012 -> date { month: "january" day: "five" year: "twenty twelve" preserve_order: true } + jan. 
5 -> date { month: "january" day: "five" preserve_order: true } + 5 january 2012 -> date { day: "five" month: "january" year: "twenty twelve" preserve_order: true } + 2012-01-05 -> date { year: "twenty twelve" month: "january" day: "five" } + 2012.01.05 -> date { year: "twenty twelve" month: "january" day: "five" } + 2012/01/05 -> date { year: "twenty twelve" month: "january" day: "five" } + 2012 -> date { year: "twenty twelve" } + + Args: + cardinal: CardinalFst + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, cardinal: GraphFst, deterministic: bool, lm: bool = False): + super().__init__(name="date", kind="classify", deterministic=deterministic) + + # january + month_graph = pynini.string_file(get_abs_path("data/date/month_name.tsv")).optimize() + # January, JANUARY + month_graph |= pynini.compose(TO_LOWER + pynini.closure(NEMO_CHAR), month_graph) | pynini.compose( + TO_LOWER ** (2, ...), month_graph + ) + + # jan + month_abbr_graph = pynini.string_file(get_abs_path("data/date/month_abbr.tsv")).optimize() + # jan, Jan, JAN + month_abbr_graph = ( + month_abbr_graph + | pynini.compose(TO_LOWER + pynini.closure(NEMO_LOWER, 1), month_abbr_graph).optimize() + | pynini.compose(TO_LOWER ** (2, ...), month_abbr_graph).optimize() + ) + pynini.closure(pynutil.delete("."), 0, 1) + month_graph |= month_abbr_graph.optimize() + + month_numbers_labels = pynini.string_file(get_abs_path("data/date/month_number.tsv")).optimize() + cardinal_graph = cardinal.graph_hundred_component_at_least_one_none_zero_digit + + year_graph = _get_year_graph(cardinal_graph=cardinal_graph, deterministic=deterministic) + + # three_digit_year = (NEMO_DIGIT @ cardinal_graph) + insert_space + (NEMO_DIGIT ** 2) @ cardinal_graph + # year_graph |= three_digit_year + + month_graph = pynutil.insert("month: \"") + month_graph + pynutil.insert("\"") + month_numbers_graph = 
pynutil.insert("month: \"") + month_numbers_labels + pynutil.insert("\"") + + endings = ["rd", "th", "st", "nd"] + endings += [x.upper() for x in endings] + endings = pynini.union(*endings) + + day_graph = ( + pynutil.insert("day: \"") + + pynini.closure(pynutil.delete("the "), 0, 1) + + ( + ((pynini.union("1", "2") + NEMO_DIGIT) | NEMO_DIGIT | (pynini.accep("3") + pynini.union("0", "1"))) + + pynini.closure(pynutil.delete(endings), 0, 1) + ) + @ cardinal_graph + + pynutil.insert("\"") + ) + + two_digit_year = _get_two_digit_year( + cardinal_graph=cardinal_graph, single_digits_graph=cardinal.single_digits_graph + ) + two_digit_year = pynutil.insert("year: \"") + two_digit_year + pynutil.insert("\"") + + # if lm: + # two_digit_year = pynini.compose(pynini.difference(NEMO_DIGIT, "0") + NEMO_DIGIT ** (3), two_digit_year) + # year_graph = pynini.compose(pynini.difference(NEMO_DIGIT, "0") + NEMO_DIGIT ** (2), year_graph) + # year_graph |= pynini.compose(pynini.difference(NEMO_DIGIT, "0") + NEMO_DIGIT ** (4, ...), year_graph) + + graph_year = pynutil.insert(" year: \"") + pynutil.delete(" ") + year_graph + pynutil.insert("\"") + graph_year |= ( + pynutil.insert(" year: \"") + + pynini.accep(",") + + pynini.closure(pynini.accep(" "), 0, 1) + + year_graph + + pynutil.insert("\"") + ) + optional_graph_year = pynini.closure(graph_year, 0, 1) + + year_graph = pynutil.insert("year: \"") + year_graph + pynutil.insert("\"") + + graph_mdy = month_graph + ( + (delete_extra_space + day_graph) + | (pynini.accep(" ") + day_graph) + | graph_year + | (delete_extra_space + day_graph + graph_year) + ) + + graph_mdy |= ( + month_graph + + pynini.cross("-", " ") + + day_graph + + pynini.closure(((pynini.cross("-", " ") + NEMO_SIGMA) @ graph_year), 0, 1) + ) + + for x in ["-", "/", "."]: + delete_sep = pynutil.delete(x) + graph_mdy |= ( + month_numbers_graph + + delete_sep + + insert_space + + pynini.closure(pynutil.delete("0"), 0, 1) + + day_graph + + delete_sep + + insert_space + + 
(year_graph | two_digit_year) + ) + + graph_dmy = day_graph + delete_extra_space + month_graph + optional_graph_year + day_ex_month = (NEMO_DIGIT ** 2 - pynini.project(month_numbers_graph, "input")) @ day_graph + for x in ["-", "/", "."]: + delete_sep = pynutil.delete(x) + graph_dmy |= ( + day_ex_month + + delete_sep + + insert_space + + month_numbers_graph + + delete_sep + + insert_space + + (year_graph | two_digit_year) + ) + + graph_ymd = pynini.accep("") + for x in ["-", "/", "."]: + delete_sep = pynutil.delete(x) + graph_ymd |= ( + (year_graph | two_digit_year) + + delete_sep + + insert_space + + month_numbers_graph + + delete_sep + + insert_space + + pynini.closure(pynutil.delete("0"), 0, 1) + + day_graph + ) + + final_graph = graph_mdy | graph_dmy + + if not deterministic or lm: + final_graph += pynini.closure(pynutil.insert(" preserve_order: true"), 0, 1) + m_sep_d = ( + month_numbers_graph + + pynutil.delete(pynini.union("-", "/")) + + insert_space + + pynini.closure(pynutil.delete("0"), 0, 1) + + day_graph + ) + final_graph |= m_sep_d + else: + final_graph += pynutil.insert(" preserve_order: true") + + final_graph |= graph_ymd | year_graph + + if not deterministic or lm: + ymd_to_mdy_graph = None + ymd_to_dmy_graph = None + mdy_to_dmy_graph = None + md_to_dm_graph = None + + for month in [x[0] for x in load_labels(get_abs_path("data/date/month_name.tsv"))]: + for day in [x[0] for x in load_labels(get_abs_path("data/date/day.tsv"))]: + ymd_to_mdy_curr = ( + pynutil.insert("month: \"" + month + "\" day: \"" + day + "\" ") + + pynini.accep('year:') + + NEMO_SIGMA + + pynutil.delete(" month: \"" + month + "\" day: \"" + day + "\"") + ) + + # YY-MM-DD -> MM-DD-YY + ymd_to_mdy_curr = pynini.compose(graph_ymd, ymd_to_mdy_curr) + ymd_to_mdy_graph = ( + ymd_to_mdy_curr + if ymd_to_mdy_graph is None + else pynini.union(ymd_to_mdy_curr, ymd_to_mdy_graph) + ) + + ymd_to_dmy_curr = ( + pynutil.insert("day: \"" + day + "\" month: \"" + month + "\" ") + + 
pynini.accep('year:') + + NEMO_SIGMA + + pynutil.delete(" month: \"" + month + "\" day: \"" + day + "\"") + ) + + # YY-MM-DD -> MM-DD-YY + ymd_to_dmy_curr = pynini.compose(graph_ymd, ymd_to_dmy_curr).optimize() + ymd_to_dmy_graph = ( + ymd_to_dmy_curr + if ymd_to_dmy_graph is None + else pynini.union(ymd_to_dmy_curr, ymd_to_dmy_graph) + ) + + mdy_to_dmy_curr = ( + pynutil.insert("day: \"" + day + "\" month: \"" + month + "\" ") + + pynutil.delete("month: \"" + month + "\" day: \"" + day + "\" ") + + pynini.accep('year:') + + NEMO_SIGMA + ).optimize() + # MM-DD-YY -> verbalize as MM-DD-YY (February fourth 1991) or DD-MM-YY (the fourth of February 1991) + mdy_to_dmy_curr = pynini.compose(graph_mdy, mdy_to_dmy_curr).optimize() + mdy_to_dmy_graph = ( + mdy_to_dmy_curr + if mdy_to_dmy_graph is None + else pynini.union(mdy_to_dmy_curr, mdy_to_dmy_graph).optimize() + ).optimize() + + md_to_dm_curr = pynutil.insert("day: \"" + day + "\" month: \"" + month + "\"") + pynutil.delete( + "month: \"" + month + "\" day: \"" + day + "\"" + ) + md_to_dm_curr = pynini.compose(m_sep_d, md_to_dm_curr).optimize() + + md_to_dm_graph = ( + md_to_dm_curr + if md_to_dm_graph is None + else pynini.union(md_to_dm_curr, md_to_dm_graph).optimize() + ).optimize() + + final_graph |= mdy_to_dmy_graph | md_to_dm_graph | ymd_to_mdy_graph | ymd_to_dmy_graph + + final_graph = self.add_tokens(final_graph) + self.fst = final_graph.optimize() + diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/decimal.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/decimal.py new file mode 100644 index 0000000..2486b5f --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/decimal.py @@ -0,0 +1,129 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_SIGMA, TO_UPPER, GraphFst, get_abs_path +from pynini.lib import pynutil + +delete_space = pynutil.delete(" ") +quantities = pynini.string_file(get_abs_path("data/number/thousand.tsv")) +quantities_abbr = pynini.string_file(get_abs_path("data/number/quantity_abbr.tsv")) +quantities_abbr |= TO_UPPER @ quantities_abbr + + +def get_quantity( + decimal: 'pynini.FstLike', cardinal_up_to_hundred: 'pynini.FstLike', include_abbr: bool +) -> 'pynini.FstLike': + """ + Returns FST that transforms either a cardinal or decimal followed by a quantity into a numeral, + e.g. 1 million -> integer_part: "one" quantity: "million" + e.g. 
1.5 million -> integer_part: "one" fractional_part: "five" quantity: "million" + + Args: + decimal: decimal FST + cardinal_up_to_hundred: cardinal FST + """ + quantity_wo_thousand = pynini.project(quantities, "input") - pynini.union("k", "K", "thousand") + if include_abbr: + quantity_wo_thousand |= pynini.project(quantities_abbr, "input") - pynini.union("k", "K", "thousand") + res = ( + pynutil.insert("integer_part: \"") + + cardinal_up_to_hundred + + pynutil.insert("\"") + + pynini.closure(pynutil.delete(" "), 0, 1) + + pynutil.insert(" quantity: \"") + + (quantity_wo_thousand @ (quantities | quantities_abbr)) + + pynutil.insert("\"") + ) + if include_abbr: + quantity = quantities | quantities_abbr + else: + quantity = quantities + res |= ( + decimal + + pynini.closure(pynutil.delete(" "), 0, 1) + + pynutil.insert("quantity: \"") + + quantity + + pynutil.insert("\"") + ) + return res + + +class DecimalFst(GraphFst): + """ + Finite state transducer for classifying decimal, e.g. + -12.5006 billion -> decimal { negative: "true" integer_part: "12" fractional_part: "five o o six" quantity: "billion" } + 1 billion -> decimal { integer_part: "one" quantity: "billion" } + + cardinal: CardinalFst + """ + + def __init__(self, cardinal: GraphFst, deterministic: bool): + super().__init__(name="decimal", kind="classify", deterministic=deterministic) + + cardinal_graph = cardinal.graph_with_and + cardinal_graph_hundred_component_at_least_one_none_zero_digit = ( + cardinal.graph_hundred_component_at_least_one_none_zero_digit + ) + + self.graph = cardinal.single_digits_graph.optimize() + + if not deterministic: + self.graph = self.graph | cardinal_graph + + point = pynutil.delete(".") + optional_graph_negative = pynini.closure(pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0, 1) + + self.graph_fractional = pynutil.insert("fractional_part: \"") + self.graph + pynutil.insert("\"") + self.graph_integer = pynutil.insert("integer_part: \"") + cardinal_graph + 
pynutil.insert("\"") + final_graph_wo_sign = ( + pynini.closure(self.graph_integer + pynutil.insert(" "), 0, 1) + + point + + pynutil.insert(" ") + + self.graph_fractional + ) + + quantity_w_abbr = get_quantity( + final_graph_wo_sign, cardinal_graph_hundred_component_at_least_one_none_zero_digit, include_abbr=True + ) + quantity_wo_abbr = get_quantity( + final_graph_wo_sign, cardinal_graph_hundred_component_at_least_one_none_zero_digit, include_abbr=False + ) + self.final_graph_wo_negative_w_abbr = final_graph_wo_sign | quantity_w_abbr + self.final_graph_wo_negative = final_graph_wo_sign | quantity_wo_abbr + + # reduce options for non_deterministic and allow either "oh" or "zero", but not combination + if not deterministic: + no_oh_zero = pynini.difference( + NEMO_SIGMA, + (NEMO_SIGMA + "oh" + NEMO_SIGMA + "zero" + NEMO_SIGMA) + | (NEMO_SIGMA + "zero" + NEMO_SIGMA + "oh" + NEMO_SIGMA), + ).optimize() + no_zero_oh = pynini.difference( + NEMO_SIGMA, NEMO_SIGMA + pynini.accep("zero") + NEMO_SIGMA + pynini.accep("oh") + NEMO_SIGMA + ).optimize() + + self.final_graph_wo_negative |= pynini.compose( + self.final_graph_wo_negative, + pynini.cdrewrite( + pynini.cross("integer_part: \"zero\"", "integer_part: \"oh\""), NEMO_SIGMA, NEMO_SIGMA, NEMO_SIGMA + ), + ) + self.final_graph_wo_negative = pynini.compose(self.final_graph_wo_negative, no_oh_zero).optimize() + self.final_graph_wo_negative = pynini.compose(self.final_graph_wo_negative, no_zero_oh).optimize() + + final_graph = optional_graph_negative + self.final_graph_wo_negative + + final_graph = self.add_tokens(final_graph) + self.fst = final_graph.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/electronic.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/electronic.py new file mode 100644 index 0000000..243c065 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/electronic.py @@ -0,0 +1,87 @@ +# Copyright (c) 2021, NVIDIA 
CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_ALPHA, + NEMO_DIGIT, + NEMO_SIGMA, + GraphFst, + get_abs_path, + insert_space, +) +from pynini.lib import pynutil + + +class ElectronicFst(GraphFst): + """ + Finite state transducer for classifying electronic: as URLs, email addresses, etc. + e.g. cdf1@abc.edu -> tokens { electronic { username: "cdf1" domain: "abc.edu" } } + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="electronic", kind="classify", deterministic=deterministic) + + accepted_symbols = pynini.project(pynini.string_file(get_abs_path("data/electronic/symbol.tsv")), "input") + accepted_common_domains = pynini.project( + pynini.string_file(get_abs_path("data/electronic/domain.tsv")), "input" + ) + all_accepted_symbols = NEMO_ALPHA + pynini.closure(NEMO_ALPHA | NEMO_DIGIT | accepted_symbols) + graph_symbols = pynini.string_file(get_abs_path("data/electronic/symbol.tsv")).optimize() + + username = ( + pynutil.insert("username: \"") + all_accepted_symbols + pynutil.insert("\"") + pynini.cross('@', ' ') + ) + domain_graph = all_accepted_symbols + pynini.accep('.') + all_accepted_symbols + NEMO_ALPHA + protocol_symbols = 
pynini.closure((graph_symbols | pynini.cross(":", "semicolon")) + pynutil.insert(" ")) + protocol_start = (pynini.cross("https", "HTTPS ") | pynini.cross("http", "HTTP ")) + ( + pynini.accep("://") @ protocol_symbols + ) + protocol_file_start = pynini.accep("file") + insert_space + (pynini.accep(":///") @ protocol_symbols) + + protocol_end = pynini.cross("www", "WWW ") + pynini.accep(".") @ protocol_symbols + protocol = protocol_file_start | protocol_start | protocol_end | (protocol_start + protocol_end) + + domain_graph = ( + pynutil.insert("domain: \"") + + pynini.difference(domain_graph, pynini.project(protocol, "input") + NEMO_SIGMA) + + pynutil.insert("\"") + ) + domain_common_graph = ( + pynutil.insert("domain: \"") + + pynini.difference( + all_accepted_symbols + + accepted_common_domains + + pynini.closure(accepted_symbols + pynini.closure(NEMO_ALPHA | NEMO_DIGIT | accepted_symbols), 0, 1), + pynini.project(protocol, "input") + NEMO_SIGMA, + ) + + pynutil.insert("\"") + ) + + protocol = pynutil.insert("protocol: \"") + protocol + pynutil.insert("\"") + # email + graph = username + domain_graph + # abc.com, abc.com/123-sm + graph |= domain_common_graph + # www.abc.com/sdafsdf, or https://www.abc.com/asdfad or www.abc.abc/asdfad + graph |= protocol + pynutil.insert(" ") + domain_graph + + final_graph = self.add_tokens(graph) + + self.fst = final_graph.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/fraction.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/fraction.py new file mode 100644 index 0000000..ac6877c --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/fraction.py @@ -0,0 +1,55 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, get_abs_path +from pynini.lib import pynutil + + +class FractionFst(GraphFst): + """ + Finite state transducer for classifying fraction + "23 4/5" -> + tokens { fraction { integer: "twenty three" numerator: "four" denominator: "five" } } + "23 4/5th" -> + tokens { fraction { integer: "twenty three" numerator: "four" denominator: "five" } } + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, cardinal, deterministic: bool = True): + super().__init__(name="fraction", kind="classify", deterministic=deterministic) + cardinal_graph = cardinal.graph + + integer = pynutil.insert("integer_part: \"") + cardinal_graph + pynutil.insert("\"") + numerator = ( + pynutil.insert("numerator: \"") + cardinal_graph + (pynini.cross("/", "\" ") | pynini.cross(" / ", "\" ")) + ) + + endings = ["rd", "th", "st", "nd"] + endings += [x.upper() for x in endings] + optional_end = pynini.closure(pynini.cross(pynini.union(*endings), ""), 0, 1) + + denominator = pynutil.insert("denominator: \"") + cardinal_graph + optional_end + pynutil.insert("\"") + + graph = pynini.closure(integer + pynini.accep(" "), 0, 1) + (numerator + denominator) + graph |= pynini.closure(integer + (pynini.accep(" ") | pynutil.insert(" ")), 0, 1) + pynini.compose( + pynini.string_file(get_abs_path("data/number/fraction.tsv")), (numerator + denominator) + ) + + self.graph = graph + 
final_graph = self.add_tokens(self.graph) + self.fst = final_graph.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/measure.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/measure.py new file mode 100644 index 0000000..3861f91 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/measure.py @@ -0,0 +1,304 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_ALPHA, + NEMO_DIGIT, + NEMO_NON_BREAKING_SPACE, + NEMO_SIGMA, + NEMO_SPACE, + NEMO_UPPER, + SINGULAR_TO_PLURAL, + TO_LOWER, + GraphFst, + convert_space, + delete_space, + delete_zero_or_one_space, + insert_space, +) +from nemo_text_processing.text_normalization.en.taggers.ordinal import OrdinalFst as OrdinalTagger +from nemo_text_processing.text_normalization.en.taggers.whitelist import get_formats +from nemo_text_processing.text_normalization.en.utils import get_abs_path, load_labels +from nemo_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst as OrdinalVerbalizer +from pynini.examples import plurals +from pynini.lib import pynutil + + +class MeasureFst(GraphFst): + """ + Finite state transducer for classifying measure, suppletive aware, e.g. 
+ -12kg -> measure { negative: "true" cardinal { integer: "twelve" } units: "kilograms" } + 1kg -> measure { cardinal { integer: "one" } units: "kilogram" } + .5kg -> measure { decimal { fractional_part: "five" } units: "kilograms" } + + Args: + cardinal: CardinalFst + decimal: DecimalFst + fraction: FractionFst + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, cardinal: GraphFst, decimal: GraphFst, fraction: GraphFst, deterministic: bool = True): + super().__init__(name="measure", kind="classify", deterministic=deterministic) + cardinal_graph = cardinal.graph_with_and | self.get_range(cardinal.graph_with_and) + + graph_unit = pynini.string_file(get_abs_path("data/measure/unit.tsv")) + if not deterministic: + graph_unit |= pynini.string_file(get_abs_path("data/measure/unit_alternatives.tsv")) + + graph_unit |= pynini.compose( + pynini.closure(TO_LOWER, 1) + (NEMO_ALPHA | TO_LOWER) + pynini.closure(NEMO_ALPHA | TO_LOWER), graph_unit + ).optimize() + + graph_unit_plural = convert_space(graph_unit @ SINGULAR_TO_PLURAL) + graph_unit = convert_space(graph_unit) + + optional_graph_negative = pynini.closure(pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0, 1) + + graph_unit2 = ( + pynini.cross("/", "per") + delete_zero_or_one_space + pynutil.insert(NEMO_NON_BREAKING_SPACE) + graph_unit + ) + + optional_graph_unit2 = pynini.closure( + delete_zero_or_one_space + pynutil.insert(NEMO_NON_BREAKING_SPACE) + graph_unit2, 0, 1, + ) + + unit_plural = ( + pynutil.insert("units: \"") + + (graph_unit_plural + optional_graph_unit2 | graph_unit2) + + pynutil.insert("\"") + ) + + unit_singular = ( + pynutil.insert("units: \"") + (graph_unit + optional_graph_unit2 | graph_unit2) + pynutil.insert("\"") + ) + + subgraph_decimal = ( + pynutil.insert("decimal { ") + + optional_graph_negative + + decimal.final_graph_wo_negative + + delete_space + 
+ pynutil.insert(" } ") + + unit_plural + ) + + # support radio FM/AM + subgraph_decimal |= ( + pynutil.insert("decimal { ") + + decimal.final_graph_wo_negative + + delete_space + + pynutil.insert(" } ") + + pynutil.insert("units: \"") + + pynini.union("AM", "FM") + + pynutil.insert("\"") + ) + + subgraph_cardinal = ( + pynutil.insert("cardinal { ") + + optional_graph_negative + + pynutil.insert("integer: \"") + + ((NEMO_SIGMA - "1") @ cardinal_graph) + + delete_space + + pynutil.insert("\"") + + pynutil.insert(" } ") + + unit_plural + ) + + subgraph_cardinal |= ( + pynutil.insert("cardinal { ") + + optional_graph_negative + + pynutil.insert("integer: \"") + + pynini.cross("1", "one") + + delete_space + + pynutil.insert("\"") + + pynutil.insert(" } ") + + unit_singular + ) + + unit_graph = ( + pynutil.insert("cardinal { integer: \"-\" } units: \"") + + pynini.cross(pynini.union("/", "per"), "per") + + delete_zero_or_one_space + + pynutil.insert(NEMO_NON_BREAKING_SPACE) + + graph_unit + + pynutil.insert("\" preserve_order: true") + ) + + decimal_dash_alpha = ( + pynutil.insert("decimal { ") + + decimal.final_graph_wo_negative + + pynini.cross('-', '') + + pynutil.insert(" } units: \"") + + pynini.closure(NEMO_ALPHA, 1) + + pynutil.insert("\"") + ) + + decimal_times = ( + pynutil.insert("decimal { ") + + decimal.final_graph_wo_negative + + pynutil.insert(" } units: \"") + + pynini.cross(pynini.union('x', "X"), 'x') + + pynutil.insert("\"") + ) + + alpha_dash_decimal = ( + pynutil.insert("units: \"") + + pynini.closure(NEMO_ALPHA, 1) + + pynini.accep('-') + + pynutil.insert("\"") + + pynutil.insert(" decimal { ") + + decimal.final_graph_wo_negative + + pynutil.insert(" } preserve_order: true") + ) + + subgraph_fraction = ( + pynutil.insert("fraction { ") + fraction.graph + delete_space + pynutil.insert(" } ") + unit_plural + ) + + address = self.get_address_graph(cardinal) + address = ( + pynutil.insert("units: \"address\" cardinal { integer: \"") + + address + + 
pynutil.insert("\" } preserve_order: true") + ) + + math_operations = pynini.string_file(get_abs_path("data/measure/math_operation.tsv")) + delimiter = pynini.accep(" ") | pynutil.insert(" ") + + math = ( + (cardinal_graph | NEMO_ALPHA) + + delimiter + + math_operations + + (delimiter | NEMO_ALPHA) + + cardinal_graph + + delimiter + + pynini.cross("=", "equals") + + delimiter + + (cardinal_graph | NEMO_ALPHA) + ) + + math |= ( + (cardinal_graph | NEMO_ALPHA) + + delimiter + + pynini.cross("=", "equals") + + delimiter + + (cardinal_graph | NEMO_ALPHA) + + delimiter + + math_operations + + delimiter + + cardinal_graph + ) + + math = ( + pynutil.insert("units: \"math\" cardinal { integer: \"") + + math + + pynutil.insert("\" } preserve_order: true") + ) + final_graph = ( + subgraph_decimal + | subgraph_cardinal + | unit_graph + | decimal_dash_alpha + | decimal_times + | alpha_dash_decimal + | subgraph_fraction + | address + | math + ) + + final_graph = self.add_tokens(final_graph) + self.fst = final_graph.optimize() + + def get_range(self, cardinal: GraphFst): + """ + Returns range forms for measure tagger, e.g. 2-3, 2x3, 2*2 + + Args: + cardinal: cardinal GraphFst + """ + range_graph = cardinal + pynini.cross(pynini.union("-", " - "), " to ") + cardinal + + for x in [" x ", "x"]: + range_graph |= cardinal + pynini.cross(x, " by ") + cardinal + if not self.deterministic: + range_graph |= cardinal + pynini.cross(x, " times ") + cardinal + + for x in ["*", " * "]: + range_graph |= cardinal + pynini.cross(x, " times ") + cardinal + return range_graph.optimize() + + def get_address_graph(self, cardinal): + """ + Finite state transducer for classifying serial. 
+ The serial is a combination of digits, letters and dashes, e.g.: + 2788 San Tomas Expy, Santa Clara, CA 95051 -> + units: "address" cardinal + { integer: "two seven eight eight San Tomas Expressway Santa Clara California nine five zero five one" } + preserve_order: true + """ + ordinal_verbalizer = OrdinalVerbalizer().graph + ordinal_tagger = OrdinalTagger(cardinal=cardinal).graph + ordinal_num = pynini.compose( + pynutil.insert("integer: \"") + ordinal_tagger + pynutil.insert("\""), ordinal_verbalizer + ) + + address_num = NEMO_DIGIT ** (1, 2) @ cardinal.graph_hundred_component_at_least_one_none_zero_digit + address_num += insert_space + NEMO_DIGIT ** 2 @ ( + pynini.closure(pynini.cross("0", "zero "), 0, 1) + + cardinal.graph_hundred_component_at_least_one_none_zero_digit + ) + # to handle the rest of the numbers + address_num = pynini.compose(NEMO_DIGIT ** (3, 4), address_num) + address_num = plurals._priority_union(address_num, cardinal.graph, NEMO_SIGMA) + + direction = ( + pynini.cross("E", "East") + | pynini.cross("S", "South") + | pynini.cross("W", "West") + | pynini.cross("N", "North") + ) + pynini.closure(pynutil.delete("."), 0, 1) + + direction = pynini.closure(pynini.accep(NEMO_SPACE) + direction, 0, 1) + address_words = get_formats(get_abs_path("data/address/address_word.tsv")) + address_words = ( + pynini.accep(NEMO_SPACE) + + (pynini.closure(ordinal_num, 0, 1) | NEMO_UPPER + pynini.closure(NEMO_ALPHA, 1)) + + NEMO_SPACE + + pynini.closure(NEMO_UPPER + pynini.closure(NEMO_ALPHA) + NEMO_SPACE) + + address_words + ) + + city = pynini.closure(NEMO_ALPHA | pynini.accep(NEMO_SPACE), 1) + city = pynini.closure(pynini.accep(",") + pynini.accep(NEMO_SPACE) + city, 0, 1) + + states = load_labels(get_abs_path("data/address/state.tsv")) + + additional_options = [] + for x, y in states: + additional_options.append((x, f"{y[0]}.{y[1:]}")) + states.extend(additional_options) + state_graph = pynini.string_map(states) + state = pynini.invert(state_graph) + state = 
pynini.closure(pynini.accep(",") + pynini.accep(NEMO_SPACE) + state, 0, 1) + + zip_code = pynini.compose(NEMO_DIGIT ** 5, cardinal.single_digits_graph) + zip_code = pynini.closure(pynini.closure(pynini.accep(","), 0, 1) + pynini.accep(NEMO_SPACE) + zip_code, 0, 1,) + + address = address_num + direction + address_words + pynini.closure(city + state + zip_code, 0, 1) + + address |= address_num + direction + address_words + pynini.closure(pynini.cross(".", ""), 0, 1) + + return address diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/money.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/money.py new file mode 100644 index 0000000..43e26bd --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/money.py @@ -0,0 +1,192 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ---- utils/speechio/nemo_text_processing/text_normalization/en/taggers/money.py (new file, continued) ----

import pynini
from nemo_text_processing.text_normalization.en.graph_utils import (
    NEMO_ALPHA,
    NEMO_DIGIT,
    NEMO_SIGMA,
    SINGULAR_TO_PLURAL,
    GraphFst,
    convert_space,
    insert_space,
)
from nemo_text_processing.text_normalization.en.utils import get_abs_path, load_labels
from pynini.lib import pynutil

# Module-level currency lookup FSTs, loaded once at import time from TSV data.
min_singular = pynini.string_file(get_abs_path("data/money/currency_minor_singular.tsv"))
min_plural = pynini.string_file(get_abs_path("data/money/currency_minor_plural.tsv"))
maj_singular = pynini.string_file((get_abs_path("data/money/currency_major.tsv")))


class MoneyFst(GraphFst):
    """
    Finite state transducer for classifying money, suppletive aware, e.g.
        $12.05 -> money { integer_part: "twelve" currency_maj: "dollars" fractional_part: "five" currency_min: "cents" preserve_order: true }
        $12.0500 -> money { integer_part: "twelve" currency_maj: "dollars" fractional_part: "five" currency_min: "cents" preserve_order: true }
        $1 -> money { currency_maj: "dollar" integer_part: "one" }
        $1.00 -> money { currency_maj: "dollar" integer_part: "one" }
        $0.05 -> money { fractional_part: "five" currency_min: "cents" preserve_order: true }
        $1 million -> money { currency_maj: "dollars" integer_part: "one" quantity: "million" }
        $1.2 million -> money { currency_maj: "dollars" integer_part: "one" fractional_part: "two" quantity: "million" }
        $1.2320 -> money { currency_maj: "dollars" integer_part: "one" fractional_part: "two three two" }

    Args:
        cardinal: CardinalFst
        decimal: DecimalFst
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
    """

    def __init__(self, cardinal: GraphFst, decimal: GraphFst, deterministic: bool = True):
        super().__init__(name="money", kind="classify", deterministic=deterministic)
        cardinal_graph = cardinal.graph_with_and
        graph_decimal_final = decimal.final_graph_wo_negative_w_abbr

        maj_singular_labels = load_labels(get_abs_path("data/money/currency_major.tsv"))
        # Suppletive-aware pluralization of major currency names ("dollar" -> "dollars").
        maj_unit_plural = convert_space(maj_singular @ SINGULAR_TO_PLURAL)
        maj_unit_singular = convert_space(maj_singular)

        graph_maj_singular = pynutil.insert("currency_maj: \"") + maj_unit_singular + pynutil.insert("\"")
        graph_maj_plural = pynutil.insert("currency_maj: \"") + maj_unit_plural + pynutil.insert("\"")

        # Drops an all-zero fractional part, e.g. "$1.00" -> "$1".
        optional_delete_fractional_zeros = pynini.closure(
            pynutil.delete(".") + pynini.closure(pynutil.delete("0"), 1), 0, 1
        )

        graph_integer_one = pynutil.insert("integer_part: \"") + pynini.cross("1", "one") + pynutil.insert("\"")
        # only for decimals where third decimal after comma is non-zero or with quantity
        decimal_delete_last_zeros = (
            pynini.closure(NEMO_DIGIT | pynutil.delete(","))
            + pynini.accep(".")
            + pynini.closure(NEMO_DIGIT, 2)
            + (NEMO_DIGIT - "0")
            + pynini.closure(pynutil.delete("0"))
        )
        decimal_with_quantity = NEMO_SIGMA + NEMO_ALPHA

        graph_decimal = (
            graph_maj_plural + insert_space + (decimal_delete_last_zeros | decimal_with_quantity) @ graph_decimal_final
        )

        # Integers other than "1" take the plural currency form.
        graph_integer = (
            pynutil.insert("integer_part: \"") + ((NEMO_SIGMA - "1") @ cardinal_graph) + pynutil.insert("\"")
        )

        graph_integer_only = graph_maj_singular + insert_space + graph_integer_one
        graph_integer_only |= graph_maj_plural + insert_space + graph_integer

        final_graph = (graph_integer_only + optional_delete_fractional_zeros) | graph_decimal

        # remove trailing zeros of non zero number in the first 2 digits and fill up to 2 digits
        # e.g. 2000 -> 20, 0200->02, 01 -> 01, 10 -> 10
        # not accepted: 002, 00, 0
        two_digits_fractional_part = (
            pynini.closure(NEMO_DIGIT) + (NEMO_DIGIT - "0") + pynini.closure(pynutil.delete("0"))
        ) @ (
            (pynutil.delete("0") + (NEMO_DIGIT - "0"))
            | ((NEMO_DIGIT - "0") + pynutil.insert("0"))
            | ((NEMO_DIGIT - "0") + NEMO_DIGIT)
        )

        graph_min_singular = pynutil.insert(" currency_min: \"") + min_singular + pynutil.insert("\"")
        graph_min_plural = pynutil.insert(" currency_min: \"") + min_plural + pynutil.insert("\"")
        # format ** dollars ** cent
        decimal_graph_with_minor = None
        integer_graph_reordered = None
        decimal_default_reordered = None
        # Build one branch per currency symbol so the minor unit ("cents", "pence", ...)
        # matches the major currency that was consumed.
        for curr_symbol, _ in maj_singular_labels:
            preserve_order = pynutil.insert(" preserve_order: true")
            integer_plus_maj = graph_integer + insert_space + pynutil.insert(curr_symbol) @ graph_maj_plural
            integer_plus_maj |= graph_integer_one + insert_space + pynutil.insert(curr_symbol) @ graph_maj_singular

            integer_plus_maj_with_comma = pynini.compose(
                NEMO_DIGIT - "0" + pynini.closure(NEMO_DIGIT | pynutil.delete(",")), integer_plus_maj
            )
            integer_plus_maj = pynini.compose(pynini.closure(NEMO_DIGIT) - "0", integer_plus_maj)
            integer_plus_maj |= integer_plus_maj_with_comma

            graph_fractional_one = two_digits_fractional_part @ pynini.cross("1", "one")
            graph_fractional_one = pynutil.insert("fractional_part: \"") + graph_fractional_one + pynutil.insert("\"")
            graph_fractional = (
                two_digits_fractional_part
                @ (pynini.closure(NEMO_DIGIT, 1, 2) - "1")
                @ cardinal.graph_hundred_component_at_least_one_none_zero_digit
            )
            graph_fractional = pynutil.insert("fractional_part: \"") + graph_fractional + pynutil.insert("\"")

            fractional_plus_min = graph_fractional + insert_space + pynutil.insert(curr_symbol) @ graph_min_plural
            fractional_plus_min |= (
                graph_fractional_one + insert_space + pynutil.insert(curr_symbol) @ graph_min_singular
            )

            decimal_graph_with_minor_curr = integer_plus_maj + pynini.cross(".", " ") + fractional_plus_min

            if not deterministic:
                decimal_graph_with_minor_curr |= pynutil.add_weight(
                    integer_plus_maj
                    + pynini.cross(".", " ")
                    + pynutil.insert("fractional_part: \"")
                    + two_digits_fractional_part @ cardinal.graph_hundred_component_at_least_one_none_zero_digit
                    + pynutil.insert("\""),
                    weight=0.0001,
                )
            default_fraction_graph = (decimal_delete_last_zeros | decimal_with_quantity) @ graph_decimal_final
            # fractional-only amounts, e.g. "$0.05" / "$.05" -> "five cents"
            decimal_graph_with_minor_curr |= (
                pynini.closure(pynutil.delete("0"), 0, 1) + pynutil.delete(".") + fractional_plus_min
            )
            decimal_graph_with_minor_curr = (
                pynutil.delete(curr_symbol) + decimal_graph_with_minor_curr + preserve_order
            )

            decimal_graph_with_minor = (
                decimal_graph_with_minor_curr
                if decimal_graph_with_minor is None
                else pynini.union(decimal_graph_with_minor, decimal_graph_with_minor_curr).optimize()
            )

            if not deterministic:
                integer_graph_reordered_curr = (
                    pynutil.delete(curr_symbol) + integer_plus_maj + preserve_order
                ).optimize()

                integer_graph_reordered = (
                    integer_graph_reordered_curr
                    if integer_graph_reordered is None
                    else pynini.union(integer_graph_reordered, integer_graph_reordered_curr).optimize()
                )
                decimal_default_reordered_curr = (
                    pynutil.delete(curr_symbol)
                    + default_fraction_graph
                    + insert_space
                    + pynutil.insert(curr_symbol) @ graph_maj_plural
                )

                decimal_default_reordered = (
                    decimal_default_reordered_curr
                    if decimal_default_reordered is None
                    else pynini.union(decimal_default_reordered, decimal_default_reordered_curr)
                ).optimize()

        # weight for SH
        final_graph |= pynutil.add_weight(decimal_graph_with_minor, -0.0001)

        if not deterministic:
            final_graph |= integer_graph_reordered | decimal_default_reordered
            # to handle "$2.00" cases
            final_graph |= pynini.compose(
                NEMO_SIGMA + pynutil.delete(".") + pynini.closure(pynutil.delete("0"), 1), integer_graph_reordered
            )
        final_graph = self.add_tokens(final_graph.optimize())
        self.fst = final_graph.optimize()


# ---- utils/speechio/nemo_text_processing/text_normalization/en/taggers/ordinal.py (new file) ----
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import pynini
from nemo_text_processing.text_normalization.en.graph_utils import NEMO_DIGIT, GraphFst
from pynini.lib import pynutil


class OrdinalFst(GraphFst):
    """
    Finite state transducer for classifying ordinal, e.g.
        13th -> ordinal { integer: "thirteen" }

    Args:
        cardinal: CardinalFst
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
    """

    def __init__(self, cardinal: GraphFst, deterministic: bool = True):
        super().__init__(name="ordinal", kind="classify", deterministic=deterministic)

        cardinal_graph = cardinal.graph
        # Any digit string, possibly with thousands separators ("1,234").
        cardinal_format = pynini.closure(NEMO_DIGIT | pynini.accep(","))
        # "st" only follows a final 1 that is not part of 11 (preceding digit != 1).
        st_format = (
            pynini.closure(cardinal_format + (NEMO_DIGIT - "1"), 0, 1)
            + pynini.accep("1")
            + pynutil.delete(pynini.union("st", "ST"))
        )
        nd_format = (
            pynini.closure(cardinal_format + (NEMO_DIGIT - "1"), 0, 1)
            + pynini.accep("2")
            + pynutil.delete(pynini.union("nd", "ND"))
        )
        rd_format = (
            pynini.closure(cardinal_format + (NEMO_DIGIT - "1"), 0, 1)
            + pynini.accep("3")
            + pynutil.delete(pynini.union("rd", "RD"))
        )
        # "th" covers everything else, including the 11/12/13 exceptions.
        th_format = pynini.closure(
            (NEMO_DIGIT - "1" - "2" - "3")
            | (cardinal_format + "1" + NEMO_DIGIT)
            | (cardinal_format + (NEMO_DIGIT - "1") + (NEMO_DIGIT - "1" - "2" - "3")),
            1,
        ) + pynutil.delete(pynini.union("th", "TH"))
        self.graph = (st_format | nd_format | rd_format | th_format) @ cardinal_graph
        final_graph = pynutil.insert("integer: \"") + self.graph + pynutil.insert("\"")
        final_graph = self.add_tokens(final_graph)
        self.fst = final_graph.optimize()


# ---- utils/speechio/nemo_text_processing/text_normalization/en/taggers/punctuation.py (new file; header continues below) ----
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from unicodedata import category

import pynini
from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_SPACE, NEMO_SIGMA, GraphFst
from nemo_text_processing.text_normalization.en.utils import get_abs_path, load_labels
from pynini.examples import plurals
from pynini.lib import pynutil


class PunctuationFst(GraphFst):
    """
    Finite state transducer for classifying punctuation
        e.g. a, -> tokens { name: "a" } tokens { name: "," }

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
    """

    def __init__(self, deterministic: bool = True):
        super().__init__(name="punctuation", kind="classify", deterministic=deterministic)
        # ASCII punctuation handled explicitly (in addition to the Unicode scan below).
        s = "!#%&\'()*+,-./:;<=>?@^_`{|}~\""

        # "[" and "]" are excluded because they are reserved markup characters
        # elsewhere in the pipeline — TODO confirm against the tokenizer.
        punct_symbols_to_exclude = ["[", "]"]
        # Collect every Unicode codepoint in a "P*" (punctuation) category.
        # NOTE(review): this scans all of sys.maxunicode at import/construction
        # time, which is slow but runs only once per FST build.
        punct_unicode = [
            chr(i)
            for i in range(sys.maxunicode)
            if category(chr(i)).startswith("P") and chr(i) not in punct_symbols_to_exclude
        ]

        # Symbols in the whitelist are verbalized elsewhere, so they must not be
        # swallowed as punctuation here.
        whitelist_symbols = load_labels(get_abs_path("data/whitelist/symbol.tsv"))
        whitelist_symbols = [x[0] for x in whitelist_symbols]
        self.punct_marks = [p for p in punct_unicode + list(s) if p not in whitelist_symbols]

        punct = pynini.union(*self.punct_marks)
        punct = pynini.closure(punct, 1)

        # Accept markup-like emphasis tags (<b>, </b>, <word/>) verbatim, with
        # priority over the plain punctuation graph.
        emphasis = (
            pynini.accep("<")
            + (
                (pynini.closure(NEMO_NOT_SPACE - pynini.union("<", ">"), 1) + pynini.closure(pynini.accep("/"), 0, 1))
                | (pynini.accep("/") + pynini.closure(NEMO_NOT_SPACE - pynini.union("<", ">"), 1))
            )
            + pynini.accep(">")
        )
        punct = plurals._priority_union(emphasis, punct, NEMO_SIGMA)

        self.graph = punct
        self.fst = (pynutil.insert("name: \"") + self.graph + pynutil.insert("\"")).optimize()


# ---- utils/speechio/nemo_text_processing/text_normalization/en/taggers/range.py (new file) ----
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pynini
from nemo_text_processing.text_normalization.en.graph_utils import NEMO_DIGIT, GraphFst, convert_space
from pynini.lib import pynutil


class RangeFst(GraphFst):
    """
    This class is a composite class of two other class instances

    Args:
        time: composed tagger and verbalizer
        date: composed tagger and verbalizer
        cardinal: tagger
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
        lm: whether to use for hybrid LM
    """

    def __init__(
        self, time: GraphFst, date: GraphFst, cardinal: GraphFst, deterministic: bool = True, lm: bool = False,
    ):
        super().__init__(name="range", kind="classify", deterministic=deterministic)

        # Optional single space remover around range separators.
        delete_space = pynini.closure(pynutil.delete(" "), 0, 1)

        approx = pynini.cross("~", "approximately")

        # TIME: "10:00-11:00" -> "ten to eleven"
        time_graph = time + delete_space + pynini.cross("-", " to ") + delete_space + time
        self.graph = time_graph | (approx + time)

        cardinal = cardinal.graph_with_and
        # YEAR: "1995-2000" / "1995-99" style spans, with optional decade "s"
        date_year_four_digit = (NEMO_DIGIT ** 4 + pynini.closure(pynini.accep("s"), 0, 1)) @ date
        date_year_two_digit = (NEMO_DIGIT ** 2 + pynini.closure(pynini.accep("s"), 0, 1)) @ date
        year_to_year_graph = (
            date_year_four_digit
            + delete_space
            + pynini.cross("-", " to ")
            + delete_space
            + (date_year_four_digit | date_year_two_digit | (NEMO_DIGIT ** 2 @ cardinal))
        )
        mid_year_graph = pynini.accep("mid") + pynini.cross("-", " ") + (date_year_four_digit | date_year_two_digit)

        self.graph |= year_to_year_graph
        self.graph |= mid_year_graph

        # ADDITION: "2+2" -> "two plus two"
        range_graph = cardinal + pynini.closure(pynini.cross("+", " plus ") + cardinal, 1)
        range_graph |= cardinal + pynini.closure(pynini.cross(" + ", " plus ") + cardinal, 1)
        range_graph |= approx + cardinal
        range_graph |= cardinal + (pynini.cross("...", " ... ") | pynini.accep(" ... ")) + cardinal

        if not deterministic or lm:
            # cardinal ----
            # "-" between cardinals is ambiguous: range ("to") vs subtraction ("minus").
            cardinal_to_cardinal_graph = (
                cardinal + delete_space + pynini.cross("-", pynini.union(" to ", " minus ")) + delete_space + cardinal
            )

            range_graph |= cardinal_to_cardinal_graph | (
                cardinal + delete_space + pynini.cross(":", " to ") + delete_space + cardinal
            )

            # MULTIPLY: "2x3" -> "two by three" / "two times three"
            for x in [" x ", "x"]:
                range_graph |= cardinal + pynini.closure(
                    pynini.cross(x, pynini.union(" by ", " times ")) + cardinal, 1
                )

            for x in ["*", " * "]:
                range_graph |= cardinal + pynini.closure(pynini.cross(x, " times ") + cardinal, 1)

            # supports "No. 12" -> "Number 12"
            range_graph |= (
                (pynini.cross(pynini.union("NO", "No"), "Number") | pynini.cross("no", "number"))
                + pynini.closure(pynini.union(". ", " "), 0, 1)
                + cardinal
            )

            for x in ["/", " / "]:
                range_graph |= cardinal + pynini.closure(pynini.cross(x, " divided by ") + cardinal, 1)

        self.graph |= range_graph

        self.graph = self.graph.optimize()
        graph = pynutil.insert("name: \"") + convert_space(self.graph).optimize() + pynutil.insert("\"")
        self.fst = graph.optimize()


# ---- utils/speechio/nemo_text_processing/text_normalization/en/taggers/roman.py (new file; header continues below) ----
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import pynini
from nemo_text_processing.text_normalization.en.graph_utils import NEMO_ALPHA, NEMO_SIGMA, GraphFst
from nemo_text_processing.text_normalization.en.utils import get_abs_path, load_labels
from pynini.lib import pynutil


class RomanFst(GraphFst):
    """
    Finite state transducer for classifying roman numbers:
        e.g. "IV" -> tokens { roman { integer: "four" } }

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
        lm: whether to use for hybrid LM
    """

    def __init__(self, deterministic: bool = True, lm: bool = False):
        super().__init__(name="roman", kind="classify", deterministic=deterministic)

        # roman_to_spoken.tsv rows are ordered by value, so slicing the list
        # bounds the numeral range (e.g. [:19] -> up to XIX).
        roman_dict = load_labels(get_abs_path("data/roman/roman_to_spoken.tsv"))
        default_graph = pynini.string_map(roman_dict).optimize()
        default_graph = pynutil.insert("integer: \"") + default_graph + pynutil.insert("\"")
        ordinal_limit = 19

        if deterministic:
            # exclude "I" (too ambiguous with the pronoun in deterministic mode)
            start_idx = 1
        else:
            start_idx = 0

        graph_teens = pynini.string_map([x[0] for x in roman_dict[start_idx:ordinal_limit]]).optimize()

        # roman numerals up to ordinal_limit with a preceding name are converted to ordinal form
        # e.g. "Henry VIII" -> "Henry the eighth"
        names = get_names()
        graph = (
            pynutil.insert("key_the_ordinal: \"")
            + names
            + pynutil.insert("\"")
            + pynini.accep(" ")
            + graph_teens @ default_graph
        ).optimize()

        # single symbol roman numerals with preceding key words (multiple formats) are converted to cardinal form
        # key words accepted as-is, Capitalized, and UPPERCASED
        key_words = []
        for k_word in load_labels(get_abs_path("data/roman/key_word.tsv")):
            key_words.append(k_word)
            key_words.append([k_word[0][0].upper() + k_word[0][1:]])
            key_words.append([k_word[0].upper()])

        key_words = pynini.string_map(key_words).optimize()
        graph |= (
            pynutil.insert("key_cardinal: \"") + key_words + pynutil.insert("\"") + pynini.accep(" ") + default_graph
        ).optimize()

        if deterministic or lm:
            # two digit roman numerals up to 49
            roman_to_cardinal = pynini.compose(
                pynini.closure(NEMO_ALPHA, 2),
                (
                    pynutil.insert("default_cardinal: \"default\" ")
                    + (pynini.string_map([x[0] for x in roman_dict[:50]]).optimize()) @ default_graph
                ),
            )
            graph |= roman_to_cardinal
        elif not lm:
            # two or more digit roman numerals
            roman_to_cardinal = pynini.compose(
                pynini.difference(NEMO_SIGMA, "I"),
                (
                    pynutil.insert("default_cardinal: \"default\" integer: \"")
                    + pynini.string_map(roman_dict).optimize()
                    + pynutil.insert("\"")
                ),
            ).optimize()
            graph |= roman_to_cardinal

        # convert three digit roman or up with suffix to ordinal, e.g. "XIVth"
        roman_to_ordinal = pynini.compose(
            pynini.closure(NEMO_ALPHA, 3),
            (pynutil.insert("default_ordinal: \"default\" ") + graph_teens @ default_graph + pynutil.delete("th")),
        )

        graph |= roman_to_ordinal
        graph = self.add_tokens(graph.optimize())

        self.fst = graph.optimize()


def get_names():
    """
    Returns the graph that matched common male and female names.
    """
    male_labels = load_labels(get_abs_path("data/roman/male.tsv"))
    female_labels = load_labels(get_abs_path("data/roman/female.tsv"))
    # Also accept fully uppercased variants of every name.
    male_labels.extend([[x[0].upper()] for x in male_labels])
    female_labels.extend([[x[0].upper()] for x in female_labels])
    names = pynini.string_map(male_labels).optimize()
    names |= pynini.string_map(female_labels).optimize()
    return names


# ---- utils/speechio/nemo_text_processing/text_normalization/en/taggers/serial.py (new file; header continues below) ----
# Copyright (c) 2022, NVIDIA
# CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import pynini
from nemo_text_processing.text_normalization.en.graph_utils import (
    NEMO_ALPHA,
    NEMO_DIGIT,
    NEMO_NOT_SPACE,
    NEMO_SIGMA,
    GraphFst,
    convert_space,
)
from nemo_text_processing.text_normalization.en.utils import get_abs_path, load_labels
from pynini.examples import plurals
from pynini.lib import pynutil


class SerialFst(GraphFst):
    """
    Finite state transducer for classifying serial numbers (handles only cases
    without delimiters; values with delimiters are handled by default), e.g.
        c325b -> tokens { cardinal { integer: "c three two five b" } }

    The serial is a combination of digits, letters, dashes and slashes.

    Args:
        cardinal: CardinalFst tagger
        ordinal: OrdinalFst tagger (its inputs are excluded from serial matches)
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
        lm: whether to use for hybrid LM
    """

    def __init__(self, cardinal: GraphFst, ordinal: GraphFst, deterministic: bool = True, lm: bool = False):
        # NOTE(review): name is "integer" (not "serial") — presumably so that the
        # emitted tokens reuse the cardinal pipeline; confirm against verbalizers.
        super().__init__(name="integer", kind="classify", deterministic=deterministic)

        # 6+ digit runs are read digit-by-digit; shorter runs as full cardinals.
        num_graph = pynini.compose(NEMO_DIGIT ** (6, ...), cardinal.single_digits_graph).optimize()
        num_graph |= pynini.compose(NEMO_DIGIT ** (1, 5), cardinal.graph).optimize()
        # to handle numbers starting with zero
        num_graph |= pynini.compose(
            pynini.accep("0") + pynini.closure(NEMO_DIGIT), cardinal.single_digits_graph
        ).optimize()
        # TODO: "#" doesn't work from the file
        symbols_graph = pynini.string_file(get_abs_path("data/whitelist/symbol.tsv")).optimize() | pynini.cross(
            "#", "hash"
        )
        num_graph |= symbols_graph

        if not self.deterministic and not lm:
            num_graph |= cardinal.single_digits_graph
            # also allow double digits to be pronounced as integer in serial number
            num_graph |= pynutil.add_weight(
                NEMO_DIGIT ** 2 @ cardinal.graph_hundred_component_at_least_one_none_zero_digit, weight=0.0001
            )

        # add space between letter and digit/symbol
        symbols = [x[0] for x in load_labels(get_abs_path("data/whitelist/symbol.tsv"))]
        symbols = pynini.union(*symbols)
        digit_symbol = NEMO_DIGIT | symbols

        graph_with_space = pynini.compose(
            pynini.cdrewrite(pynutil.insert(" "), NEMO_ALPHA | symbols, digit_symbol, NEMO_SIGMA),
            pynini.cdrewrite(pynutil.insert(" "), digit_symbol, NEMO_ALPHA | symbols, NEMO_SIGMA),
        )

        # serial graph with delimiter
        delimiter = pynini.accep("-") | pynini.accep("/") | pynini.accep(" ")
        if not deterministic:
            delimiter |= pynini.cross("-", " dash ") | pynini.cross("/", " slash ")

        alphas = pynini.closure(NEMO_ALPHA, 1)
        letter_num = alphas + delimiter + num_graph
        num_letter = pynini.closure(num_graph + delimiter, 1) + alphas
        next_alpha_or_num = pynini.closure(delimiter + (alphas | num_graph))
        next_alpha_or_num |= pynini.closure(
            delimiter
            + num_graph
            + plurals._priority_union(pynini.accep(" "), pynutil.insert(" "), NEMO_SIGMA).optimize()
            + alphas
        )

        serial_graph = letter_num + next_alpha_or_num
        serial_graph |= num_letter + next_alpha_or_num
        # numbers only with 2+ delimiters
        serial_graph |= (
            num_graph + delimiter + num_graph + delimiter + num_graph + pynini.closure(delimiter + num_graph)
        )
        # 2+ symbols
        serial_graph |= pynini.compose(NEMO_SIGMA + symbols + NEMO_SIGMA, num_graph + delimiter + num_graph)

        # exclude ordinal numbers from serial options
        serial_graph = pynini.compose(
            pynini.difference(NEMO_SIGMA, pynini.project(ordinal.graph, "input")), serial_graph
        ).optimize()

        # small positive weight: serial readings lose to more specific taggers
        serial_graph = pynutil.add_weight(serial_graph, 0.0001)
        serial_graph |= (
            pynini.closure(NEMO_NOT_SPACE, 1)
            + (pynini.cross("^2", " squared") | pynini.cross("^3", " cubed")).optimize()
        )

        # at least one serial graph with alpha numeric value and optional additional serial/num/alpha values
        serial_graph = (
            pynini.closure((serial_graph | num_graph | alphas) + delimiter)
            + serial_graph
            + pynini.closure(delimiter + (serial_graph | num_graph | alphas))
        )

        serial_graph |= pynini.compose(graph_with_space, serial_graph.optimize()).optimize()
        serial_graph = pynini.compose(pynini.closure(NEMO_NOT_SPACE, 2), serial_graph).optimize()

        # this is not to verbalize "/" as "slash" in cases like "import/export"
        serial_graph = pynini.compose(
            pynini.difference(
                NEMO_SIGMA, pynini.closure(NEMO_ALPHA, 1) + pynini.accep("/") + pynini.closure(NEMO_ALPHA, 1)
            ),
            serial_graph,
        )
        self.graph = serial_graph.optimize()
        graph = pynutil.insert("name: \"") + convert_space(self.graph).optimize() + pynutil.insert("\"")
        self.fst = graph.optimize()


# ---- utils/speechio/nemo_text_processing/text_normalization/en/taggers/telephone.py (new file; header continues below) ----
# Copyright (c) 2021, NVIDIA
# CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pynini
from nemo_text_processing.text_normalization.en.graph_utils import (
    NEMO_ALPHA,
    NEMO_DIGIT,
    NEMO_SIGMA,
    GraphFst,
    delete_extra_space,
    delete_space,
    insert_space,
    plurals,
)
from nemo_text_processing.text_normalization.en.utils import get_abs_path
from pynini.lib import pynutil


class TelephoneFst(GraphFst):
    """
    Finite state transducer for classifying telephone, and IP, and SSN which includes country code, number part and extension
        country code optional: +***
        number part: ***-***-****, or (***) ***-****
        extension optional: 1-9999
    E.g
        +1 123-123-5678-1 -> telephone { country_code: "one" number_part: "one two three, one two three, five six seven eight" extension: "one" }
        1-800-GO-U-HAUL -> telephone { country_code: "one" number_part: "one, eight hundred GO U HAUL" }

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
    """

    def __init__(self, deterministic: bool = True):
        super().__init__(name="telephone", kind="classify", deterministic=deterministic)

        add_separator = pynutil.insert(", ")  # between components
        zero = pynini.cross("0", "zero")
        if not deterministic:
            # colloquial readings of "0"
            zero |= pynini.cross("0", pynini.union("o", "oh"))
        digit = pynini.invert(pynini.string_file(get_abs_path("data/number/digit.tsv"))).optimize() | zero

        telephone_prompts = pynini.string_file(get_abs_path("data/telephone/telephone_prompt.tsv"))
        # Country code: optional prompt, optional "+", 1-3 digits.
        country_code = (
            pynini.closure(telephone_prompts + delete_extra_space, 0, 1)
            + pynini.closure(pynini.cross("+", "plus "), 0, 1)
            + pynini.closure(digit + insert_space, 0, 2)
            + digit
            + pynutil.insert(",")
        )
        country_code |= telephone_prompts
        country_code = pynutil.insert("country_code: \"") + country_code + pynutil.insert("\"")
        country_code = country_code + pynini.closure(pynutil.delete("-"), 0, 1) + delete_space + insert_space

        # Area code: "800" is special-cased as "eight hundred"; everything else
        # is read digit-by-digit.
        area_part_default = pynini.closure(digit + insert_space, 2, 2) + digit
        area_part = pynini.cross("800", "eight hundred") | pynini.compose(
            pynini.difference(NEMO_SIGMA, "800"), area_part_default
        )

        # Area code delimiters: "123-" / "123." / "(123) " / "(123)-"
        area_part = (
            (area_part + (pynutil.delete("-") | pynutil.delete(".")))
            | (
                pynutil.delete("(")
                + area_part
                + ((pynutil.delete(")") + pynini.closure(pynutil.delete(" "), 0, 1)) | pynutil.delete(")-"))
            )
        ) + add_separator

        # Main number: exactly 7 digits/letters with optional -, space, or . separators.
        del_separator = pynini.closure(pynini.union("-", " ", "."), 0, 1)
        number_length = ((NEMO_DIGIT + del_separator) | (NEMO_ALPHA + del_separator)) ** 7
        number_words = pynini.closure(
            (NEMO_DIGIT @ digit) + (insert_space | (pynini.cross("-", ', ')))
            | NEMO_ALPHA
            | (NEMO_ALPHA + pynini.cross("-", ' '))
        )
        number_words |= pynini.closure(
            (NEMO_DIGIT @ digit) + (insert_space | (pynini.cross(".", ', ')))
            | NEMO_ALPHA
            | (NEMO_ALPHA + pynini.cross(".", ' '))
        )
        number_words = pynini.compose(number_length, number_words)
        number_part = area_part + number_words
        number_part = pynutil.insert("number_part: \"") + number_part + pynutil.insert("\"")
        extension = (
            pynutil.insert("extension: \"") + pynini.closure(digit + insert_space, 0, 3) + digit + pynutil.insert("\"")
        )
        extension = pynini.closure(insert_space + extension, 0, 1)

        # Prefer the most specific parse: code+number+ext > code+number > number+ext > number.
        graph = plurals._priority_union(country_code + number_part, number_part, NEMO_SIGMA).optimize()
        graph = plurals._priority_union(country_code + number_part + extension, graph, NEMO_SIGMA).optimize()
        graph = plurals._priority_union(number_part + extension, graph, NEMO_SIGMA).optimize()

        # ip: four dot-separated groups of 1-3 digits
        ip_prompts = pynini.string_file(get_abs_path("data/telephone/ip_prompt.tsv"))
        digit_to_str_graph = digit + pynini.closure(pynutil.insert(" ") + digit, 0, 2)
        ip_graph = digit_to_str_graph + (pynini.cross(".", " dot ") + digit_to_str_graph) ** 3
        graph |= (
            pynini.closure(
                pynutil.insert("country_code: \"") + ip_prompts + pynutil.insert("\"") + delete_extra_space, 0, 1
            )
            + pynutil.insert("number_part: \"")
            + ip_graph.optimize()
            + pynutil.insert("\"")
        )
        # ssn: ###-##-####
        ssn_prompts = pynini.string_file(get_abs_path("data/telephone/ssn_prompt.tsv"))
        three_digit_part = digit + (pynutil.insert(" ") + digit) ** 2
        two_digit_part = digit + pynutil.insert(" ") + digit
        four_digit_part = digit + (pynutil.insert(" ") + digit) ** 3
        ssn_separator = pynini.cross("-", ", ")
        ssn_graph = three_digit_part + ssn_separator + two_digit_part + ssn_separator + four_digit_part

        graph |= (
            pynini.closure(
                pynutil.insert("country_code: \"") + ssn_prompts + pynutil.insert("\"") + delete_extra_space, 0, 1
            )
            + pynutil.insert("number_part: \"")
            + ssn_graph.optimize()
            + pynutil.insert("\"")
        )

        final_graph = self.add_tokens(graph)
        self.fst = final_graph.optimize()


# ---- utils/speechio/nemo_text_processing/text_normalization/en/taggers/time.py (new file; header continues below) ----
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
class TimeFst(GraphFst):
    """
    Finite state transducer for classifying time, e.g.
        12:30 a.m. est -> time { hours: "twelve" minutes: "thirty" suffix: "a m" zone: "e s t" }
        2.30 a.m. -> time { hours: "two" minutes: "thirty" suffix: "a m" }
        02.30 a.m. -> time { hours: "two" minutes: "thirty" suffix: "a m" }
        2.00 a.m. -> time { hours: "two" suffix: "a m" }
        2 a.m. -> time { hours: "two" suffix: "a m" }
        02:00 -> time { hours: "two" }
        2:00 -> time { hours: "two" }
        10:00:05 a.m. -> time { hours: "ten" minutes: "zero" seconds: "five" suffix: "a m" }

    Args:
        cardinal: CardinalFst
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
    """

    def __init__(self, cardinal: GraphFst, deterministic: bool = True):
        super().__init__(name="time", kind="classify", deterministic=deterministic)
        # a.m./p.m.-style suffixes, augmented with variants that carry trailing punctuation
        # so "a.m.," etc. are also matched.
        suffix_labels = load_labels(get_abs_path("data/time/suffix.tsv"))
        suffix_labels.extend(augment_labels_with_punct_at_end(suffix_labels))
        suffix_graph = pynini.string_map(suffix_labels)

        # time-zone abbreviations, e.g. "est"
        time_zone_graph = pynini.string_file(get_abs_path("data/time/zone.tsv"))

        # only the cardinal number graph is needed here (verbalizes digit strings to words)
        cardinal = cardinal.graph

        # valid hour/minute digit strings as plain text labels
        labels_hour = [str(x) for x in range(0, 24)]
        labels_minute_single = [str(x) for x in range(1, 10)]
        labels_minute_double = [str(x) for x in range(10, 60)]

        # accept a two-digit field as-is, or strip one optional leading zero
        # from a single-digit field ("02" -> "2")
        delete_leading_zero_to_double_digit = (NEMO_DIGIT + NEMO_DIGIT) | (
            pynini.closure(pynutil.delete("0"), 0, 1) + NEMO_DIGIT
        )

        graph_hour = delete_leading_zero_to_double_digit @ pynini.union(*labels_hour) @ cardinal

        graph_minute_single = pynini.union(*labels_minute_single) @ cardinal
        graph_minute_double = pynini.union(*labels_minute_double) @ cardinal

        final_graph_hour = pynutil.insert("hours: \"") + graph_hour + pynutil.insert("\"")
        # minutes 01-09 are read with a leading "o" ("05" -> "o five")
        final_graph_minute = (
            pynutil.insert("minutes: \"")
            + (pynini.cross("0", "o") + insert_space + graph_minute_single | graph_minute_double)
            + pynutil.insert("\"")
        )
        # seconds share the same 01-59 pattern as minutes
        final_graph_second = (
            pynutil.insert("seconds: \"")
            + (pynini.cross("0", "o") + insert_space + graph_minute_single | graph_minute_double)
            + pynutil.insert("\"")
        )
        final_suffix = pynutil.insert("suffix: \"") + convert_space(suffix_graph) + pynutil.insert("\"")
        final_suffix_optional = pynini.closure(delete_space + insert_space + final_suffix, 0, 1)
        final_time_zone_optional = pynini.closure(
            delete_space
            + insert_space
            + pynutil.insert("zone: \"")
            + convert_space(time_zone_graph)
            + pynutil.insert("\""),
            0,
            1,
        )

        # 2:30 pm, 02:30, 2:00 — ":00" minutes are dropped entirely
        graph_hm = (
            final_graph_hour
            + pynutil.delete(":")
            + (pynutil.delete("00") | insert_space + final_graph_minute)
            + final_suffix_optional
            + final_time_zone_optional
        )

        # 10:30:05 pm — unlike graph_hm, "00" fields verbalize as "zero" so the
        # following seconds field stays unambiguous
        graph_hms = (
            final_graph_hour
            + pynutil.delete(":")
            + (pynini.cross("00", " minutes: \"zero\"") | insert_space + final_graph_minute)
            + pynutil.delete(":")
            + (pynini.cross("00", " seconds: \"zero\"") | insert_space + final_graph_second)
            + final_suffix_optional
            + final_time_zone_optional
        )

        # 2.xx pm/am — dot-separated form; the suffix is mandatory here, which keeps
        # plain decimals like "2.30" from being tagged as time
        graph_hm2 = (
            final_graph_hour
            + pynutil.delete(".")
            + (pynutil.delete("00") | insert_space + final_graph_minute)
            + delete_space
            + insert_space
            + final_suffix
            + final_time_zone_optional
        )
        # 2 pm est — bare hour, suffix mandatory
        graph_h = final_graph_hour + delete_space + insert_space + final_suffix + final_time_zone_optional
        final_graph = (graph_hm | graph_h | graph_hm2 | graph_hms).optimize()

        final_graph = self.add_tokens(final_graph)
        self.fst = final_graph.optimize()
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_WHITE_SPACE, + GraphFst, + delete_extra_space, + delete_space, + generator_main, +) +from nemo_text_processing.text_normalization.en.taggers.abbreviation import AbbreviationFst +from nemo_text_processing.text_normalization.en.taggers.cardinal import CardinalFst +from nemo_text_processing.text_normalization.en.taggers.date import DateFst +from nemo_text_processing.text_normalization.en.taggers.decimal import DecimalFst +from nemo_text_processing.text_normalization.en.taggers.electronic import ElectronicFst +from nemo_text_processing.text_normalization.en.taggers.fraction import FractionFst +from nemo_text_processing.text_normalization.en.taggers.measure import MeasureFst +from nemo_text_processing.text_normalization.en.taggers.money import MoneyFst +from nemo_text_processing.text_normalization.en.taggers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst +from nemo_text_processing.text_normalization.en.taggers.range import RangeFst as RangeFst +from nemo_text_processing.text_normalization.en.taggers.roman import RomanFst +from nemo_text_processing.text_normalization.en.taggers.serial import SerialFst +from nemo_text_processing.text_normalization.en.taggers.telephone import TelephoneFst +from nemo_text_processing.text_normalization.en.taggers.time import TimeFst +from nemo_text_processing.text_normalization.en.taggers.whitelist import WhiteListFst +from 
class ClassifyFst(GraphFst):
    """
    Final class that composes all other classification grammars. This class can process an entire sentence including punctuation.
    For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File.
    More details to deployment at NeMo/tools/text_processing_deployment.

    Args:
        input_case: accepting either "lower_cased" or "cased" input.
        deterministic: if True will provide a single transduction option,
            for False multiple options (used for audio-based normalization)
        cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache.
        overwrite_cache: set to True to overwrite .far files
        whitelist: path to a file with whitelist replacements
    """

    def __init__(
        self,
        input_case: str,
        deterministic: bool = True,
        cache_dir: str = None,
        overwrite_cache: bool = False,
        whitelist: str = None,
    ):
        super().__init__(name="tokenize_and_classify", kind="classify", deterministic=deterministic)

        far_file = None
        if cache_dir is not None and cache_dir != "None":
            os.makedirs(cache_dir, exist_ok=True)
            whitelist_file = os.path.basename(whitelist) if whitelist else ""
            far_file = os.path.join(
                cache_dir, f"en_tn_{deterministic}_deterministic_{input_case}_{whitelist_file}_tokenize.far"
            )
        if not overwrite_cache and far_file and os.path.exists(far_file):
            # Reuse the previously compiled grammar instead of rebuilding from scratch.
            self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
        else:
            # Build one tagger per semiotic class. (The repeated dead
            # `start_time = time.time()` assignments were removed: the values
            # were never read or logged.)
            cardinal = CardinalFst(deterministic=deterministic)
            cardinal_graph = cardinal.fst

            ordinal = OrdinalFst(cardinal=cardinal, deterministic=deterministic)
            ordinal_graph = ordinal.fst

            decimal = DecimalFst(cardinal=cardinal, deterministic=deterministic)
            decimal_graph = decimal.fst

            fraction = FractionFst(deterministic=deterministic, cardinal=cardinal)
            fraction_graph = fraction.fst

            measure = MeasureFst(cardinal=cardinal, decimal=decimal, fraction=fraction, deterministic=deterministic)
            measure_graph = measure.fst

            date_graph = DateFst(cardinal=cardinal, deterministic=deterministic).fst

            time_graph = TimeFst(cardinal=cardinal, deterministic=deterministic).fst

            telephone_graph = TelephoneFst(deterministic=deterministic).fst

            electronic_graph = ElectronicFst(deterministic=deterministic).fst

            money_graph = MoneyFst(cardinal=cardinal, decimal=decimal, deterministic=deterministic).fst

            whitelist_graph = WhiteListFst(
                input_case=input_case, deterministic=deterministic, input_file=whitelist
            ).fst

            punctuation = PunctuationFst(deterministic=deterministic)
            punct_graph = punctuation.fst

            word_graph = WordFst(punctuation=punctuation, deterministic=deterministic).fst

            serial_graph = SerialFst(cardinal=cardinal, ordinal=ordinal, deterministic=deterministic).fst

            # Compose time/date taggers with their verbalizers so RangeFst can
            # consume fully verbalized time/date spans.
            # NOTE(review): vOrdinalFst is passed without `.fst` (unlike the other
            # verbalizers) — vDateFst appears to expect the Fst wrapper object itself.
            v_time_graph = vTimeFst(deterministic=deterministic).fst
            v_ordinal_graph = vOrdinalFst(deterministic=deterministic)
            v_date_graph = vDateFst(ordinal=v_ordinal_graph, deterministic=deterministic).fst
            time_final = pynini.compose(time_graph, v_time_graph)
            date_final = pynini.compose(date_graph, v_date_graph)
            range_graph = RangeFst(
                time=time_final, date=date_final, cardinal=cardinal, deterministic=deterministic
            ).fst

            # Weighted union of all classifiers: lower weight wins, so whitelist
            # (1.01) beats date (1.09) beats the generic classes (1.1), and serial
            # (1.1001) is deliberately the least preferred semiotic class.
            classify = (
                pynutil.add_weight(whitelist_graph, 1.01)
                | pynutil.add_weight(time_graph, 1.1)
                | pynutil.add_weight(date_graph, 1.09)
                | pynutil.add_weight(decimal_graph, 1.1)
                | pynutil.add_weight(measure_graph, 1.1)
                | pynutil.add_weight(cardinal_graph, 1.1)
                | pynutil.add_weight(ordinal_graph, 1.1)
                | pynutil.add_weight(money_graph, 1.1)
                | pynutil.add_weight(telephone_graph, 1.1)
                | pynutil.add_weight(electronic_graph, 1.1)
                | pynutil.add_weight(fraction_graph, 1.1)
                | pynutil.add_weight(range_graph, 1.1)
                | pynutil.add_weight(serial_graph, 1.1001)  # should be higher than the rest of the classes
            )

            roman_graph = RomanFst(deterministic=deterministic).fst
            classify |= pynutil.add_weight(roman_graph, 1.1)

            if not deterministic:
                # Abbreviation expansion only for multi-option (audio-based) mode.
                abbreviation_graph = AbbreviationFst(deterministic=deterministic).fst
                classify |= pynutil.add_weight(abbreviation_graph, 100)

            # Punctuation is wrapped in its own token; the 2.1 weight keeps it
            # below the plain-word fallback (100).
            punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=2.1) + pynutil.insert(" }")
            punct = pynini.closure(
                pynini.compose(pynini.closure(NEMO_WHITE_SPACE, 1), delete_extra_space)
                | (pynutil.insert(" ") + punct),
                1,
            )

            # Plain-word fallback: heavily penalized so any semiotic match wins.
            classify |= pynutil.add_weight(word_graph, 100)
            token = pynutil.insert("tokens { ") + classify + pynutil.insert(" }")
            token_plus_punct = (
                pynini.closure(punct + pynutil.insert(" ")) + token + pynini.closure(pynutil.insert(" ") + punct)
            )

            # Sentence = token (optionally flanked by punctuation tokens),
            # repeated with whitespace or punctuation separators.
            graph = token_plus_punct + pynini.closure(
                (
                    pynini.compose(pynini.closure(NEMO_WHITE_SPACE, 1), delete_extra_space)
                    | (pynutil.insert(" ") + punct + pynutil.insert(" "))
                )
                + token_plus_punct
            )

            graph = delete_space + graph + delete_space
            graph |= punct  # a punctuation-only input is also a valid sentence

            self.fst = graph.optimize()

            if far_file:
                # Persist the compiled grammar for reuse on the next construction.
                generator_main(far_file, {"tokenize_and_classify": self.fst})
+ +import os + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_CHAR, + NEMO_DIGIT, + NEMO_NOT_SPACE, + NEMO_SIGMA, + NEMO_WHITE_SPACE, + GraphFst, + delete_extra_space, + delete_space, + generator_main, +) +from nemo_text_processing.text_normalization.en.taggers.cardinal import CardinalFst +from nemo_text_processing.text_normalization.en.taggers.date import DateFst +from nemo_text_processing.text_normalization.en.taggers.decimal import DecimalFst +from nemo_text_processing.text_normalization.en.taggers.electronic import ElectronicFst +from nemo_text_processing.text_normalization.en.taggers.fraction import FractionFst +from nemo_text_processing.text_normalization.en.taggers.measure import MeasureFst +from nemo_text_processing.text_normalization.en.taggers.money import MoneyFst +from nemo_text_processing.text_normalization.en.taggers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst +from nemo_text_processing.text_normalization.en.taggers.range import RangeFst as RangeFst +from nemo_text_processing.text_normalization.en.taggers.roman import RomanFst +from nemo_text_processing.text_normalization.en.taggers.serial import SerialFst +from nemo_text_processing.text_normalization.en.taggers.telephone import TelephoneFst +from nemo_text_processing.text_normalization.en.taggers.time import TimeFst +from nemo_text_processing.text_normalization.en.taggers.whitelist import WhiteListFst +from nemo_text_processing.text_normalization.en.taggers.word import WordFst +from nemo_text_processing.text_normalization.en.verbalizers.cardinal import CardinalFst as vCardinal +from nemo_text_processing.text_normalization.en.verbalizers.date import DateFst as vDate +from nemo_text_processing.text_normalization.en.verbalizers.decimal import DecimalFst as vDecimal +from nemo_text_processing.text_normalization.en.verbalizers.electronic import ElectronicFst as vElectronic +from 
class ClassifyFst(GraphFst):
    """
    Final class that composes all other classification grammars. This class can process an entire sentence including punctuation.
    For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File.
    More details to deployment at NeMo/tools/text_processing_deployment.

    Args:
        input_case: accepting either "lower_cased" or "cased" input.
        deterministic: if True will provide a single transduction option,
            for False multiple options (used for audio-based normalization)
        cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache.
        overwrite_cache: set to True to overwrite .far files
        whitelist: path to a file with whitelist replacements
    """

    def __init__(
        self,
        input_case: str,
        deterministic: bool = True,
        cache_dir: str = None,
        overwrite_cache: bool = True,
        whitelist: str = None,
    ):
        super().__init__(name="tokenize_and_classify", kind="classify", deterministic=deterministic)

        far_file = None
        if cache_dir is not None and cache_dir != 'None':
            os.makedirs(cache_dir, exist_ok=True)
            whitelist_file = os.path.basename(whitelist) if whitelist else ""
            far_file = os.path.join(
                cache_dir, f"_{input_case}_en_tn_{deterministic}_deterministic{whitelist_file}_lm.far"
            )
        if not overwrite_cache and far_file and os.path.exists(far_file):
            # Restore the previously compiled grammar from the FAR cache.
            self.fst = pynini.Far(far_file, mode='r')['tokenize_and_classify']
            # Variant of the grammar restricted to outputs containing no digits.
            no_digits = pynini.closure(pynini.difference(NEMO_CHAR, NEMO_DIGIT))
            self.fst_no_digits = pynini.compose(self.fst, no_digits).optimize()
            logging.info(f'ClassifyFst.fst was restored from {far_file}.')
        else:
            logging.info(f'Creating ClassifyFst grammars. This might take some time...')
            # TAGGERS
            # Most taggers are built with deterministic=True regardless of the
            # constructor flag; only a few (word, serial, money, whitelist) vary.
            cardinal = CardinalFst(deterministic=True, lm=True)
            cardinal_tagger = cardinal  # keep the tagger object; `cardinal` is rebound below
            cardinal_graph = cardinal.fst

            ordinal = OrdinalFst(cardinal=cardinal, deterministic=True)
            ordinal_graph = ordinal.fst

            decimal = DecimalFst(cardinal=cardinal, deterministic=True)
            decimal_graph = decimal.fst
            fraction = FractionFst(deterministic=True, cardinal=cardinal)
            fraction_graph = fraction.fst

            measure = MeasureFst(cardinal=cardinal, decimal=decimal, fraction=fraction, deterministic=True)
            measure_graph = measure.fst
            date = DateFst(cardinal=cardinal, deterministic=True, lm=True)
            date_graph = date.fst
            punctuation = PunctuationFst(deterministic=True)
            punct_graph = punctuation.graph
            word_graph = WordFst(punctuation=punctuation, deterministic=deterministic).graph
            time_graph = TimeFst(cardinal=cardinal, deterministic=True).fst
            telephone_graph = TelephoneFst(deterministic=True).fst
            electronic_graph = ElectronicFst(deterministic=True).fst
            money_graph = MoneyFst(cardinal=cardinal, decimal=decimal, deterministic=False).fst
            # NOTE: `whitelist` (the file-path parameter) is rebound here to the
            # WhiteListFst instance.
            whitelist = WhiteListFst(input_case=input_case, deterministic=False, input_file=whitelist)
            whitelist_graph = whitelist.graph
            serial_graph = SerialFst(cardinal=cardinal, ordinal=ordinal, deterministic=deterministic, lm=True).fst

            # VERBALIZERS
            # `cardinal`, `decimal`, `ordinal`, `fraction` are rebound from tagger
            # to verbalizer objects from here on.
            cardinal = vCardinal(deterministic=True)
            v_cardinal_graph = cardinal.fst
            decimal = vDecimal(cardinal=cardinal, deterministic=True)
            v_decimal_graph = decimal.fst
            ordinal = vOrdinal(deterministic=True)
            v_ordinal_graph = ordinal.fst
            fraction = vFraction(deterministic=True, lm=True)
            v_fraction_graph = fraction.fst
            v_telephone_graph = vTelephone(deterministic=True).fst
            v_electronic_graph = vElectronic(deterministic=True).fst
            measure = vMeasure(decimal=decimal, cardinal=cardinal, fraction=fraction, deterministic=False)
            v_measure_graph = measure.fst
            v_time_graph = vTime(deterministic=True).fst
            v_date_graph = vDate(ordinal=ordinal, deterministic=deterministic, lm=True).fst
            v_money_graph = vMoney(decimal=decimal, deterministic=deterministic).fst
            v_roman_graph = vRoman(deterministic=deterministic).fst
            v_word_graph = vWord(deterministic=deterministic).fst

            # Prefer a date reading over a cardinal reading for the same span,
            # then attach the matching verbalizers.
            cardinal_or_date_final = plurals._priority_union(date_graph, cardinal_graph, NEMO_SIGMA)
            cardinal_or_date_final = pynini.compose(cardinal_or_date_final, (v_cardinal_graph | v_date_graph))

            time_final = pynini.compose(time_graph, v_time_graph)
            ordinal_final = pynini.compose(ordinal_graph, v_ordinal_graph)
            # weights: semiotic classes (1) beat plain words (100); punctuation is 2
            sem_w = 1
            word_w = 100
            punct_w = 2
            classify_and_verbalize = (
                pynutil.add_weight(time_final, sem_w)
                | pynutil.add_weight(pynini.compose(decimal_graph, v_decimal_graph), sem_w)
                | pynutil.add_weight(pynini.compose(measure_graph, v_measure_graph), sem_w)
                | pynutil.add_weight(ordinal_final, sem_w)
                | pynutil.add_weight(pynini.compose(telephone_graph, v_telephone_graph), sem_w)
                | pynutil.add_weight(pynini.compose(electronic_graph, v_electronic_graph), sem_w)
                | pynutil.add_weight(pynini.compose(fraction_graph, v_fraction_graph), sem_w)
                | pynutil.add_weight(pynini.compose(money_graph, v_money_graph), sem_w)
                | pynutil.add_weight(cardinal_or_date_final, sem_w)
                | pynutil.add_weight(whitelist_graph, sem_w)
                | pynutil.add_weight(
                    pynini.compose(serial_graph, v_word_graph), 1.1001
                )  # should be higher than the rest of the classes
            ).optimize()

            roman_graph = RomanFst(deterministic=deterministic, lm=True).fst
            # the weight matches the word_graph weight for "I" cases in long sentences with multiple semiotic tokens
            classify_and_verbalize |= pynutil.add_weight(pynini.compose(roman_graph, v_roman_graph), sem_w)

            date_final = pynini.compose(date_graph, v_date_graph)
            range_graph = RangeFst(
                time=time_final, cardinal=cardinal_tagger, date=date_final, deterministic=deterministic
            ).fst
            classify_and_verbalize |= pynutil.add_weight(pynini.compose(range_graph, v_word_graph), sem_w)
            # Semiotic spans are wrapped in "< ... >" markers; plain words are not.
            classify_and_verbalize = pynutil.insert("< ") + classify_and_verbalize + pynutil.insert(" >")
            classify_and_verbalize |= pynutil.add_weight(word_graph, word_w)

            punct_only = pynutil.add_weight(punct_graph, weight=punct_w)
            punct = pynini.closure(
                pynini.compose(pynini.closure(NEMO_WHITE_SPACE, 1), delete_extra_space)
                | (pynutil.insert(" ") + punct_only),
                1,
            )

            def get_token_sem_graph(classify_and_verbalize):
                # Assemble the sentence-level graph: tokens separated by
                # whitespace/punctuation, then collapse redundant spaces.
                token_plus_punct = (
                    pynini.closure(punct + pynutil.insert(" "))
                    + classify_and_verbalize
                    + pynini.closure(pynutil.insert(" ") + punct)
                )

                graph = token_plus_punct + pynini.closure(
                    (
                        pynini.compose(pynini.closure(NEMO_WHITE_SPACE, 1), delete_extra_space)
                        | (pynutil.insert(" ") + punct + pynutil.insert(" "))
                    )
                    + token_plus_punct
                )

                graph |= punct_only + pynini.closure(punct)
                graph = delete_space + graph + delete_space

                # squeeze runs of spaces between non-space chunks, including
                # leading spaces
                remove_extra_spaces = pynini.closure(NEMO_NOT_SPACE, 1) + pynini.closure(
                    delete_extra_space + pynini.closure(NEMO_NOT_SPACE, 1)
                )
                remove_extra_spaces |= (
                    pynini.closure(pynutil.delete(" "), 1)
                    + pynini.closure(NEMO_NOT_SPACE, 1)
                    + pynini.closure(delete_extra_space + pynini.closure(NEMO_NOT_SPACE, 1))
                )

                graph = pynini.compose(graph.optimize(), remove_extra_spaces).optimize()
                return graph

            self.fst = get_token_sem_graph(classify_and_verbalize)
            # Digit-free variant of the final grammar (for LM-style rescoring).
            no_digits = pynini.closure(pynini.difference(NEMO_CHAR, NEMO_DIGIT))
            self.fst_no_digits = pynini.compose(self.fst, no_digits).optimize()

            if far_file:
                generator_main(far_file, {"tokenize_and_classify": self.fst})
                logging.info(f'ClassifyFst grammars are saved to {far_file}.')
b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/tokenize_and_classify_with_audio.py @@ -0,0 +1,229 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_CHAR, + NEMO_DIGIT, + NEMO_NOT_SPACE, + NEMO_WHITE_SPACE, + GraphFst, + delete_extra_space, + delete_space, + generator_main, +) +from nemo_text_processing.text_normalization.en.taggers.abbreviation import AbbreviationFst +from nemo_text_processing.text_normalization.en.taggers.cardinal import CardinalFst +from nemo_text_processing.text_normalization.en.taggers.date import DateFst +from nemo_text_processing.text_normalization.en.taggers.decimal import DecimalFst +from nemo_text_processing.text_normalization.en.taggers.electronic import ElectronicFst +from nemo_text_processing.text_normalization.en.taggers.fraction import FractionFst +from nemo_text_processing.text_normalization.en.taggers.measure import MeasureFst +from nemo_text_processing.text_normalization.en.taggers.money import MoneyFst +from nemo_text_processing.text_normalization.en.taggers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst +from nemo_text_processing.text_normalization.en.taggers.range import RangeFst as RangeFst +from nemo_text_processing.text_normalization.en.taggers.roman import RomanFst +from 
class ClassifyFst(GraphFst):
    """
    Final class that composes all other classification grammars. This class can process an entire sentence including punctuation.
    For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File.
    More details to deployment at NeMo/tools/text_processing_deployment.

    Args:
        input_case: accepting either "lower_cased" or "cased" input.
        deterministic: if True will provide a single transduction option,
            for False multiple options (used for audio-based normalization)
        cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache.
        overwrite_cache: set to True to overwrite .far files
        whitelist: path to a file with whitelist replacements
    """

    def __init__(
        self,
        input_case: str,
        deterministic: bool = True,
        cache_dir: str = None,
        overwrite_cache: bool = True,
        whitelist: str = None,
    ):
        super().__init__(name="tokenize_and_classify", kind="classify", deterministic=deterministic)

        far_file = None
        if cache_dir is not None and cache_dir != 'None':
            os.makedirs(cache_dir, exist_ok=True)
            whitelist_file = os.path.basename(whitelist) if whitelist else ""
            far_file = os.path.join(
                cache_dir, f"_{input_case}_en_tn_{deterministic}_deterministic{whitelist_file}.far"
            )
        if not overwrite_cache and far_file and os.path.exists(far_file):
            # Restore the previously compiled grammar from the FAR cache.
            self.fst = pynini.Far(far_file, mode='r')['tokenize_and_classify']
            # Variant of the grammar restricted to outputs containing no digits.
            no_digits = pynini.closure(pynini.difference(NEMO_CHAR, NEMO_DIGIT))
            self.fst_no_digits = pynini.compose(self.fst, no_digits).optimize()
            logging.info(f'ClassifyFst.fst was restored from {far_file}.')
        else:
            logging.info(f'Creating ClassifyFst grammars. This might take some time...')
            # TAGGERS
            cardinal = CardinalFst(deterministic=deterministic)
            cardinal_graph = cardinal.fst

            ordinal = OrdinalFst(cardinal=cardinal, deterministic=deterministic)
            # SerialFst below always gets the deterministic ordinal tagger
            deterministic_ordinal = OrdinalFst(cardinal=cardinal, deterministic=True)
            ordinal_graph = ordinal.fst

            decimal = DecimalFst(cardinal=cardinal, deterministic=deterministic)
            decimal_graph = decimal.fst
            fraction = FractionFst(deterministic=deterministic, cardinal=cardinal)
            fraction_graph = fraction.fst

            measure = MeasureFst(cardinal=cardinal, decimal=decimal, fraction=fraction, deterministic=deterministic)
            measure_graph = measure.fst
            date_graph = DateFst(cardinal=cardinal, deterministic=deterministic).fst
            punctuation = PunctuationFst(deterministic=True)
            punct_graph = punctuation.graph
            word_graph = WordFst(punctuation=punctuation, deterministic=deterministic).graph
            time_graph = TimeFst(cardinal=cardinal, deterministic=deterministic).fst
            telephone_graph = TelephoneFst(deterministic=deterministic).fst
            electronic_graph = ElectronicFst(deterministic=deterministic).fst
            money_graph = MoneyFst(cardinal=cardinal, decimal=decimal, deterministic=deterministic).fst
            # NOTE: `whitelist` (the file-path parameter) is rebound here to the
            # WhiteListFst instance; AbbreviationFst below receives this instance.
            whitelist = WhiteListFst(input_case=input_case, deterministic=deterministic, input_file=whitelist)
            whitelist_graph = whitelist.graph
            serial_graph = SerialFst(cardinal=cardinal, ordinal=deterministic_ordinal, deterministic=deterministic).fst

            # VERBALIZERS
            # `cardinal`, `decimal`, `ordinal`, `fraction`, `measure` are rebound
            # from tagger to verbalizer objects from here on.
            cardinal = vCardinal(deterministic=deterministic)
            v_cardinal_graph = cardinal.fst
            decimal = vDecimal(cardinal=cardinal, deterministic=deterministic)
            v_decimal_graph = decimal.fst
            ordinal = vOrdinal(deterministic=deterministic)
            v_ordinal_graph = ordinal.fst
            fraction = vFraction(deterministic=deterministic)
            v_fraction_graph = fraction.fst
            v_telephone_graph = vTelephone(deterministic=deterministic).fst
            v_electronic_graph = vElectronic(deterministic=deterministic).fst
            measure = vMeasure(decimal=decimal, cardinal=cardinal, fraction=fraction, deterministic=deterministic)
            v_measure_graph = measure.fst
            v_time_graph = vTime(deterministic=deterministic).fst
            v_date_graph = vDate(ordinal=ordinal, deterministic=deterministic).fst
            v_money_graph = vMoney(decimal=decimal, deterministic=deterministic).fst
            v_roman_graph = vRoman(deterministic=deterministic).fst
            v_abbreviation = vAbbreviation(deterministic=deterministic).fst

            # RangeFst always consumes deterministic time/date verbalizations.
            det_v_time_graph = vTime(deterministic=True).fst
            det_v_date_graph = vDate(ordinal=vOrdinal(deterministic=True), deterministic=True).fst
            time_final = pynini.compose(time_graph, det_v_time_graph)
            date_final = pynini.compose(date_graph, det_v_date_graph)
            range_graph = RangeFst(
                time=time_final, date=date_final, cardinal=CardinalFst(deterministic=True), deterministic=deterministic
            ).fst
            v_word_graph = vWord(deterministic=deterministic).fst

            # weights: semiotic classes (1) beat plain words (100); punctuation is 2;
            # date gets a small 0.01 bonus over the other semiotic classes
            sem_w = 1
            word_w = 100
            punct_w = 2
            classify_and_verbalize = (
                pynutil.add_weight(whitelist_graph, sem_w)
                | pynutil.add_weight(pynini.compose(time_graph, v_time_graph), sem_w)
                | pynutil.add_weight(pynini.compose(decimal_graph, v_decimal_graph), sem_w)
                | pynutil.add_weight(pynini.compose(measure_graph, v_measure_graph), sem_w)
                | pynutil.add_weight(pynini.compose(cardinal_graph, v_cardinal_graph), sem_w)
                | pynutil.add_weight(pynini.compose(ordinal_graph, v_ordinal_graph), sem_w)
                | pynutil.add_weight(pynini.compose(telephone_graph, v_telephone_graph), sem_w)
                | pynutil.add_weight(pynini.compose(electronic_graph, v_electronic_graph), sem_w)
                | pynutil.add_weight(pynini.compose(fraction_graph, v_fraction_graph), sem_w)
                | pynutil.add_weight(pynini.compose(money_graph, v_money_graph), sem_w)
                | pynutil.add_weight(word_graph, word_w)
                | pynutil.add_weight(pynini.compose(date_graph, v_date_graph), sem_w - 0.01)
                | pynutil.add_weight(pynini.compose(range_graph, v_word_graph), sem_w)
                | pynutil.add_weight(
                    pynini.compose(serial_graph, v_word_graph), 1.1001
                )  # should be higher than the rest of the classes
            ).optimize()

            if not deterministic:
                # Roman numerals and abbreviation expansions only in multi-option
                # (audio-based) mode.
                roman_graph = RomanFst(deterministic=deterministic).fst
                # the weight matches the word_graph weight for "I" cases in long sentences with multiple semiotic tokens
                classify_and_verbalize |= pynutil.add_weight(pynini.compose(roman_graph, v_roman_graph), word_w)

                abbreviation_graph = AbbreviationFst(whitelist=whitelist, deterministic=deterministic).fst
                classify_and_verbalize |= pynutil.add_weight(
                    pynini.compose(abbreviation_graph, v_abbreviation), word_w
                )

            punct_only = pynutil.add_weight(punct_graph, weight=punct_w)
            punct = pynini.closure(
                pynini.compose(pynini.closure(NEMO_WHITE_SPACE, 1), delete_extra_space)
                | (pynutil.insert(" ") + punct_only),
                1,
            )

            # Sentence-level assembly: tokens separated by whitespace/punctuation.
            token_plus_punct = (
                pynini.closure(punct + pynutil.insert(" "))
                + classify_and_verbalize
                + pynini.closure(pynutil.insert(" ") + punct)
            )

            graph = token_plus_punct + pynini.closure(
                (
                    pynini.compose(pynini.closure(NEMO_WHITE_SPACE, 1), delete_extra_space)
                    | (pynutil.insert(" ") + punct + pynutil.insert(" "))
                )
                + token_plus_punct
            )

            graph |= punct_only + pynini.closure(punct)
            graph = delete_space + graph + delete_space

            # squeeze runs of spaces between non-space chunks, including leading spaces
            remove_extra_spaces = pynini.closure(NEMO_NOT_SPACE, 1) + pynini.closure(
                delete_extra_space + pynini.closure(NEMO_NOT_SPACE, 1)
            )
            remove_extra_spaces |= (
                pynini.closure(pynutil.delete(" "), 1)
                + pynini.closure(NEMO_NOT_SPACE, 1)
                + pynini.closure(delete_extra_space + pynini.closure(NEMO_NOT_SPACE, 1))
            )

            graph = pynini.compose(graph.optimize(), remove_extra_spaces).optimize()
            self.fst = graph
            # Digit-free variant of the final grammar.
            no_digits = pynini.closure(pynini.difference(NEMO_CHAR, NEMO_DIGIT))
            self.fst_no_digits = pynini.compose(graph, no_digits).optimize()

            if far_file:
                generator_main(far_file, {"tokenize_and_classify": self.fst})
                logging.info(f'ClassifyFst grammars are saved to {far_file}.')
a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/whitelist.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/whitelist.py new file mode 100644 index 0000000..54c5b53 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/whitelist.py @@ -0,0 +1,151 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_CHAR, + NEMO_NOT_SPACE, + NEMO_SIGMA, + NEMO_UPPER, + SINGULAR_TO_PLURAL, + GraphFst, + convert_space, +) +from nemo_text_processing.text_normalization.en.taggers.roman import get_names +from nemo_text_processing.text_normalization.en.utils import ( + augment_labels_with_punct_at_end, + get_abs_path, + load_labels, +) +from pynini.lib import pynutil + + +class WhiteListFst(GraphFst): + """ + Finite state transducer for classifying whitelist, e.g. + misses -> tokens { name: "mrs" } + for non-deterministic case: "Dr. Abc" -> + tokens { name: "drive" } tokens { name: "Abc" } + tokens { name: "doctor" } tokens { name: "Abc" } + tokens { name: "Dr." } tokens { name: "Abc" } + This class has highest priority among all classifier grammars. Whitelisted tokens are defined and loaded from "data/whitelist.tsv". + + Args: + input_case: accepting either "lower_cased" or "cased" input. 
+ deterministic: if True will provide a single transduction option, + for False multiple options (used for audio-based normalization) + input_file: path to a file with whitelist replacements + """ + + def __init__(self, input_case: str, deterministic: bool = True, input_file: str = None): + super().__init__(name="whitelist", kind="classify", deterministic=deterministic) + + def _get_whitelist_graph(input_case, file, keep_punct_add_end: bool = False): + whitelist = load_labels(file) + if input_case == "lower_cased": + whitelist = [[x.lower(), y] for x, y in whitelist] + else: + whitelist = [[x, y] for x, y in whitelist] + + if keep_punct_add_end: + whitelist.extend(augment_labels_with_punct_at_end(whitelist)) + + graph = pynini.string_map(whitelist) + return graph + + graph = _get_whitelist_graph(input_case, get_abs_path("data/whitelist/tts.tsv")) + graph |= _get_whitelist_graph(input_case, get_abs_path("data/whitelist/UK_to_US.tsv")) # Jiayu 2022.10 + graph |= pynini.compose( + pynini.difference(NEMO_SIGMA, pynini.accep("/")).optimize(), + _get_whitelist_graph(input_case, get_abs_path("data/whitelist/symbol.tsv")), + ).optimize() + + if deterministic: + names = get_names() + graph |= ( + pynini.cross(pynini.union("st", "St", "ST"), "Saint") + + pynini.closure(pynutil.delete(".")) + + pynini.accep(" ") + + names + ) + else: + graph |= _get_whitelist_graph( + input_case, get_abs_path("data/whitelist/alternatives.tsv"), keep_punct_add_end=True + ) + + for x in [".", ". 
"]: + graph |= ( + NEMO_UPPER + + pynini.closure(pynutil.delete(x) + NEMO_UPPER, 2) + + pynini.closure(pynutil.delete("."), 0, 1) + ) + + if not deterministic: + multiple_forms_whitelist_graph = get_formats(get_abs_path("data/whitelist/alternatives_all_format.tsv")) + graph |= multiple_forms_whitelist_graph + + graph_unit = pynini.string_file(get_abs_path("data/measure/unit.tsv")) | pynini.string_file( + get_abs_path("data/measure/unit_alternatives.tsv") + ) + graph_unit_plural = graph_unit @ SINGULAR_TO_PLURAL + units_graph = pynini.compose(NEMO_CHAR ** (3, ...), convert_space(graph_unit | graph_unit_plural)) + graph |= units_graph + + # convert to states only if comma is present before the abbreviation to avoid converting all caps words, + # e.g. "IN", "OH", "OK" + # TODO or only exclude above? + states = load_labels(get_abs_path("data/address/state.tsv")) + additional_options = [] + for x, y in states: + if input_case == "lower_cased": + x = x.lower() + additional_options.append((x, f"{y[0]}.{y[1:]}")) + if not deterministic: + additional_options.append((x, f"{y[0]}.{y[1:]}.")) + + states.extend(additional_options) + state_graph = pynini.string_map(states) + graph |= pynini.closure(NEMO_NOT_SPACE, 1) + pynini.union(", ", ",") + pynini.invert(state_graph).optimize() + + if input_file: + whitelist_provided = _get_whitelist_graph(input_case, input_file) + if not deterministic: + graph |= whitelist_provided + else: + graph = whitelist_provided + + self.graph = (convert_space(graph)).optimize() + + self.fst = (pynutil.insert("name: \"") + self.graph + pynutil.insert("\"")).optimize() + + +def get_formats(input_f, input_case="cased", is_default=True): + """ + Adds various abbreviation format options to the list of acceptable input forms + """ + multiple_formats = load_labels(input_f) + additional_options = [] + for x, y in multiple_formats: + if input_case == "lower_cased": + x = x.lower() + additional_options.append((f"{x}.", y)) # default "dr" -> doctor, this 
includes period "dr." -> doctor + additional_options.append((f"{x[0].upper() + x[1:]}", f"{y[0].upper() + y[1:]}")) # "Dr" -> Doctor + additional_options.append((f"{x[0].upper() + x[1:]}.", f"{y[0].upper() + y[1:]}")) # "Dr." -> Doctor + multiple_formats.extend(additional_options) + + if not is_default: + multiple_formats = [(x, f"|raw_start|{x}|raw_end||norm_start|{y}|norm_end|") for (x, y) in multiple_formats] + + multiple_formats = pynini.string_map(multiple_formats) + return multiple_formats diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/taggers/word.py b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/word.py new file mode 100644 index 0000000..fa6a965 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/taggers/word.py @@ -0,0 +1,90 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + MIN_NEG_WEIGHT, + NEMO_ALPHA, + NEMO_DIGIT, + NEMO_NOT_SPACE, + NEMO_SIGMA, + GraphFst, + convert_space, + get_abs_path, +) +from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst +from pynini.examples import plurals +from pynini.lib import pynutil + + +class WordFst(GraphFst): + """ + Finite state transducer for classifying word. Considers sentence boundary exceptions. + e.g. 
sleep -> tokens { name: "sleep" } + + Args: + punctuation: PunctuationFst + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, punctuation: GraphFst, deterministic: bool = True): + super().__init__(name="word", kind="classify", deterministic=deterministic) + + punct = PunctuationFst().graph + default_graph = pynini.closure(pynini.difference(NEMO_NOT_SPACE, punct.project("input")), 1) + symbols_to_exclude = (pynini.union("$", "€", "₩", "£", "¥", "#", "%") | NEMO_DIGIT).optimize() + graph = pynini.closure(pynini.difference(NEMO_NOT_SPACE, symbols_to_exclude), 1) + graph = pynutil.add_weight(graph, MIN_NEG_WEIGHT) | default_graph + + # leave phones of format [HH AH0 L OW1] untouched + phoneme_unit = pynini.closure(NEMO_ALPHA, 1) + pynini.closure(NEMO_DIGIT) + phoneme = ( + pynini.accep(pynini.escape("[")) + + pynini.closure(phoneme_unit + pynini.accep(" ")) + + phoneme_unit + + pynini.accep(pynini.escape("]")) + ) + + # leave IPA phones of format [ˈdoʊv] untouched, single words and sentences with punctuation marks allowed + punct_marks = pynini.union(*punctuation.punct_marks).optimize() + stress = pynini.union("ˈ", "'", "ˌ") + ipa_phoneme_unit = pynini.string_file(get_abs_path("data/whitelist/ipa_symbols.tsv")) + # word in ipa form + ipa_phonemes = ( + pynini.closure(stress, 0, 1) + + pynini.closure(ipa_phoneme_unit, 1) + + pynini.closure(stress | ipa_phoneme_unit) + ) + # allow sentences of words in IPA format separated with spaces or punct marks + delim = (punct_marks | pynini.accep(" ")) ** (1, ...) 
+ ipa_phonemes = ipa_phonemes + pynini.closure(delim + ipa_phonemes) + pynini.closure(delim, 0, 1) + ipa_phonemes = (pynini.accep(pynini.escape("[")) + ipa_phonemes + pynini.accep(pynini.escape("]"))).optimize() + + if not deterministic: + phoneme = ( + pynini.accep(pynini.escape("[")) + + pynini.closure(pynini.accep(" "), 0, 1) + + pynini.closure(phoneme_unit + pynini.accep(" ")) + + phoneme_unit + + pynini.closure(pynini.accep(" "), 0, 1) + + pynini.accep(pynini.escape("]")) + ).optimize() + ipa_phonemes = ( + pynini.accep(pynini.escape("[")) + ipa_phonemes + pynini.accep(pynini.escape("]")) + ).optimize() + + phoneme |= ipa_phonemes + self.graph = plurals._priority_union(convert_space(phoneme.optimize()), graph, NEMO_SIGMA) + self.fst = (pynutil.insert("name: \"") + self.graph + pynutil.insert("\"")).optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/utils.py b/utils/speechio/nemo_text_processing/text_normalization/en/utils.py new file mode 100644 index 0000000..3a88fd8 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/utils.py @@ -0,0 +1,60 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import csv +import os + + +def get_abs_path(rel_path): + """ + Get absolute path + + Args: + rel_path: relative path to this file + + Returns absolute path + """ + return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path + + +def load_labels(abs_path): + """ + loads relative path file as dictionary + + Args: + abs_path: absolute path + + Returns dictionary of mappings + """ + label_tsv = open(abs_path, encoding="utf-8") + labels = list(csv.reader(label_tsv, delimiter="\t")) + return labels + + +def augment_labels_with_punct_at_end(labels): + """ + augments labels: if key ends on a punctuation that value does not have, add a new label + where the value maintains the punctuation + + Args: + labels : input labels + Returns: + additional labels + """ + res = [] + for label in labels: + if len(label) > 1: + if label[0][-1] == "." and label[1][-1] != ".": + res.append([label[0], label[1] + "."] + label[2:]) + return res diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/__init__.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/__init__.py new file mode 100644 index 0000000..bc443be --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/abbreviation.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/abbreviation.py new file mode 100644 index 0000000..1917924 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/abbreviation.py @@ -0,0 +1,35 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst +from pynini.lib import pynutil + + +class AbbreviationFst(GraphFst): + """ + Finite state transducer for verbalizing abbreviations + e.g. 
tokens { abbreviation { value: "A B C" } } -> "ABC" + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="abbreviation", kind="verbalize", deterministic=deterministic) + + graph = pynutil.delete("value: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/cardinal.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/cardinal.py new file mode 100644 index 0000000..99531a0 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/cardinal.py @@ -0,0 +1,45 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space +from pynini.lib import pynutil + + +class CardinalFst(GraphFst): + """ + Finite state transducer for verbalizing cardinal, e.g. 
+ cardinal { negative: "true" integer: "23" } -> minus twenty three + + Args: + deterministic: if True will provide a single transduction option, + for False multiple options (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="cardinal", kind="verbalize", deterministic=deterministic) + + self.optional_sign = pynini.cross("negative: \"true\"", "minus ") + if not deterministic: + self.optional_sign |= pynini.cross("negative: \"true\"", "negative ") + self.optional_sign = pynini.closure(self.optional_sign + delete_space, 0, 1) + + integer = pynini.closure(NEMO_NOT_QUOTE) + + self.integer = delete_space + pynutil.delete("\"") + integer + pynutil.delete("\"") + integer = pynutil.delete("integer:") + self.integer + + self.numbers = self.optional_sign + integer + delete_tokens = self.delete_tokens(self.numbers) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/date.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/date.py new file mode 100644 index 0000000..191d010 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/date.py @@ -0,0 +1,101 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_NOT_QUOTE, + NEMO_SIGMA, + GraphFst, + delete_extra_space, + delete_space, +) +from pynini.examples import plurals +from pynini.lib import pynutil + + +class DateFst(GraphFst): + """ + Finite state transducer for verbalizing date, e.g. + date { month: "february" day: "five" year: "twenty twelve" preserve_order: true } -> february fifth twenty twelve + date { day: "five" month: "february" year: "twenty twelve" preserve_order: true } -> the fifth of february twenty twelve + + Args: + ordinal: OrdinalFst + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, ordinal: GraphFst, deterministic: bool = True, lm: bool = False): + super().__init__(name="date", kind="verbalize", deterministic=deterministic) + + month = pynini.closure(NEMO_NOT_QUOTE, 1) + day_cardinal = ( + pynutil.delete("day:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + day = day_cardinal @ ordinal.suffix + + month = pynutil.delete("month:") + delete_space + pynutil.delete("\"") + month + pynutil.delete("\"") + + year = ( + pynutil.delete("year:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + delete_space + + pynutil.delete("\"") + ) + + # month (day) year + graph_mdy = ( + month + pynini.closure(delete_extra_space + day, 0, 1) + pynini.closure(delete_extra_space + year, 0, 1) + ) + # may 5 -> may five + if not deterministic and not lm: + graph_mdy |= ( + month + + pynini.closure(delete_extra_space + day_cardinal, 0, 1) + + pynini.closure(delete_extra_space + year, 0, 1) + ) + + # day month year + graph_dmy = ( + pynutil.insert("the ") + + day + + delete_extra_space + + pynutil.insert("of ") + + month + + pynini.closure(delete_extra_space + year, 0, 1) + ) + + optional_preserve_order = 
pynini.closure( + pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space + | pynutil.delete("field_order:") + + delete_space + + pynutil.delete("\"") + + NEMO_NOT_QUOTE + + pynutil.delete("\"") + + delete_space + ) + + final_graph = ( + (plurals._priority_union(graph_mdy, pynutil.add_weight(graph_dmy, 0.0001), NEMO_SIGMA) | year) + + delete_space + + optional_preserve_order + ) + delete_tokens = self.delete_tokens(final_graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/decimal.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/decimal.py new file mode 100644 index 0000000..787bcea --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/decimal.py @@ -0,0 +1,67 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space, insert_space +from pynini.lib import pynutil + + +class DecimalFst(GraphFst): + """ + Finite state transducer for verbalizing decimal, e.g. 
+ decimal { negative: "true" integer_part: "twelve" fractional_part: "five o o six" quantity: "billion" } -> minus twelve point five o o six billion + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, cardinal, deterministic: bool = True): + super().__init__(name="decimal", kind="verbalize", deterministic=deterministic) + self.optional_sign = pynini.cross("negative: \"true\"", "minus ") + if not deterministic: + self.optional_sign |= pynini.cross("negative: \"true\"", "negative ") + self.optional_sign = pynini.closure(self.optional_sign + delete_space, 0, 1) + self.integer = pynutil.delete("integer_part:") + cardinal.integer + self.optional_integer = pynini.closure(self.integer + delete_space + insert_space, 0, 1) + self.fractional_default = ( + pynutil.delete("fractional_part:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + + self.fractional = pynutil.insert("point ") + self.fractional_default + + self.quantity = ( + delete_space + + insert_space + + pynutil.delete("quantity:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + self.optional_quantity = pynini.closure(self.quantity, 0, 1) + + graph = self.optional_sign + ( + self.integer + | (self.integer + self.quantity) + | (self.optional_integer + self.fractional + self.optional_quantity) + ) + + self.numbers = graph + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/electronic.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/electronic.py new file mode 100644 index 0000000..884f125 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/electronic.py @@ -0,0 +1,97 @@ +# Copyright 
(c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_NOT_QUOTE, + NEMO_NOT_SPACE, + NEMO_SIGMA, + TO_UPPER, + GraphFst, + delete_extra_space, + delete_space, + insert_space, +) +from nemo_text_processing.text_normalization.en.utils import get_abs_path +from pynini.examples import plurals +from pynini.lib import pynutil + + +class ElectronicFst(GraphFst): + """ + Finite state transducer for verbalizing electronic + e.g. 
tokens { electronic { username: "cdf1" domain: "abc.edu" } } -> c d f one at a b c dot e d u + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="electronic", kind="verbalize", deterministic=deterministic) + graph_digit_no_zero = pynini.invert(pynini.string_file(get_abs_path("data/number/digit.tsv"))).optimize() + graph_zero = pynini.cross("0", "zero") + + if not deterministic: + graph_zero |= pynini.cross("0", "o") | pynini.cross("0", "oh") + + graph_digit = graph_digit_no_zero | graph_zero + graph_symbols = pynini.string_file(get_abs_path("data/electronic/symbol.tsv")).optimize() + + default_chars_symbols = pynini.cdrewrite( + pynutil.insert(" ") + (graph_symbols | graph_digit) + pynutil.insert(" "), "", "", NEMO_SIGMA + ) + default_chars_symbols = pynini.compose( + pynini.closure(NEMO_NOT_SPACE), default_chars_symbols.optimize() + ).optimize() + + user_name = ( + pynutil.delete("username:") + + delete_space + + pynutil.delete("\"") + + default_chars_symbols + + pynutil.delete("\"") + ) + + domain_common = pynini.string_file(get_abs_path("data/electronic/domain.tsv")) + + domain = ( + default_chars_symbols + + insert_space + + plurals._priority_union( + domain_common, pynutil.add_weight(pynini.cross(".", "dot"), weight=0.0001), NEMO_SIGMA + ) + + pynini.closure( + insert_space + (pynini.cdrewrite(TO_UPPER, "", "", NEMO_SIGMA) @ default_chars_symbols), 0, 1 + ) + ) + domain = ( + pynutil.delete("domain:") + + delete_space + + pynutil.delete("\"") + + domain + + delete_space + + pynutil.delete("\"") + ).optimize() + + protocol = pynutil.delete("protocol: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + graph = ( + pynini.closure(protocol + delete_space, 0, 1) + + pynini.closure(user_name + delete_space + pynutil.insert(" at ") + delete_space, 0, 1) + + domain 
+ + delete_space + ).optimize() @ pynini.cdrewrite(delete_extra_space, "", "", NEMO_SIGMA) + + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/fraction.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/fraction.py new file mode 100644 index 0000000..d0c5dc2 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/fraction.py @@ -0,0 +1,88 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, NEMO_SIGMA, GraphFst, insert_space +from nemo_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst +from pynini.examples import plurals +from pynini.lib import pynutil + + +class FractionFst(GraphFst): + """ + Finite state transducer for verbalizing fraction + e.g. 
tokens { fraction { integer: "twenty three" numerator: "four" denominator: "five" } } -> + twenty three and four fifth + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True, lm: bool = False): + super().__init__(name="fraction", kind="verbalize", deterministic=deterministic) + suffix = OrdinalFst().suffix + + integer = pynutil.delete("integer_part: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\" ") + denominator_one = pynini.cross("denominator: \"one\"", "over one") + denominator_half = pynini.cross("denominator: \"two\"", "half") + denominator_quarter = pynini.cross("denominator: \"four\"", "quarter") + + denominator_rest = ( + pynutil.delete("denominator: \"") + pynini.closure(NEMO_NOT_QUOTE) @ suffix + pynutil.delete("\"") + ) + + denominators = plurals._priority_union( + denominator_one, + plurals._priority_union( + denominator_half, + plurals._priority_union(denominator_quarter, denominator_rest, NEMO_SIGMA), + NEMO_SIGMA, + ), + NEMO_SIGMA, + ).optimize() + if not deterministic: + denominators |= pynutil.delete("denominator: \"") + (pynini.accep("four") @ suffix) + pynutil.delete("\"") + + numerator_one = pynutil.delete("numerator: \"") + pynini.accep("one") + pynutil.delete("\" ") + numerator_one = numerator_one + insert_space + denominators + numerator_rest = ( + pynutil.delete("numerator: \"") + + (pynini.closure(NEMO_NOT_QUOTE) - pynini.accep("one")) + + pynutil.delete("\" ") + ) + numerator_rest = numerator_rest + insert_space + denominators + numerator_rest @= pynini.cdrewrite( + plurals._priority_union(pynini.cross("half", "halves"), pynutil.insert("s"), NEMO_SIGMA), + "", + "[EOS]", + NEMO_SIGMA, + ) + + graph = numerator_one | numerator_rest + + conjunction = pynutil.insert("and ") + if not deterministic and not lm: + conjunction = pynini.closure(conjunction, 0, 1) + + integer 
= pynini.closure(integer + insert_space + conjunction, 0, 1) + + graph = integer + graph + graph @= pynini.cdrewrite( + pynini.cross("and one half", "and a half") | pynini.cross("over ones", "over one"), "", "[EOS]", NEMO_SIGMA + ) + + self.graph = graph + delete_tokens = self.delete_tokens(self.graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/measure.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/measure.py new file mode 100644 index 0000000..e4a23b3 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/measure.py @@ -0,0 +1,102 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space, insert_space +from pynini.lib import pynutil + + +class MeasureFst(GraphFst): + """ + Finite state transducer for verbalizing measure, e.g. 
+ measure { negative: "true" cardinal { integer: "twelve" } units: "kilograms" } -> minus twelve kilograms + measure { decimal { integer_part: "twelve" fractional_part: "five" } units: "kilograms" } -> twelve point five kilograms + tokens { measure { units: "covid" decimal { integer_part: "nineteen" fractional_part: "five" } } } -> covid nineteen point five + + Args: + decimal: DecimalFst + cardinal: CardinalFst + fraction: FractionFst + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, decimal: GraphFst, cardinal: GraphFst, fraction: GraphFst, deterministic: bool = True): + super().__init__(name="measure", kind="verbalize", deterministic=deterministic) + optional_sign = cardinal.optional_sign + unit = ( + pynutil.delete("units: \"") + + pynini.difference(pynini.closure(NEMO_NOT_QUOTE, 1), pynini.union("address", "math")) + + pynutil.delete("\"") + + delete_space + ) + + if not deterministic: + unit |= pynini.compose(unit, pynini.cross(pynini.union("inch", "inches"), "\"")) + + graph_decimal = ( + pynutil.delete("decimal {") + + delete_space + + optional_sign + + delete_space + + decimal.numbers + + delete_space + + pynutil.delete("}") + ) + graph_cardinal = ( + pynutil.delete("cardinal {") + + delete_space + + optional_sign + + delete_space + + cardinal.numbers + + delete_space + + pynutil.delete("}") + ) + + graph_fraction = ( + pynutil.delete("fraction {") + delete_space + fraction.graph + delete_space + pynutil.delete("}") + ) + + graph = (graph_cardinal | graph_decimal | graph_fraction) + delete_space + insert_space + unit + + # SH adds "preserve_order: true" by default + preserve_order = pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space + graph |= unit + insert_space + (graph_cardinal | graph_decimal) + delete_space + pynini.closure(preserve_order) + # for only unit + graph |= ( + 
pynutil.delete("cardinal { integer: \"-\"") + + delete_space + + pynutil.delete("}") + + delete_space + + unit + + pynini.closure(preserve_order) + ) + address = ( + pynutil.delete("units: \"address\" ") + + delete_space + + graph_cardinal + + delete_space + + pynini.closure(preserve_order) + ) + math = ( + pynutil.delete("units: \"math\" ") + + delete_space + + graph_cardinal + + delete_space + + pynini.closure(preserve_order) + ) + graph |= address | math + + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/money.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/money.py new file mode 100644 index 0000000..b3cbc4a --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/money.py @@ -0,0 +1,71 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_NOT_QUOTE, + GraphFst, + delete_extra_space, + delete_preserve_order, +) +from pynini.lib import pynutil + + +class MoneyFst(GraphFst): + """ + Finite state transducer for verbalizing money, e.g. 
+ money { integer_part: "twelve" fractional_part: "o five" currency: "dollars" } -> twelve o five dollars + + Args: + decimal: DecimalFst + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, decimal: GraphFst, deterministic: bool = True): + super().__init__(name="money", kind="verbalize", deterministic=deterministic) + keep_space = pynini.accep(" ") + maj = pynutil.delete("currency_maj: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + min = pynutil.delete("currency_min: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + + fractional_part = ( + pynutil.delete("fractional_part: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + ) + + integer_part = decimal.integer + + # *** currency_maj + graph_integer = integer_part + keep_space + maj + + # *** currency_maj + (***) | ((and) *** current_min) + fractional = fractional_part + delete_extra_space + min + + if not deterministic: + fractional |= pynutil.insert("and ") + fractional + + graph_integer_with_minor = integer_part + keep_space + maj + keep_space + fractional + delete_preserve_order + + # *** point *** currency_maj + graph_decimal = decimal.numbers + keep_space + maj + + # *** current_min + graph_minor = fractional_part + delete_extra_space + min + delete_preserve_order + + graph = graph_integer | graph_integer_with_minor | graph_decimal | graph_minor + + if not deterministic: + graph |= graph_integer + delete_preserve_order + + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/ordinal.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/ordinal.py new file mode 100644 index 0000000..c64579a --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/ordinal.py @@ -0,0 +1,53 @@ 
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, NEMO_SIGMA, GraphFst, delete_space +from nemo_text_processing.text_normalization.en.utils import get_abs_path +from pynini.lib import pynutil + + +class OrdinalFst(GraphFst): + """ + Finite state transducer for verbalizing ordinal, e.g. + ordinal { integer: "thirteen" } } -> thirteenth + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="ordinal", kind="verbalize", deterministic=deterministic) + + graph_digit = pynini.string_file(get_abs_path("data/ordinal/digit.tsv")).invert() + graph_teens = pynini.string_file(get_abs_path("data/ordinal/teen.tsv")).invert() + + graph = ( + pynutil.delete("integer:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + convert_rest = pynutil.insert("th") + + suffix = pynini.cdrewrite( + graph_digit | graph_teens | pynini.cross("ty", "tieth") | convert_rest, "", "[EOS]", NEMO_SIGMA, + ).optimize() + self.graph = pynini.compose(graph, suffix) + self.suffix = suffix + delete_tokens = self.delete_tokens(self.graph) + self.fst = delete_tokens.optimize() diff --git 
a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/post_processing.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/post_processing.py new file mode 100644 index 0000000..6c87da1 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/post_processing.py @@ -0,0 +1,180 @@ +# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + MIN_NEG_WEIGHT, + NEMO_ALPHA, + NEMO_CHAR, + NEMO_SIGMA, + NEMO_SPACE, + generator_main, +) +from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst +from pynini.lib import pynutil + + + +class PostProcessingFst: + """ + Finite state transducer that post-processing an entire sentence after verbalization is complete, e.g. + removes extra spaces around punctuation marks " ( one hundred and twenty three ) " -> "(one hundred and twenty three)" + + Args: + cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache. 
+ overwrite_cache: set to True to overwrite .far files + """ + + def __init__(self, cache_dir: str = None, overwrite_cache: bool = False): + + far_file = None + if cache_dir is not None and cache_dir != "None": + os.makedirs(cache_dir, exist_ok=True) + far_file = os.path.join(cache_dir, "en_tn_post_processing.far") + if not overwrite_cache and far_file and os.path.exists(far_file): + self.fst = pynini.Far(far_file, mode="r")["post_process_graph"] + else: + self.set_punct_dict() + self.fst = self.get_punct_postprocess_graph() + + if far_file: + generator_main(far_file, {"post_process_graph": self.fst}) + + def set_punct_dict(self): + self.punct_marks = { + "'": [ + "'", + '´', + 'ʹ', + 'ʻ', + 'ʼ', + 'ʽ', + 'ʾ', + 'ˈ', + 'ˊ', + 'ˋ', + '˴', + 'ʹ', + '΄', + '՚', + '՝', + 'י', + '׳', + 'ߴ', + 'ߵ', + 'ᑊ', + 'ᛌ', + '᾽', + '᾿', + '`', + '´', + '῾', + '‘', + '’', + '‛', + '′', + '‵', + 'ꞌ', + ''', + '`', + '𖽑', + '𖽒', + ], + } + + def get_punct_postprocess_graph(self): + """ + Returns graph to post process punctuation marks. + + {``} quotes are converted to {"}. Note, if there are spaces around single quote {'}, they will be kept. + By default, a space is added after a punctuation mark, and spaces are removed before punctuation marks. 
+ """ + punct_marks_all = PunctuationFst().punct_marks + + # no_space_before_punct assume no space before them + quotes = ["'", "\"", "``", "«"] + dashes = ["-", "—"] + brackets = ["<", "{", "("] + open_close_single_quotes = [ + ("`", "`"), + ] + + open_close_double_quotes = [('"', '"'), ("``", "``"), ("“", "”")] + open_close_symbols = open_close_single_quotes + open_close_double_quotes + allow_space_before_punct = ["&"] + quotes + dashes + brackets + [k[0] for k in open_close_symbols] + + no_space_before_punct = [m for m in punct_marks_all if m not in allow_space_before_punct] + no_space_before_punct = pynini.union(*no_space_before_punct) + no_space_after_punct = pynini.union(*brackets) + delete_space = pynutil.delete(" ") + delete_space_optional = pynini.closure(delete_space, 0, 1) + + # non_punct allows space + # delete space before no_space_before_punct marks, if present + non_punct = pynini.difference(NEMO_CHAR, no_space_before_punct).optimize() + graph = ( + pynini.closure(non_punct) + + pynini.closure( + no_space_before_punct | pynutil.add_weight(delete_space + no_space_before_punct, MIN_NEG_WEIGHT) + ) + + pynini.closure(non_punct) + ) + graph = pynini.closure(graph).optimize() + graph = pynini.compose( + graph, pynini.cdrewrite(pynini.cross("``", '"'), "", "", NEMO_SIGMA).optimize() + ).optimize() + + # remove space after no_space_after_punct (even if there are no matching closing brackets) + no_space_after_punct = pynini.cdrewrite(delete_space, no_space_after_punct, NEMO_SIGMA, NEMO_SIGMA).optimize() + graph = pynini.compose(graph, no_space_after_punct).optimize() + + # remove space around text in quotes + single_quote = pynutil.add_weight(pynini.accep("`"), MIN_NEG_WEIGHT) + double_quotes = pynutil.add_weight(pynini.accep('"'), MIN_NEG_WEIGHT) + quotes_graph = ( + single_quote + delete_space_optional + NEMO_ALPHA + NEMO_SIGMA + delete_space_optional + single_quote + ).optimize() + + # this is to make sure multiple quotes are tagged from right to left 
without skipping any quotes in the left + not_alpha = pynini.difference(NEMO_CHAR, NEMO_ALPHA).optimize() | pynutil.add_weight( + NEMO_SPACE, MIN_NEG_WEIGHT + ) + end = pynini.closure(pynutil.add_weight(not_alpha, MIN_NEG_WEIGHT)) + quotes_graph |= ( + double_quotes + + delete_space_optional + + NEMO_ALPHA + + NEMO_SIGMA + + delete_space_optional + + double_quotes + + end + ) + + quotes_graph = pynutil.add_weight(quotes_graph, MIN_NEG_WEIGHT) + quotes_graph = NEMO_SIGMA + pynini.closure(NEMO_SIGMA + quotes_graph + NEMO_SIGMA) + + graph = pynini.compose(graph, quotes_graph).optimize() + + # remove space between a word and a single quote followed by s + remove_space_around_single_quote = pynini.cdrewrite( + delete_space_optional + pynini.union(*self.punct_marks["'"]) + delete_space, + NEMO_ALPHA, + pynini.union("s ", "s[EOS]"), + NEMO_SIGMA, + ) + + graph = pynini.compose(graph, remove_space_around_single_quote).optimize() + return graph diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/roman.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/roman.py new file mode 100644 index 0000000..43faebe --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/roman.py @@ -0,0 +1,68 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst +from nemo_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst +from pynini.lib import pynutil + + +class RomanFst(GraphFst): + """ + Finite state transducer for verbalizing roman numerals + e.g. tokens { roman { integer: "one" } } -> one + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="roman", kind="verbalize", deterministic=deterministic) + suffix = OrdinalFst().suffix + + cardinal = pynini.closure(NEMO_NOT_QUOTE) + ordinal = pynini.compose(cardinal, suffix) + + graph = ( + pynutil.delete("key_cardinal: \"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + + pynini.accep(" ") + + pynutil.delete("integer: \"") + + cardinal + + pynutil.delete("\"") + ).optimize() + + graph |= ( + pynutil.delete("default_cardinal: \"default\" integer: \"") + cardinal + pynutil.delete("\"") + ).optimize() + + graph |= ( + pynutil.delete("default_ordinal: \"default\" integer: \"") + ordinal + pynutil.delete("\"") + ).optimize() + + graph |= ( + pynutil.delete("key_the_ordinal: \"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + + pynini.accep(" ") + + pynutil.delete("integer: \"") + + pynini.closure(pynutil.insert("the "), 0, 1) + + ordinal + + pynutil.delete("\"") + ).optimize() + + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/telephone.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/telephone.py new file mode 100644 index 0000000..4af7bbb --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/telephone.py @@ -0,0 +1,63 @@ +# Copyright (c) 2021, 
NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space, insert_space +from pynini.lib import pynutil + + +class TelephoneFst(GraphFst): + """ + Finite state transducer for verbalizing telephone numbers, e.g. + telephone { country_code: "one" number_part: "one two three, one two three, five six seven eight" extension: "one" } + -> one, one two three, one two three, five six seven eight, one + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="telephone", kind="verbalize", deterministic=deterministic) + + optional_country_code = pynini.closure( + pynutil.delete("country_code: \"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + + delete_space + + insert_space, + 0, + 1, + ) + + number_part = ( + pynutil.delete("number_part: \"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynini.closure(pynutil.add_weight(pynutil.delete(" "), -0.0001), 0, 1) + + pynutil.delete("\"") + ) + + optional_extension = pynini.closure( + delete_space + + insert_space + + pynutil.delete("extension: \"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\""), + 0, + 1, + ) + + graph = optional_country_code + number_part + 
optional_extension + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/time.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/time.py new file mode 100644 index 0000000..518c7df --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/time.py @@ -0,0 +1,102 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_NOT_QUOTE, + NEMO_SIGMA, + GraphFst, + delete_space, + insert_space, +) +from pynini.lib import pynutil + + +class TimeFst(GraphFst): + """ + Finite state transducer for verbalizing time, e.g. 
+ time { hours: "twelve" minutes: "thirty" suffix: "a m" zone: "e s t" } -> twelve thirty a m e s t + time { hours: "twelve" } -> twelve o'clock + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="time", kind="verbalize", deterministic=deterministic) + hour = ( + pynutil.delete("hours:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + minute = ( + pynutil.delete("minutes:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + suffix = ( + pynutil.delete("suffix:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + optional_suffix = pynini.closure(delete_space + insert_space + suffix, 0, 1) + zone = ( + pynutil.delete("zone:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + optional_zone = pynini.closure(delete_space + insert_space + zone, 0, 1) + second = ( + pynutil.delete("seconds:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + graph_hms = ( + hour + + pynutil.insert(" hours ") + + delete_space + + minute + + pynutil.insert(" minutes and ") + + delete_space + + second + + pynutil.insert(" seconds") + + optional_suffix + + optional_zone + ) + graph_hms @= pynini.cdrewrite( + pynutil.delete("o ") + | pynini.cross("one minutes", "one minute") + | pynini.cross("one seconds", "one second") + | pynini.cross("one hours", "one hour"), + pynini.union(" ", "[BOS]"), + "", + NEMO_SIGMA, + ) + graph = hour + delete_space + insert_space + minute + optional_suffix + optional_zone + graph |= hour + insert_space + pynutil.insert("o'clock") + optional_zone + graph |= hour + 
delete_space + insert_space + suffix + optional_zone + graph |= graph_hms + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/verbalize.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/verbalize.py new file mode 100644 index 0000000..cd3b140 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/verbalize.py @@ -0,0 +1,82 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from nemo_text_processing.text_normalization.en.graph_utils import GraphFst +from nemo_text_processing.text_normalization.en.verbalizers.abbreviation import AbbreviationFst +from nemo_text_processing.text_normalization.en.verbalizers.cardinal import CardinalFst +from nemo_text_processing.text_normalization.en.verbalizers.date import DateFst +from nemo_text_processing.text_normalization.en.verbalizers.decimal import DecimalFst +from nemo_text_processing.text_normalization.en.verbalizers.electronic import ElectronicFst +from nemo_text_processing.text_normalization.en.verbalizers.fraction import FractionFst +from nemo_text_processing.text_normalization.en.verbalizers.measure import MeasureFst +from nemo_text_processing.text_normalization.en.verbalizers.money import MoneyFst +from nemo_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.en.verbalizers.roman import RomanFst +from nemo_text_processing.text_normalization.en.verbalizers.telephone import TelephoneFst +from nemo_text_processing.text_normalization.en.verbalizers.time import TimeFst +from nemo_text_processing.text_normalization.en.verbalizers.whitelist import WhiteListFst + + +class VerbalizeFst(GraphFst): + """ + Composes other verbalizer grammars. + For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File. + More details to deployment at NeMo/tools/text_processing_deployment. 
+ + Args: + deterministic: if True will provide a single transduction option, + for False multiple options (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="verbalize", kind="verbalize", deterministic=deterministic) + cardinal = CardinalFst(deterministic=deterministic) + cardinal_graph = cardinal.fst + decimal = DecimalFst(cardinal=cardinal, deterministic=deterministic) + decimal_graph = decimal.fst + ordinal = OrdinalFst(deterministic=deterministic) + ordinal_graph = ordinal.fst + fraction = FractionFst(deterministic=deterministic) + fraction_graph = fraction.fst + telephone_graph = TelephoneFst(deterministic=deterministic).fst + electronic_graph = ElectronicFst(deterministic=deterministic).fst + measure = MeasureFst(decimal=decimal, cardinal=cardinal, fraction=fraction, deterministic=deterministic) + measure_graph = measure.fst + time_graph = TimeFst(deterministic=deterministic).fst + date_graph = DateFst(ordinal=ordinal, deterministic=deterministic).fst + money_graph = MoneyFst(decimal=decimal, deterministic=deterministic).fst + whitelist_graph = WhiteListFst(deterministic=deterministic).fst + + graph = ( + time_graph + | date_graph + | money_graph + | measure_graph + | ordinal_graph + | decimal_graph + | cardinal_graph + | telephone_graph + | electronic_graph + | fraction_graph + | whitelist_graph + ) + + roman_graph = RomanFst(deterministic=deterministic).fst + graph |= roman_graph + + if not deterministic: + abbreviation_graph = AbbreviationFst(deterministic=deterministic).fst + graph |= abbreviation_graph + + self.fst = graph diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/verbalize_final.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/verbalize_final.py new file mode 100644 index 0000000..6564aff --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/verbalize_final.py @@ -0,0 +1,75 @@ +# 
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import ( + GraphFst, + delete_extra_space, + delete_space, + generator_main, +) +from nemo_text_processing.text_normalization.en.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst +from pynini.lib import pynutil + + + +class VerbalizeFinalFst(GraphFst): + """ + Finite state transducer that verbalizes an entire sentence, e.g. + tokens { name: "its" } tokens { time { hours: "twelve" minutes: "thirty" } } tokens { name: "now" } tokens { name: "." } -> its twelve thirty now . + + Args: + deterministic: if True will provide a single transduction option, + for False multiple options (used for audio-based normalization) + cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache. 
+ overwrite_cache: set to True to overwrite .far files + """ + + def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False): + super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic) + + far_file = None + if cache_dir is not None and cache_dir != "None": + os.makedirs(cache_dir, exist_ok=True) + far_file = os.path.join(cache_dir, f"en_tn_{deterministic}_deterministic_verbalizer.far") + if not overwrite_cache and far_file and os.path.exists(far_file): + self.fst = pynini.Far(far_file, mode="r")["verbalize"] + + else: + verbalize = VerbalizeFst(deterministic=deterministic).fst + word = WordFst(deterministic=deterministic).fst + types = verbalize | word + + if deterministic: + graph = ( + pynutil.delete("tokens") + + delete_space + + pynutil.delete("{") + + delete_space + + types + + delete_space + + pynutil.delete("}") + ) + else: + graph = delete_space + types + delete_space + + graph = delete_space + pynini.closure(graph + delete_extra_space) + graph + delete_space + + self.fst = graph.optimize() + if far_file: + generator_main(far_file, {"verbalize": self.fst}) + diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/whitelist.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/whitelist.py new file mode 100644 index 0000000..96aa207 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/whitelist.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_CHAR, NEMO_SIGMA, GraphFst, delete_space +from pynini.lib import pynutil + + +class WhiteListFst(GraphFst): + """ + Finite state transducer for verbalizing whitelist + e.g. tokens { name: "misses" } } -> misses + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="whitelist", kind="verbalize", deterministic=deterministic) + graph = ( + pynutil.delete("name:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_CHAR - " ", 1) + + pynutil.delete("\"") + ) + graph = graph @ pynini.cdrewrite(pynini.cross(u"\u00A0", " "), "", "", NEMO_SIGMA) + self.fst = graph.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/word.py b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/word.py new file mode 100644 index 0000000..e124f42 --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/en/verbalizers/word.py @@ -0,0 +1,35 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pynini +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_CHAR, NEMO_SIGMA, GraphFst, delete_space +from pynini.lib import pynutil + + +class WordFst(GraphFst): + """ + Finite state transducer for verbalizing word + e.g. tokens { name: "sleep" } -> sleep + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="word", kind="verbalize", deterministic=deterministic) + chars = pynini.closure(NEMO_CHAR - " ", 1) + char = pynutil.delete("name:") + delete_space + pynutil.delete("\"") + chars + pynutil.delete("\"") + graph = char @ pynini.cdrewrite(pynini.cross(u"\u00A0", " "), "", "", NEMO_SIGMA) + + self.fst = graph.optimize() diff --git a/utils/speechio/nemo_text_processing/text_normalization/normalize.py b/utils/speechio/nemo_text_processing/text_normalization/normalize.py new file mode 100644 index 0000000..d22ef8c --- /dev/null +++ b/utils/speechio/nemo_text_processing/text_normalization/normalize.py @@ -0,0 +1,479 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
import itertools
import os
import re
from argparse import ArgumentParser
from collections import OrderedDict
from math import factorial
from time import perf_counter
from typing import Dict, List, Union

import pynini
import regex
from nemo_text_processing.text_normalization.data_loader_utils import (
    load_file,
    post_process_punct,
    pre_process,
    write_file,
)
from nemo_text_processing.text_normalization.token_parser import PRESERVE_ORDER_KEY, TokenParser
from pynini.lib.rewrite import top_rewrite
from tqdm import tqdm  # FIX: tqdm was used in __process_batch but never imported

SPACE_DUP = re.compile(' {2,}')


class Normalizer:
    """
    Normalizer class that converts text from written to spoken form.
    Useful for TTS preprocessing.

    Args:
        input_case: expected input capitalization ("lower_cased" or "cased")
        lang: language specifying the TN rules, by default: English
        deterministic: if True, a single normalization option is produced
        cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache.
        overwrite_cache: set to True to overwrite .far files
        whitelist: path to a file with whitelist replacements
        lm: if True, use the LM-mode tagger (English, non-deterministic only)
        post_process: WFST-based post processing, e.g. to remove extra spaces added during TN.
            Note: punct_post_process flag in normalize() supports all languages.
    """

    def __init__(
        self,
        input_case: str,
        lang: str = 'en',
        deterministic: bool = True,
        cache_dir: str = None,
        overwrite_cache: bool = False,
        whitelist: str = None,
        lm: bool = False,
        post_process: bool = True,
    ):
        assert input_case in ["lower_cased", "cased"]

        self.post_processor = None

        if lang == "en":
            from nemo_text_processing.text_normalization.en.verbalizers.post_processing import PostProcessingFst
            from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst

            if post_process:
                self.post_processor = PostProcessingFst(cache_dir=cache_dir, overwrite_cache=overwrite_cache)

            if deterministic:
                from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
            elif lm:
                from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_lm import ClassifyFst
            else:
                from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_with_audio import (
                    ClassifyFst,
                )
        elif lang == 'ru':
            # Ru TN only supports non-deterministic cases and produces multiple normalization
            # options; use normalize_with_audio.py
            from nemo_text_processing.text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst
            from nemo_text_processing.text_normalization.ru.verbalizers.verbalize_final import VerbalizeFinalFst
        elif lang == 'de':
            from nemo_text_processing.text_normalization.de.taggers.tokenize_and_classify import ClassifyFst
            from nemo_text_processing.text_normalization.de.verbalizers.verbalize_final import VerbalizeFinalFst
        elif lang == 'es':
            from nemo_text_processing.text_normalization.es.taggers.tokenize_and_classify import ClassifyFst
            from nemo_text_processing.text_normalization.es.verbalizers.verbalize_final import VerbalizeFinalFst
        else:
            # FIX: previously an unsupported language fell through and crashed later
            # with a confusing NameError on ClassifyFst.
            raise NotImplementedError(f"Language '{lang}' is not supported")

        self.tagger = ClassifyFst(
            input_case=input_case,
            deterministic=deterministic,
            cache_dir=cache_dir,
            overwrite_cache=overwrite_cache,
            whitelist=whitelist,
        )
        self.verbalizer = VerbalizeFinalFst(
            deterministic=deterministic, cache_dir=cache_dir, overwrite_cache=overwrite_cache
        )

        self.parser = TokenParser()
        self.lang = lang

        # NOTE(review): stubbed to 0 (falsy) — the Moses detokenizer post-processing
        # branch in normalize() is therefore always skipped. Confirm whether a real
        # MosesProcessor should be wired in here.
        self.processor = 0

    def normalize_list(
        self, texts: List[str], verbose: bool = False, punct_pre_process: bool = False,
        punct_post_process: bool = False,
    ) -> List[str]:
        """
        Normalizes a list of text sequences.

        FIX: this method is called by the script's __main__ (``--input_file``) and by
        run_evaluate.py but was missing, causing an AttributeError at runtime.

        Args:
            texts: list of input strings
            verbose: whether to print intermediate meta information
            punct_pre_process: whether to do punctuation pre processing
            punct_post_process: whether to do punctuation post processing

        Returns list of normalized strings
        """
        return self.__process_batch(texts, verbose, punct_pre_process, punct_post_process)

    def __process_batch(self, batch, verbose, punct_pre_process, punct_post_process):
        """
        Normalizes batch of text sequences
        Args:
            batch: list of texts
            verbose: whether to print intermediate meta information
            punct_pre_process: whether to do punctuation pre processing
            punct_post_process: whether to do punctuation post processing
        """
        normalized_lines = [
            self.normalize(
                text, verbose=verbose, punct_pre_process=punct_pre_process, punct_post_process=punct_post_process
            )
            for text in tqdm(batch)
        ]
        return normalized_lines

    def _estimate_number_of_permutations_in_nested_dict(
        self, token_group: Dict[str, Union[OrderedDict, str, bool]]
    ) -> int:
        # Number of orderings of this group times the permutations of each nested group.
        num_perms = 1
        for k, inner in token_group.items():
            if isinstance(inner, dict):
                num_perms *= self._estimate_number_of_permutations_in_nested_dict(inner)
        num_perms *= factorial(len(token_group))
        return num_perms

    def _split_tokens_to_reduce_number_of_permutations(
        self, tokens: List[dict], max_number_of_permutations_per_split: int = 729
    ) -> List[List[dict]]:
        """
        Splits a sequence of tokens in a smaller sequences of tokens in a way that maximum number of composite
        tokens permutations does not exceed ``max_number_of_permutations_per_split``.

        Args:
            tokens: a list of dictionaries, possibly nested.
            max_number_of_permutations_per_split: a maximum number of permutations which can be generated from
                input sequence of tokens.

        Returns:
            a list of smaller sequences of tokens resulting from ``tokens`` split.
        """
        splits = []
        prev_end_of_split = 0
        current_number_of_permutations = 1
        for i, token_group in enumerate(tokens):
            n = self._estimate_number_of_permutations_in_nested_dict(token_group)
            if n * current_number_of_permutations > max_number_of_permutations_per_split:
                splits.append(tokens[prev_end_of_split:i])
                prev_end_of_split = i
                current_number_of_permutations = 1
            if n > max_number_of_permutations_per_split:
                raise ValueError(
                    f"Could not split token list with respect to condition that every split can generate number of "
                    f"permutations less or equal to "
                    f"`max_number_of_permutations_per_split={max_number_of_permutations_per_split}`. "
                    f"There is an unsplittable token group that generates more than "
                    f"{max_number_of_permutations_per_split} permutations. Try to increase "
                    f"`max_number_of_permutations_per_split` parameter."
                )
            current_number_of_permutations *= n
        splits.append(tokens[prev_end_of_split:])
        assert sum([len(s) for s in splits]) == len(tokens)
        return splits

    def normalize(
        self, text: str, verbose: bool = False, punct_pre_process: bool = False, punct_post_process: bool = False
    ) -> str:
        """
        Main function. Normalizes tokens from written to spoken form
        e.g. 12 kg -> twelve kilograms

        Args:
            text: string that may include semiotic classes
            verbose: whether to print intermediate meta information
            punct_pre_process: whether to perform punctuation pre-processing, for example, [25] -> [ 25 ]
            punct_post_process: whether to normalize punctuation

        Returns: spoken form
        """
        original_text = text
        if punct_pre_process:
            text = pre_process(text)
        text = text.strip()
        if not text:
            if verbose:
                print(text)
            return text
        text = pynini.escape(text)
        tagged_lattice = self.find_tags(text)
        tagged_text = self.select_tag(tagged_lattice)
        if verbose:
            print(tagged_text)
        self.parser(tagged_text)
        tokens = self.parser.parse()
        split_tokens = self._split_tokens_to_reduce_number_of_permutations(tokens)
        output = ""
        for s in split_tokens:
            tags_reordered = self.generate_permutations(s)
            verbalizer_lattice = None
            # Try tag permutations until one produces a non-empty verbalization lattice.
            for tagged_text in tags_reordered:
                tagged_text = pynini.escape(tagged_text)
                verbalizer_lattice = self.find_verbalizer(tagged_text)
                if verbalizer_lattice.num_states() != 0:
                    break
            if verbalizer_lattice is None:
                raise ValueError(f"No permutations were generated from tokens {s}")
            output += ' ' + self.select_verbalizer(verbalizer_lattice)
        # Drop the leading space and collapse duplicated spaces.
        output = SPACE_DUP.sub(' ', output[1:])

        # self.post_processor is only built for English (see __init__).
        if self.lang == "en":
            output = self.post_process(output)

        if punct_post_process:
            # do post-processing based on Moses detokenizer
            if self.processor:
                output = self.processor.moses_detokenizer.detokenize([output], unescape=False)
                output = post_process_punct(input=original_text, normalized_text=output)
            else:
                print("NEMO_NLP collection is not available: skipping punctuation post_processing")

        return output

    def split_text_into_sentences(self, text: str) -> List[str]:
        """
        Split text into sentences.

        Args:
            text: text

        Returns list of sentences
        """
        lower_case_unicode = ''
        upper_case_unicode = ''
        if self.lang == "ru":
            lower_case_unicode = '\u0430-\u04FF'
            upper_case_unicode = '\u0410-\u042F'

        # NOTE(review): the original split regex was garbled in this copy (text inside
        # angle brackets was stripped). Reconstructed from upstream NeMo — confirm
        # against the original normalize.py before relying on exact sentence boundaries.
        split_pattern = (
            rf"(?<!\w\.\w.)(?<![A-Z{upper_case_unicode}][a-z{lower_case_unicode}]\.)"
            rf"(?<![A-Z{upper_case_unicode}]\.)(?<=\.|\?|\!)\s"
        )
        sentences = regex.split(split_pattern, text)
        return [s.strip() for s in sentences if s.strip()]

    def _permute(self, d: OrderedDict) -> List[str]:
        """
        Creates reorderings of dictionary elements and serializes as strings

        Args:
            d: (nested) dictionary of key value pairs

        Return permutations of different string serializations of key value pairs
        """
        l = []
        if PRESERVE_ORDER_KEY in d.keys():
            d_permutations = [d.items()]
        else:
            d_permutations = itertools.permutations(d.items())
        for perm in d_permutations:
            subl = [""]
            for k, v in perm:
                if isinstance(v, str):
                    subl = ["".join(x) for x in itertools.product(subl, [f"{k}: \"{v}\" "])]
                elif isinstance(v, OrderedDict):
                    rec = self._permute(v)
                    subl = ["".join(x) for x in itertools.product(subl, [f" {k} {{ "], rec, [f" }} "])]
                elif isinstance(v, bool):
                    subl = ["".join(x) for x in itertools.product(subl, [f"{k}: true "])]
                else:
                    raise ValueError(f"Unsupported value type for key '{k}': {type(v)}")
            l.extend(subl)
        return l

    def generate_permutations(self, tokens: List[dict]):
        """
        Generates permutations of string serializations of list of dictionaries

        Args:
            tokens: list of dictionaries

        Returns string serialization of list of dictionaries
        """

        def _helper(prefix: str, tokens: List[dict], idx: int):
            # Yields every serialization that extends `prefix` with permutations
            # of tokens[idx:].
            if idx == len(tokens):
                yield prefix
                return
            token_options = self._permute(tokens[idx])
            for token_option in token_options:
                yield from _helper(prefix + token_option, tokens, idx + 1)

        return _helper("", tokens, 0)

    def find_tags(self, text: str) -> 'pynini.FstLike':
        """
        Given text use tagger Fst to tag text

        Args:
            text: sentence

        Returns: tagged lattice
        """
        return text @ self.tagger.fst

    def select_tag(self, lattice: 'pynini.FstLike') -> str:
        """
        Given tagged lattice return shortest path

        Args:
            lattice: tagged lattice

        Returns: shortest path string
        """
        return pynini.shortestpath(lattice, nshortest=1, unique=True).string()

    def find_verbalizer(self, tagged_text: str) -> 'pynini.FstLike':
        """
        Given tagged text creates verbalization lattice
        This is context-independent.

        Args:
            tagged_text: input text

        Returns: verbalized lattice
        """
        return tagged_text @ self.verbalizer.fst

    def select_verbalizer(self, lattice: 'pynini.FstLike') -> str:
        """
        Given verbalized lattice return shortest path

        Args:
            lattice: verbalization lattice

        Returns: shortest path string
        """
        return pynini.shortestpath(lattice, nshortest=1, unique=True).string()

    def post_process(self, normalized_text: 'pynini.FstLike') -> str:
        """
        Runs the post-processing graph on normalized text.

        FIX: previously the text was pynini-escaped even when no post-processor FST
        existed, leaking escape characters into the returned string. Escaping now only
        happens when the rewrite is actually applied.

        Args:
            normalized_text: normalized text

        Returns: post-processed text
        """
        normalized_text = normalized_text.strip()
        if not normalized_text:
            return normalized_text
        if self.post_processor is not None:
            normalized_text = top_rewrite(pynini.escape(normalized_text), self.post_processor.fst)
        return normalized_text
def parse_args():
    """Build and parse the command-line interface of the normalization script."""
    parser = ArgumentParser()
    source = parser.add_mutually_exclusive_group()
    source.add_argument("--text", dest="input_string", help="input string", type=str)
    source.add_argument("--input_file", dest="input_file", help="input file path", type=str)
    parser.add_argument('--output_file', dest="output_file", help="output file path", type=str)
    parser.add_argument("--language", help="language", choices=["en", "de", "es"], default="en", type=str)
    parser.add_argument(
        "--input_case", help="input capitalization", choices=["lower_cased", "cased"], default="cased", type=str
    )
    parser.add_argument("--verbose", help="print info for debugging", action='store_true')
    parser.add_argument(
        "--punct_post_process",
        help="set to True to enable punctuation post processing to match input.",
        action="store_true",
    )
    parser.add_argument(
        "--punct_pre_process", help="set to True to enable punctuation pre processing", action="store_true"
    )
    parser.add_argument("--overwrite_cache", help="set to True to re-create .far grammar files", action="store_true")
    parser.add_argument("--whitelist", help="path to a file with with whitelist", default=None, type=str)
    parser.add_argument(
        "--cache_dir",
        help="path to a dir with .far grammar file. Set to None to avoid using cache",
        default=None,
        type=str,
    )
    return parser.parse_args()


if __name__ == "__main__":
    start_time = perf_counter()
    args = parse_args()
    whitelist = os.path.abspath(args.whitelist) if args.whitelist else None

    if not args.input_string and not args.input_file:
        raise ValueError("Either `--text` or `--input_file` required")

    normalizer = Normalizer(
        input_case=args.input_case,
        cache_dir=args.cache_dir,
        overwrite_cache=args.overwrite_cache,
        whitelist=whitelist,
        lang=args.language,
    )
    # Shared keyword arguments for both the single-string and the file paths.
    norm_kwargs = dict(
        verbose=args.verbose,
        punct_pre_process=args.punct_pre_process,
        punct_post_process=args.punct_post_process,
    )

    if args.input_string:
        print(normalizer.normalize(args.input_string, **norm_kwargs))
    else:
        print("Loading data: " + args.input_file)
        data = load_file(args.input_file)
        print("- Data: " + str(len(data)) + " sentences")
        normalizer_prediction = normalizer.normalize_list(data, **norm_kwargs)
        if args.output_file:
            write_file(args.output_file, normalizer_prediction)
            print(f"- Normalized. Writing out to {args.output_file}")
        else:
            print(normalizer_prediction)

    print(f"Execution time: {perf_counter() - start_time:.02f} sec")
import json
import os
import time
from argparse import ArgumentParser
from glob import glob
from typing import List, Tuple

import pynini
from joblib import Parallel, delayed
from nemo_text_processing.text_normalization.data_loader_utils import post_process_punct, pre_process
from nemo_text_processing.text_normalization.normalize import Normalizer
from pynini.lib import rewrite
from tqdm import tqdm

try:
    from nemo.collections.asr.metrics.wer import word_error_rate
    from nemo.collections.asr.models import ASRModel

    ASR_AVAILABLE = True
except (ModuleNotFoundError, ImportError):
    ASR_AVAILABLE = False


"""
The script provides multiple normalization options and chooses the best one that minimizes CER of the ASR output
(most of the semiotic classes use deterministic=False flag).

To run this script with a .json manifest file, the manifest file should contain the following fields:
    "audio_data" - path to the audio file
    "text" - raw text
    "pred_text" - ASR model prediction

See https://github.com/NVIDIA/NeMo/blob/main/examples/asr/transcribe_speech.py on how to add ASR predictions

When the manifest is ready, run:
    python normalize_with_audio.py --audio_data PATH/TO/MANIFEST.JSON --language en

To run with a single audio file, specify path to audio and text with:
    python normalize_with_audio.py --audio_data PATH/TO/AUDIO.WAV --language en \
        --text "raw text OR PATH/TO/.TXT/FILE" --model QuartzNet15x5Base-En --verbose

To see possible normalization options for a text input without an audio file (could be used for debugging), run:
    python normalize_with_audio.py --text "RAW TEXT"

Specify `--cache_dir` to generate .far grammars once and re-use them for faster inference
"""


class NormalizerWithAudio(Normalizer):
    """
    Normalizer class that converts text from written to spoken form and, given an ASR
    transcript, selects the normalization option closest to what was actually said.

    Args:
        input_case: expected input capitalization
        lang: language
        cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache.
        overwrite_cache: set to True to overwrite .far files
        whitelist: path to a file with whitelist replacements
        lm: set to True to use WFST+LM mode (English only)
        post_process: WFST-based post processing, e.g. to remove extra spaces added during TN.
            Note: punct_post_process flag in normalize() supports all languages.
    """

    def __init__(
        self,
        input_case: str,
        lang: str = 'en',
        cache_dir: str = None,
        overwrite_cache: bool = False,
        whitelist: str = None,
        lm: bool = False,
        post_process: bool = True,
    ):
        # Non-deterministic grammars are required to produce multiple options.
        super().__init__(
            input_case=input_case,
            lang=lang,
            deterministic=False,
            cache_dir=cache_dir,
            overwrite_cache=overwrite_cache,
            whitelist=whitelist,
            lm=lm,
            post_process=post_process,
        )
        self.lm = lm

    def normalize(self, text: str, n_tagged: int, punct_post_process: bool = True, verbose: bool = False) -> str:
        """
        Main function. Normalizes tokens from written to spoken form
        e.g. 12 kg -> twelve kilograms

        Args:
            text: string that may include semiotic classes
            n_tagged: number of tagged options to consider, -1 - to get all possible tagged options
            punct_post_process: whether to normalize punctuation
            verbose: whether to print intermediate meta information

        Returns:
            normalized text options (usually there are multiple ways of normalizing a given semiotic class);
            in LM mode, a (options, weights) tuple
        """
        if len(text.split()) > 500:
            raise ValueError(
                "Your input is too long. Please split up the input into sentences, "
                "or strings with fewer than 500 words"
            )

        original_text = text
        text = pre_process(text)  # to handle []

        text = text.strip()
        if not text:
            if verbose:
                print(text)
            return text
        text = pynini.escape(text)
        if verbose:
            # FIX: this was an unconditional debug print that spammed stdout for
            # every input; it is now gated on the verbose flag.
            print(text)

        # NOTE(review): the indentation of this LM branch was ambiguous in the diff;
        # reconstructed following upstream NeMo — confirm against the original file.
        if self.lm:
            if self.lang not in ["en"]:
                raise ValueError(f"{self.lang} is not supported in LM mode")

            if self.lang == "en":
                # this to keep arpabet phonemes in the list of options
                if "[" in text and "]" in text:
                    lattice = rewrite.rewrite_lattice(text, self.tagger.fst)
                else:
                    try:
                        lattice = rewrite.rewrite_lattice(text, self.tagger.fst_no_digits)
                    except pynini.lib.rewrite.Error:
                        lattice = rewrite.rewrite_lattice(text, self.tagger.fst)
                lattice = rewrite.lattice_to_nshortest(lattice, n_tagged)
                tagged_texts = [(x[1], float(x[2])) for x in lattice.paths().items()]
                tagged_texts.sort(key=lambda x: x[1])
                tagged_texts, weights = list(zip(*tagged_texts))
        else:
            tagged_texts = self._get_tagged_text(text, n_tagged)

        # non-deterministic Eng normalization uses tagger composed with verbalizer, no permutation in between
        if self.lang == "en":
            normalized_texts = tagged_texts
            normalized_texts = [self.post_process(t) for t in normalized_texts]
        else:
            normalized_texts = []
            for tagged_text in tagged_texts:
                self._verbalize(tagged_text, normalized_texts, verbose=verbose)

        if len(normalized_texts) == 0:
            # FIX: previously raised a bare ValueError() with no message.
            raise ValueError(f"No normalization options were generated for: {original_text}")

        if punct_post_process:
            # do post-processing based on Moses detokenizer
            # NOTE(review): self.processor is stubbed to 0 in the base class, so this
            # branch is currently never taken — confirm intended behavior.
            if self.processor:
                normalized_texts = [self.processor.detokenize([t]) for t in normalized_texts]
                normalized_texts = [
                    post_process_punct(input=original_text, normalized_text=t) for t in normalized_texts
                ]

        if self.lm:
            remove_dup = sorted(list(set(zip(normalized_texts, weights))), key=lambda x: x[1])
            normalized_texts, weights = zip(*remove_dup)
            return list(normalized_texts), weights

        normalized_texts = set(normalized_texts)
        return normalized_texts

    def _get_tagged_text(self, text, n_tagged):
        """
        Returns text after tokenize and classify
        Args:
            text: input text
            n_tagged: number of tagged options to consider, -1 - return all possible tagged options
        """
        if n_tagged == -1:
            if self.lang == "en":
                # this to keep arpabet phonemes in the list of options
                if "[" in text and "]" in text:
                    tagged_texts = rewrite.rewrites(text, self.tagger.fst)
                else:
                    try:
                        tagged_texts = rewrite.rewrites(text, self.tagger.fst_no_digits)
                    except pynini.lib.rewrite.Error:
                        tagged_texts = rewrite.rewrites(text, self.tagger.fst)
            else:
                tagged_texts = rewrite.rewrites(text, self.tagger.fst)
        else:
            if self.lang == "en":
                # this to keep arpabet phonemes in the list of options
                if "[" in text and "]" in text:
                    tagged_texts = rewrite.top_rewrites(text, self.tagger.fst, nshortest=n_tagged)
                else:
                    try:
                        # try self.tagger graph that produces output without digits
                        tagged_texts = rewrite.top_rewrites(text, self.tagger.fst_no_digits, nshortest=n_tagged)
                    except pynini.lib.rewrite.Error:
                        tagged_texts = rewrite.top_rewrites(text, self.tagger.fst, nshortest=n_tagged)
            else:
                tagged_texts = rewrite.top_rewrites(text, self.tagger.fst, nshortest=n_tagged)
        return tagged_texts

    def _verbalize(self, tagged_text: str, normalized_texts: List[str], verbose: bool = False):
        """
        Verbalizes tagged text

        Args:
            tagged_text: text with tags
            normalized_texts: list of possible normalization options (appended to in place)
            verbose: if true prints intermediate classification results
        """

        def get_verbalized_text(tagged_text):
            return rewrite.rewrites(tagged_text, self.verbalizer.fst)

        self.parser(tagged_text)
        tokens = self.parser.parse()
        tags_reordered = self.generate_permutations(tokens)
        for tagged_text_reordered in tags_reordered:
            try:
                tagged_text_reordered = pynini.escape(tagged_text_reordered)
                normalized_texts.extend(get_verbalized_text(tagged_text_reordered))
                if verbose:
                    print(tagged_text_reordered)
            except pynini.lib.rewrite.Error:
                # This permutation could not be verbalized; try the next one.
                continue

    def select_best_match(
        self,
        normalized_texts: List[str],
        input_text: str,
        pred_text: str,
        verbose: bool = False,
        remove_punct: bool = False,
        cer_threshold: int = 100,
    ):
        """
        Selects the best normalization option based on the lowest CER

        Args:
            normalized_texts: normalized text options
            input_text: input text
            pred_text: ASR model transcript of the audio file corresponding to the normalized text
            verbose: whether to print intermediate meta information
            remove_punct: whether to remove punctuation before calculating CER
            cer_threshold: if CER for pred_text is above the cer_threshold, no normalization will be performed

        Returns:
            normalized text with the lowest CER and CER value
        """
        if pred_text == "":
            return input_text, cer_threshold

        normalized_texts_cer = calculate_cer(normalized_texts, pred_text, remove_punct)
        normalized_texts_cer = sorted(normalized_texts_cer, key=lambda x: x[1])
        normalized_text, cer = normalized_texts_cer[0]

        if cer > cer_threshold:
            return input_text, cer

        if verbose:
            print('-' * 30)
            for option in normalized_texts:
                print(option)
            print('-' * 30)
        return normalized_text, cer


def calculate_cer(normalized_texts: List[str], pred_text: str, remove_punct=False) -> List[Tuple[str, float]]:
    """
    Calculates character error rate (CER)

    Args:
        normalized_texts: normalized text options
        pred_text: ASR model output
        remove_punct: whether to strip punctuation before scoring

    Returns: normalized options with corresponding CER
    """
    normalized_options = []
    for text in normalized_texts:
        text_clean = text.replace('-', ' ').lower()
        if remove_punct:
            for punct in "!?:;,.-()*+-/<=>@^_":
                text_clean = text_clean.replace(punct, "")
        cer = round(word_error_rate([pred_text], [text_clean], use_cer=True) * 100, 2)
        normalized_options.append((text, cer))
    return normalized_options


def get_asr_model(asr_model):
    """
    Returns an ASR model loaded from a checkpoint path or a pretrained model name.

    FIX: this function previously tested the global ``args.model`` instead of its own
    ``asr_model`` argument, so it only worked when called from __main__.

    Args:
        asr_model: path to a NeMo checkpoint or a pretrained model name
    """
    if os.path.exists(asr_model):
        asr_model = ASRModel.restore_from(asr_model)
    elif asr_model in ASRModel.get_available_model_names():
        asr_model = ASRModel.from_pretrained(asr_model)
    else:
        raise ValueError(
            f'Provide path to the pretrained checkpoint or choose from {ASRModel.get_available_model_names()}'
        )
    return asr_model


def parse_args():
    """Build the command-line interface for audio-based normalization."""
    parser = ArgumentParser()
    parser.add_argument("--text", help="input string or path to a .txt file", default=None, type=str)
    parser.add_argument(
        "--input_case", help="input capitalization", choices=["lower_cased", "cased"], default="cased", type=str
    )
    parser.add_argument(
        "--language", help="Select target language", choices=["en", "ru", "de", "es"], default="en", type=str
    )
    parser.add_argument("--audio_data", default=None, help="path to an audio file or .json manifest")
    parser.add_argument(
        '--model', type=str, default='QuartzNet15x5Base-En', help='Pre-trained model name or path to model checkpoint'
    )
    parser.add_argument(
        "--n_tagged",
        type=int,
        default=30,
        help="number of tagged options to consider, -1 - return all possible tagged options",
    )
    parser.add_argument("--verbose", help="print info for debugging", action="store_true")
    parser.add_argument(
        "--no_remove_punct_for_cer",
        help="Set to True to NOT remove punctuation before calculating CER",
        action="store_true",
    )
    parser.add_argument(
        "--no_punct_post_process", help="set to True to disable punctuation post processing", action="store_true"
    )
    parser.add_argument("--overwrite_cache", help="set to True to re-create .far grammar files", action="store_true")
    parser.add_argument("--whitelist", help="path to a file with with whitelist", default=None, type=str)
    parser.add_argument(
        "--cache_dir",
        help="path to a dir with .far grammar file. Set to None to avoid using cache",
        default=None,
        type=str,
    )
    parser.add_argument("--n_jobs", default=-2, type=int, help="The maximum number of concurrently running jobs")
    parser.add_argument(
        "--lm", action="store_true", help="Set to True for WFST+LM. Only available for English right now."
    )
    parser.add_argument(
        "--cer_threshold",
        default=100,
        type=int,
        help="if CER for pred_text is above the cer_threshold, no normalization will be performed",
    )
    parser.add_argument("--batch_size", default=200, type=int, help="Number of examples for each process")
    return parser.parse_args()


def _normalize_line(
    normalizer: NormalizerWithAudio, n_tagged, verbose, line: str, remove_punct, punct_post_process, cer_threshold
):
    """Normalize one manifest line (JSON) and attach the best option and its CER."""
    line = json.loads(line)
    pred_text = line["pred_text"]

    normalized_texts = normalizer.normalize(
        text=line["text"], verbose=verbose, n_tagged=n_tagged, punct_post_process=punct_post_process,
    )

    normalized_texts = set(normalized_texts)
    normalized_text, cer = normalizer.select_best_match(
        normalized_texts=normalized_texts,
        input_text=line["text"],
        pred_text=pred_text,
        verbose=verbose,
        remove_punct=remove_punct,
        cer_threshold=cer_threshold,
    )
    line["nemo_normalized"] = normalized_text
    line["CER_nemo_normalized"] = cer
    return line


def normalize_manifest(
    normalizer,
    audio_data: str,
    n_jobs: int,
    n_tagged: int,
    remove_punct: bool,
    punct_post_process: bool,
    batch_size: int,
    cer_threshold: int,
):
    """
    Normalizes a .json manifest in parallel batches and writes *_normalized.json.

    Args:
        normalizer: NormalizerWithAudio instance
        audio_data: path to .json manifest file
        n_jobs: maximum number of concurrently running jobs
        n_tagged: number of tagged options to consider per line
        remove_punct: whether to remove punctuation before calculating CER
        punct_post_process: whether to normalize punctuation
        batch_size: number of examples for each process
        cer_threshold: skip normalization if the best option's CER exceeds this
    """

    def __process_batch(batch_idx: int, batch: List[str], dir_name: str):
        """
        Normalizes batch of text sequences
        Args:
            batch: list of texts
            batch_idx: batch index
            dir_name: path to output directory to save results
        """
        normalized_lines = [
            _normalize_line(
                normalizer,
                n_tagged,
                verbose=False,
                line=line,
                remove_punct=remove_punct,
                punct_post_process=punct_post_process,
                cer_threshold=cer_threshold,
            )
            for line in tqdm(batch)
        ]

        with open(f"{dir_name}/{batch_idx:05}.json", "w") as f_out:
            for line in normalized_lines:
                f_out.write(json.dumps(line, ensure_ascii=False) + '\n')

        print(f"Batch -- {batch_idx} -- is complete")

    manifest_out = audio_data.replace('.json', '_normalized.json')
    with open(audio_data, 'r') as f:
        lines = f.readlines()

    print(f'Normalizing {len(lines)} lines of {audio_data}...')

    # to save intermediate results to a file
    batch = min(len(lines), batch_size)

    tmp_dir = manifest_out.replace(".json", "_parts")
    os.makedirs(tmp_dir, exist_ok=True)

    Parallel(n_jobs=n_jobs)(
        delayed(__process_batch)(idx, lines[i : i + batch], tmp_dir)
        for idx, i in enumerate(range(0, len(lines), batch))
    )

    # aggregate all intermediate files
    with open(manifest_out, "w") as f_out:
        for batch_f in sorted(glob(f"{tmp_dir}/*.json")):
            with open(batch_f, "r") as f_in:
                lines = f_in.read()
                f_out.write(lines)

    print(f'Normalized version saved at {manifest_out}')


if __name__ == "__main__":
    args = parse_args()

    if not ASR_AVAILABLE and args.audio_data:
        raise ValueError("NeMo ASR collection is not installed.")
    start = time.time()
    args.whitelist = os.path.abspath(args.whitelist) if args.whitelist else None
    if args.text is not None:
        normalizer = NormalizerWithAudio(
            input_case=args.input_case,
            lang=args.language,
            cache_dir=args.cache_dir,
            overwrite_cache=args.overwrite_cache,
            whitelist=args.whitelist,
            lm=args.lm,
        )

        if os.path.exists(args.text):
            with open(args.text, 'r') as f:
                args.text = f.read().strip()
        normalized_texts = normalizer.normalize(
            text=args.text,
            verbose=args.verbose,
            n_tagged=args.n_tagged,
            punct_post_process=not args.no_punct_post_process,
        )

        if not normalizer.lm:
            normalized_texts = set(normalized_texts)
        if args.audio_data:
            asr_model = get_asr_model(args.model)
            pred_text = asr_model.transcribe([args.audio_data])[0]
            normalized_text, cer = normalizer.select_best_match(
                normalized_texts=normalized_texts,
                pred_text=pred_text,
                input_text=args.text,
                verbose=args.verbose,
                remove_punct=not args.no_remove_punct_for_cer,
                cer_threshold=args.cer_threshold,
            )
            print(f"Transcript: {pred_text}")
            print(f"Normalized: {normalized_text}")
        else:
            print("Normalization options:")
            for norm_text in normalized_texts:
                print(norm_text)
    elif not os.path.exists(args.audio_data):
        raise ValueError(f"{args.audio_data} not found.")
    elif args.audio_data.endswith('.json'):
        normalizer = NormalizerWithAudio(
            input_case=args.input_case,
            lang=args.language,
            cache_dir=args.cache_dir,
            overwrite_cache=args.overwrite_cache,
            whitelist=args.whitelist,
        )
        normalize_manifest(
            normalizer=normalizer,
            audio_data=args.audio_data,
            n_jobs=args.n_jobs,
            n_tagged=args.n_tagged,
            remove_punct=not args.no_remove_punct_for_cer,
            punct_post_process=not args.no_punct_post_process,
            batch_size=args.batch_size,
            cer_threshold=args.cer_threshold,
        )
    else:
        raise ValueError(
            "Provide either path to .json manifest in '--audio_data' OR "
            + "'--audio_data' path to audio file and '--text' path to a text file OR"
            "'--text' string text (for debugging without audio)"
        )
    print(f'Execution time: {round((time.time() - start)/60, 2)} min.')
'''
Runs evaluation on data in the Google text-normalization format:
    <semiotic class>\t<unnormalized text>\t<`self` if trivial class, else normalized text>
like https://www.kaggle.com/richardwilliamsproat/text-normalization-for-english-russian-and-polish
'''


def parse_args():
    """Parse command-line arguments for the evaluation run."""
    parser = ArgumentParser()
    parser.add_argument("--input", help="input file path", type=str)
    parser.add_argument("--lang", help="language", choices=['en'], default="en", type=str)
    parser.add_argument(
        "--input_case", help="input capitalization", choices=["lower_cased", "cased"], default="cased", type=str
    )
    parser.add_argument(
        "--cat",
        dest="category",
        help="focus on class only (" + ", ".join(known_types) + ")",
        type=str,
        default=None,
        choices=known_types,
    )
    parser.add_argument("--filter", action='store_true', help="clean data for normalization purposes")
    return parser.parse_args()


if __name__ == "__main__":
    # Example usage:
    # python run_evaluate.py --input=<data path> --cat=<category> --filter
    args = parse_args()
    # --lang is restricted to 'en' by argparse choices, so the English cleaner
    # is always importable when --filter is used.
    if args.lang == 'en':
        from nemo_text_processing.text_normalization.en.clean_eval_data import filter_loaded_data
    file_path = args.input
    normalizer = Normalizer(input_case=args.input_case, lang=args.lang)

    print("Loading training data: " + file_path)
    training_data = load_files([file_path])

    if args.filter:
        training_data = filter_loaded_data(training_data)

    if args.category is None:
        print("Sentence level evaluation...")
        sentences_un_normalized, sentences_normalized, _ = training_data_to_sentences(training_data)
        print("- Data: " + str(len(sentences_normalized)) + " sentences")
        sentences_prediction = normalizer.normalize_list(sentences_un_normalized)
        print("- Normalized. Evaluating...")
        sentences_accuracy = evaluate(
            preds=sentences_prediction, labels=sentences_normalized, input=sentences_un_normalized
        )
        print("- Accuracy: " + str(sentences_accuracy))

    print("Token level evaluation...")
    tokens_per_type = training_data_to_tokens(training_data, category=args.category)
    token_accuracy = {}
    for token_type in tokens_per_type:
        print("- Token type: " + token_type)
        tokens_un_normalized, tokens_normalized = tokens_per_type[token_type]
        print(" - Data: " + str(len(tokens_normalized)) + " tokens")
        tokens_prediction = normalizer.normalize_list(tokens_un_normalized)
        print(" - Denormalized. Evaluating...")
        token_accuracy[token_type] = evaluate(
            preds=tokens_prediction, labels=tokens_normalized, input=tokens_un_normalized
        )
        print(" - Accuracy: " + str(token_accuracy[token_type]))
    token_count_per_type = {token_type: len(tokens_per_type[token_type][0]) for token_type in tokens_per_type}
    token_weighted_accuracy = [
        token_count_per_type[token_type] * accuracy for token_type, accuracy in token_accuracy.items()
    ]
    print("- Accuracy: " + str(sum(token_weighted_accuracy) / sum(token_count_per_type.values())))
    # BUGFIX: this summary line was printed twice in the original; keep one copy.
    print(" - Total: " + str(sum(token_count_per_type.values())), '\n')

    for token_type in token_accuracy:
        if token_type not in known_types:
            raise ValueError("Unexpected token type: " + token_type)

    if args.category is None:
        # Per-class summary table: class name, token count, accuracy.
        c1 = ['Class', 'sent level'] + known_types
        c2 = ['Num Tokens', len(sentences_normalized)] + [
            token_count_per_type[known_type] if known_type in tokens_per_type else '0' for known_type in known_types
        ]
        c3 = ['Normalization', sentences_accuracy] + [
            token_accuracy[known_type] if known_type in token_accuracy else '0' for known_type in known_types
        ]

        for i in range(len(c1)):
            print(f'{str(c1[i]):10s} | {str(c2[i]):10s} | {str(c3[i]):5s}')
    else:
        print(f'numbers\t{token_count_per_type[args.category]}')
        print(f'Normalization\t{token_accuracy[args.category]}')
PRESERVE_ORDER_KEY = "preserve_order"
# End-of-string sentinel.  BUGFIX: this copy had the sentinel garbled to the
# empty string; upstream NeMo uses the multi-character marker "<EOS>", which
# can never collide with a real one-character token.  Behavior is unchanged
# because self.char otherwise always holds a single character.
EOS = "<EOS>"


class TokenParser:
    """
    Parses tokenized/classified text, e.g.
    'tokens { money { integer: "20" currency: "$" } } tokens { name: "left" }'

    Usage:
        parser = TokenParser()
        parser(text)            # bind the text to parse
        tokens = parser.parse() # -> list of OrderedDicts
    """

    def __call__(self, text):
        """
        Binds `text` to the parser and resets the cursor.

        Args:
            text: tokenized/classified text to be parsed (must be non-empty)
        """
        self.text = text
        self.len_text = len(text)
        self.char = text[0]  # cannot handle empty string
        self.index = 0

    def parse(self) -> List[dict]:
        """
        Main function. Implements grammar:
        A -> space F space F space F ... space

        Returns list of dictionaries, one per parsed token.
        """
        l = list()
        while self.parse_ws():
            token = self.parse_token()
            if not token:
                break
            l.append(token)
        return l

    def parse_token(self) -> Dict[str, Union[str, dict]]:
        """
        Implements grammar:
        F -> no_space KG no_space

        Returns: single-entry dict {K: G}, or None when no key is present
        """
        d = OrderedDict()
        key = self.parse_string_key()
        if key is None:
            return None
        self.parse_ws()
        if key == PRESERVE_ORDER_KEY:
            # 'preserve_order: true' carries a bare literal instead of a quoted value
            self.parse_char(":")
            self.parse_ws()
            value = self.parse_chars("true")
        else:
            value = self.parse_token_value()

        d[key] = value
        return d

    def parse_token_value(self) -> Union[str, dict]:
        """
        Implements grammar:
        G -> no_space :"VALUE" no_space | no_space {A} no_space

        Returns: string (quoted value) or dictionary (nested token group)
        """
        if self.char == ":":
            self.parse_char(":")
            self.parse_ws()
            self.parse_char("\"")
            value_string = self.parse_string_value()
            self.parse_char("\"")
            return value_string
        elif self.char == "{":
            d = OrderedDict()
            self.parse_char("{")
            list_token_dicts = self.parse()
            # flatten tokens: merge the list of single-entry dicts into one dict
            for tok_dict in list_token_dicts:
                for k, v in tok_dict.items():
                    d[k] = v
            self.parse_char("}")
            return d
        else:
            raise ValueError()

    def parse_char(self, exp) -> bool:
        """
        Consumes one expected character.

        Args:
            exp: character to read in

        Returns true if successful
        """
        assert self.char == exp
        self.read()
        return True

    def parse_chars(self, exp) -> bool:
        """
        Consumes an expected character sequence.

        Args:
            exp: characters to read in

        Returns true if successful
        """
        ok = False
        for x in exp:
            ok |= self.parse_char(x)
        return ok

    def parse_string_key(self) -> str:
        """
        Parses a string key; keys may only contain ASCII letters and '_'.

        Returns parsed string key, or None when the cursor is not on a key
        """
        assert self.char not in string.whitespace and self.char != EOS
        incl_criterium = string.ascii_letters + "_"
        l = []
        while self.char in incl_criterium:
            l.append(self.char)
            if not self.read():
                raise ValueError()

        if not l:
            return None
        return "".join(l)

    def parse_string_value(self) -> str:
        """
        Parses a string value; ends with a quote followed by a space.
        NOTE(review): self.text[self.index + 1] assumes the closing quote is
        never the last character of the input — guaranteed by the grammar's
        trailing token structure, not checked here.

        Returns parsed string value
        """
        assert self.char not in string.whitespace and self.char != EOS
        l = []
        while self.char != "\"" or self.text[self.index + 1] != " ":
            l.append(self.char)
            if not self.read():
                raise ValueError()

        if not l:
            return None
        return "".join(l)

    def parse_ws(self):
        """
        Skips whitespace.

        Returns true if not EOS after parsing
        """
        not_eos = self.char != EOS
        while not_eos and self.char == " ":
            not_eos = self.read()
        return not_eos

    def read(self):
        """
        Advances the cursor by one character.

        Returns true if not EOS
        """
        if self.index < self.len_text - 1:  # should be unique
            self.index += 1
            self.char = self.text[self.index]
            return True
        self.char = EOS
        return False
p.add_argument('--to_lower', action='store_true', help='convert to lower case') + p.add_argument('--has_key', action='store_true', help="input text has Kaldi's key as first field.") + p.add_argument('--log_interval', type=int, default=10000, help='log interval in number of processed lines') + args = p.parse_args() + + nemo_tn_en = Normalizer(input_case='lower_cased', lang='en') + + itj = read_interjections(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'interjections_en.csv')) + itj_map = {x: True for x in itj} + + certain_single_quote_items = ["\"'", "'?", "'!", "'.", "?'", "!'", ".'", "''", "'", "'"] + single_quote_removed_items = [x.replace("'", '') for x in certain_single_quote_items] + + puncts_to_remove = string.punctuation.replace("'", '') + "—–“”" + puncts_trans = str.maketrans(puncts_to_remove, ' ' * len(puncts_to_remove), '') + + n = 0 + with open(args.ifile, 'r', encoding='utf8') as fi, open(args.ofile, 'w+', encoding='utf8') as fo: + for line in fi: + if args.has_key: + cols = line.strip().split(maxsplit=1) + key, text = cols[0].strip(), cols[1].strip() if len(cols) == 2 else '' + else: + text = line.strip() + + text = text.replace("‘", "'").replace("’", "'") + + # nemo text normalization + # modifications to NeMo: + # 1. added UK to US conversion: nemo_text_processing/text_normalization/en/data/whitelist/UK_to_US.tsv + # 2. swith 'oh' to 'o' in year TN to avoid confusion with interjections, e.g.: + # 1805: eighteen oh five -> eighteen o five + text = nemo_tn_en.normalize(text.lower()) + + # Punctuations + # NOTE(2022.10 Jiayu): + # Single quote removal is not perfect. + # ' needs to be reserved for: + # Abbreviations: + # I'm, don't, she'd, 'cause, Sweet Child o' Mine, Guns N' Roses, ... + # Possessions: + # John's, the king's, parents', ... 
+ text = '' + text + '' + for x, y in zip(certain_single_quote_items, single_quote_removed_items): + text = text.replace(x, y) + text = text.replace('', '').replace('', '') + + text = text.translate(puncts_trans).replace(" ' ", " ") + + # Interjections + text = ' '.join([x for x in text.strip().split() if x not in itj_map]) + + # Cases + if args.to_upper and args.to_lower: + sys.stderr.write('text norm: to_upper OR to_lower?') + exit(1) + if args.to_upper: + text = text.upper() + if args.to_lower: + text = text.lower() + + if args.has_key: + print(key + '\t' + text, file=fo) + else: + print(text, file=fo) + + n += 1 + if n % args.log_interval == 0: + print(f'text norm: {n} lines done.', file=sys.stderr) + print(f'text norm: {n} lines done in total.', file=sys.stderr) diff --git a/utils/speechio/textnorm_zh.py b/utils/speechio/textnorm_zh.py new file mode 100644 index 0000000..9a671e6 --- /dev/null +++ b/utils/speechio/textnorm_zh.py @@ -0,0 +1,1204 @@ +#!/usr/bin/env python3 +# coding=utf-8 + +# Authors: +# 2019.5 Zhiyang Zhou (https://github.com/Joee1995/chn_text_norm.git) +# 2019.9 - 2022 Jiayu DU +# +# requirements: +# - python 3.X +# notes: python 2.X WILL fail or produce misleading results + +import sys, os, argparse +import string, re +import csv + +# ================================================================================ # +# basic constant +# ================================================================================ # +CHINESE_DIGIS = u'零一二三四五六七八九' +BIG_CHINESE_DIGIS_SIMPLIFIED = u'零壹贰叁肆伍陆柒捌玖' +BIG_CHINESE_DIGIS_TRADITIONAL = u'零壹貳參肆伍陸柒捌玖' +SMALLER_BIG_CHINESE_UNITS_SIMPLIFIED = u'十百千万' +SMALLER_BIG_CHINESE_UNITS_TRADITIONAL = u'拾佰仟萬' +LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED = u'亿兆京垓秭穰沟涧正载' +LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL = u'億兆京垓秭穰溝澗正載' +SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED = u'十百千万' +SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL = u'拾佰仟萬' + +ZERO_ALT = u'〇' +ONE_ALT = u'幺' +TWO_ALTS = [u'两', u'兩'] + +POSITIVE = [u'正', u'正'] 
+NEGATIVE = [u'负', u'負'] +POINT = [u'点', u'點'] +# PLUS = [u'加', u'加'] +# SIL = [u'杠', u'槓'] + +FILLER_CHARS = ['呃', '啊'] + +ER_WHITELIST = '(儿女|儿子|儿孙|女儿|儿媳|妻儿|' \ + '胎儿|婴儿|新生儿|婴幼儿|幼儿|少儿|小儿|儿歌|儿童|儿科|托儿所|孤儿|' \ + '儿戏|儿化|台儿庄|鹿儿岛|正儿八经|吊儿郎当|生儿育女|托儿带女|养儿防老|痴儿呆女|' \ + '佳儿佳妇|儿怜兽扰|儿无常父|儿不嫌母丑|儿行千里母担忧|儿大不由爷|苏乞儿)' +ER_WHITELIST_PATTERN = re.compile(ER_WHITELIST) + +# 中文数字系统类型 +NUMBERING_TYPES = ['low', 'mid', 'high'] + +CURRENCY_NAMES = '(人民币|美元|日元|英镑|欧元|马克|法郎|加拿大元|澳元|港币|先令|芬兰马克|爱尔兰镑|' \ + '里拉|荷兰盾|埃斯库多|比塞塔|印尼盾|林吉特|新西兰元|比索|卢布|新加坡元|韩元|泰铢)' +CURRENCY_UNITS = '((亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)' +COM_QUANTIFIERS = '(匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|' \ + '砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|' \ + '针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|' \ + '毫|厘|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|' \ + '盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|' \ + '纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块)' + + +# Punctuation information are based on Zhon project (https://github.com/tsroten/zhon.git) +CN_PUNCS_STOP = '!?。。' +CN_PUNCS_NONSTOP = '"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏·〈〉-' +CN_PUNCS = CN_PUNCS_STOP + CN_PUNCS_NONSTOP + +PUNCS = CN_PUNCS + string.punctuation +PUNCS_TRANSFORM = str.maketrans(PUNCS, ' ' * len(PUNCS), '') # replace puncs with space + + +# https://zh.wikipedia.org/wiki/全行和半行 +QJ2BJ = { + ' ': ' ', + '!': '!', + '"': '"', + '#': '#', + '$': '$', + '%': '%', + '&': '&', + ''': "'", + '(': '(', + ')': ')', + '*': '*', + '+': '+', + ',': ',', + '-': '-', + '.': '.', + '/': '/', + '0': '0', + '1': '1', + '2': '2', + '3': '3', + '4': '4', + '5': '5', + '6': '6', + '7': '7', + '8': '8', + '9': '9', + ':': ':', + ';': ';', + '<': '<', + '=': '=', + '>': '>', + '?': '?', + '@': '@', + 'A': 'A', + 'B': 'B', + 'C': 'C', + 'D': 'D', + 'E': 'E', + 'F': 'F', + 'G': 'G', + 'H': 'H', + 'I': 'I', + 'J': 'J', + 'K': 'K', 
+ 'L': 'L', + 'M': 'M', + 'N': 'N', + 'O': 'O', + 'P': 'P', + 'Q': 'Q', + 'R': 'R', + 'S': 'S', + 'T': 'T', + 'U': 'U', + 'V': 'V', + 'W': 'W', + 'X': 'X', + 'Y': 'Y', + 'Z': 'Z', + '[': '[', + '\': '\\', + ']': ']', + '^': '^', + '_': '_', + '`': '`', + 'a': 'a', + 'b': 'b', + 'c': 'c', + 'd': 'd', + 'e': 'e', + 'f': 'f', + 'g': 'g', + 'h': 'h', + 'i': 'i', + 'j': 'j', + 'k': 'k', + 'l': 'l', + 'm': 'm', + 'n': 'n', + 'o': 'o', + 'p': 'p', + 'q': 'q', + 'r': 'r', + 's': 's', + 't': 't', + 'u': 'u', + 'v': 'v', + 'w': 'w', + 'x': 'x', + 'y': 'y', + 'z': 'z', + '{': '{', + '|': '|', + '}': '}', + '~': '~', +} +QJ2BJ_TRANSFORM = str.maketrans(''.join(QJ2BJ.keys()), ''.join(QJ2BJ.values()), '') + + +# 2013 China National Standard: https://zh.wikipedia.org/wiki/通用规范汉字表, raw resources: +# https://github.com/mozillazg/pinyin-data/blob/master/kMandarin_8105.txt with 8105 chinese chars in total +CN_CHARS_COMMON = ( + '一丁七万丈三上下不与丏丐丑专且丕世丘丙业丛东丝丞丢两严丧个丫中丰串临丸丹为主丽举' + '乂乃久么义之乌乍乎乏乐乒乓乔乖乘乙乜九乞也习乡书乩买乱乳乸乾了予争事二亍于亏云互' + '亓五井亘亚些亟亡亢交亥亦产亨亩享京亭亮亲亳亵亶亸亹人亿什仁仂仃仄仅仆仇仉今介仍从' + '仑仓仔仕他仗付仙仝仞仟仡代令以仨仪仫们仰仲仳仵件价任份仿企伈伉伊伋伍伎伏伐休众优' + '伙会伛伞伟传伢伣伤伥伦伧伪伫伭伯估伲伴伶伸伺似伽伾佁佃但位低住佐佑体何佖佗佘余佚' + '佛作佝佞佟你佣佤佥佩佬佯佰佳佴佶佸佺佻佼佽佾使侁侂侃侄侈侉例侍侏侑侔侗侘供依侠侣' + '侥侦侧侨侩侪侬侮侯侴侵侹便促俄俅俊俍俎俏俐俑俗俘俙俚俜保俞俟信俣俦俨俩俪俫俭修俯' + '俱俳俵俶俸俺俾倌倍倏倒倓倔倕倘候倚倜倞借倡倥倦倧倨倩倪倬倭倮倴债倻值倾偁偃假偈偌' + '偎偏偓偕做停偡健偬偭偰偲偶偷偻偾偿傀傃傅傈傉傍傒傕傣傥傧储傩催傲傺傻僇僎像僔僖僚' + '僦僧僬僭僮僰僳僵僻儆儇儋儒儡儦儳儴儿兀允元兄充兆先光克免兑兔兕兖党兜兢入全八公六' + '兮兰共关兴兵其具典兹养兼兽冀冁内冈冉册再冏冒冔冕冗写军农冠冢冤冥冬冮冯冰冱冲决况' + '冶冷冻冼冽净凄准凇凉凋凌减凑凓凘凛凝几凡凤凫凭凯凰凳凶凸凹出击凼函凿刀刁刃分切刈' + '刊刍刎刑划刖列刘则刚创初删判刨利别刬刭刮到刳制刷券刹刺刻刽刿剀剁剂剃剅削剋剌前剐' + '剑剔剕剖剜剞剟剡剥剧剩剪副割剽剿劁劂劄劈劐劓力劝办功加务劢劣动助努劫劬劭励劲劳劼' + '劾势勃勇勉勋勍勐勒勔勖勘勚募勠勤勰勺勾勿匀包匆匈匍匏匐匕化北匙匜匝匠匡匣匦匪匮匹' + '区医匼匾匿十千卅升午卉半华协卑卒卓单卖南博卜卞卟占卡卢卣卤卦卧卫卬卮卯印危即却卵' + '卷卸卺卿厂厄厅历厉压厌厍厕厖厘厚厝原厢厣厥厦厨厩厮去厾县叁参叆叇又叉及友双反发叔' + '叕取受变叙叚叛叟叠口古句另叨叩只叫召叭叮可台叱史右叵叶号司叹叻叼叽吁吃各吆合吉吊' + '同名后吏吐向吒吓吕吖吗君吝吞吟吠吡吣否吧吨吩含听吭吮启吱吲吴吵吸吹吻吼吽吾呀呃呆' + '呇呈告呋呐呒呓呔呕呖呗员呙呛呜呢呣呤呦周呱呲味呵呶呷呸呻呼命咀咂咄咆咇咉咋和咍咎' + '咏咐咒咔咕咖咙咚咛咝咡咣咤咥咦咧咨咩咪咫咬咯咱咳咴咸咺咻咽咿哀品哂哃哄哆哇哈哉哌' + '响哎哏哐哑哒哓哔哕哗哙哚哝哞哟哢哥哦哧哨哩哪哭哮哱哲哳哺哼哽哿唁唆唇唉唏唐唑唔唛' + '唝唠唢唣唤唧唪唬售唯唰唱唳唵唷唼唾唿啁啃啄商啉啊啐啕啖啜啡啤啥啦啧啪啫啬啭啮啰啴' + 
'啵啶啷啸啻啼啾喀喁喂喃善喆喇喈喉喊喋喏喑喔喘喙喜喝喟喤喧喱喳喵喷喹喻喽喾嗄嗅嗉嗌' + '嗍嗐嗑嗒嗓嗔嗖嗜嗝嗞嗟嗡嗣嗤嗥嗦嗨嗪嗫嗬嗯嗲嗳嗵嗷嗽嗾嘀嘁嘈嘉嘌嘎嘏嘘嘚嘛嘞嘟嘡' + '嘣嘤嘧嘬嘭嘱嘲嘴嘶嘹嘻嘿噀噂噇噌噍噎噔噗噘噙噜噢噤器噩噪噫噬噱噶噻噼嚄嚅嚆嚎嚏嚓' + '嚚嚣嚭嚯嚷嚼囊囔囚四回囟因囡团囤囫园困囱围囵囷囹固国图囿圃圄圆圈圉圊圌圐圙圜土圢' + '圣在圩圪圫圬圭圮圯地圲圳圹场圻圾址坂均坉坊坋坌坍坎坏坐坑坒块坚坛坜坝坞坟坠坡坤坥' + '坦坨坩坪坫坬坭坯坰坳坷坻坼坽垂垃垄垆垈型垌垍垎垏垒垓垕垙垚垛垞垟垠垡垢垣垤垦垧垩' + '垫垭垮垯垱垲垴垵垸垺垾垿埂埃埆埇埋埌城埏埒埔埕埗埘埙埚埝域埠埤埪埫埭埯埴埵埸培基' + '埼埽堂堃堆堇堉堋堌堍堎堐堑堕堙堞堠堡堤堧堨堪堰堲堵堼堽堾塄塅塆塌塍塑塔塘塝塞塥填' + '塬塱塾墀墁境墅墈墉墐墒墓墕墘墙墚增墟墡墣墦墨墩墼壁壅壑壕壤士壬壮声壳壶壸壹处备复' + '夏夐夔夕外夙多夜够夤夥大天太夫夬夭央夯失头夷夸夹夺夼奁奂奄奇奈奉奋奎奏契奓奔奕奖' + '套奘奚奠奡奢奥奭女奴奶奸她好妁如妃妄妆妇妈妊妍妒妓妖妗妘妙妞妣妤妥妧妨妩妪妫妭妮' + '妯妲妹妻妾姆姈姊始姐姑姒姓委姗姘姚姜姝姞姣姤姥姨姬姮姱姶姹姻姽姿娀威娃娄娅娆娇娈' + '娉娌娑娓娘娜娟娠娣娥娩娱娲娴娵娶娼婀婆婉婊婌婍婕婘婚婞婠婢婤婧婪婫婳婴婵婶婷婺婻' + '婼婿媂媄媆媒媓媖媚媛媞媪媭媱媲媳媵媸媾嫁嫂嫄嫉嫌嫒嫔嫕嫖嫘嫚嫜嫠嫡嫣嫦嫩嫪嫫嫭嫱' + '嫽嬉嬖嬗嬛嬥嬬嬴嬷嬿孀孅子孑孓孔孕孖字存孙孚孛孜孝孟孢季孤孥学孩孪孬孰孱孳孵孺孽' + '宁它宄宅宇守安宋完宏宓宕宗官宙定宛宜宝实宠审客宣室宥宦宧宪宫宬宰害宴宵家宸容宽宾' + '宿寁寂寄寅密寇富寐寒寓寝寞察寡寤寥寨寮寰寸对寺寻导寿封射将尉尊小少尔尕尖尘尚尜尝' + '尢尤尥尧尨尪尬就尴尸尹尺尻尼尽尾尿局屁层屃居屈屉届屋屎屏屐屑展屙属屠屡屣履屦屯山' + '屹屺屼屾屿岁岂岈岊岌岍岐岑岔岖岗岘岙岚岛岜岞岠岢岣岨岩岫岬岭岱岳岵岷岸岽岿峁峂峃' + '峄峋峒峗峘峙峛峡峣峤峥峦峧峨峪峭峰峱峻峿崀崁崂崃崄崆崇崌崎崒崔崖崚崛崞崟崡崤崦崧' + '崩崭崮崴崶崽崾崿嵁嵅嵇嵊嵋嵌嵎嵖嵘嵚嵛嵝嵩嵫嵬嵯嵲嵴嶂嶅嶍嶒嶓嶙嶝嶟嶦嶲嶷巅巇巉' + '巍川州巡巢工左巧巨巩巫差巯己已巳巴巷巽巾币市布帅帆师希帏帐帑帔帕帖帘帙帚帛帜帝帡' + '带帧帨席帮帱帷常帻帼帽幂幄幅幌幔幕幖幛幞幡幢幪干平年并幸幺幻幼幽广庄庆庇床庋序庐' + '庑库应底庖店庙庚府庞废庠庤庥度座庭庱庳庵庶康庸庹庼庾廆廉廊廋廑廒廓廖廙廛廨廪延廷' + '建廿开弁异弃弄弆弇弈弊弋式弑弓引弗弘弛弟张弢弥弦弧弨弩弭弯弱弶弸弹强弼彀归当录彖' + '彗彘彝彟形彤彦彧彩彪彬彭彰影彳彷役彻彼往征徂径待徇很徉徊律徐徒徕得徘徙徛徜御徨循' + '徭微徵德徼徽心必忆忉忌忍忏忐忑忒忖志忘忙忝忞忠忡忤忧忪快忭忮忱忳念忸忺忻忽忾忿怀' + '态怂怃怄怅怆怊怍怎怏怒怔怕怖怙怛怜思怠怡急怦性怨怩怪怫怯怵总怼怿恁恂恃恋恍恐恒恓' + '恔恕恙恚恝恢恣恤恧恨恩恪恫恬恭息恰恳恶恸恹恺恻恼恽恿悃悄悆悈悉悌悍悒悔悖悚悛悝悟' + '悠悢患悦您悫悬悭悯悰悱悲悴悸悻悼情惆惇惊惋惎惑惔惕惘惙惚惛惜惝惟惠惦惧惨惩惫惬惭' + '惮惯惰想惴惶惹惺愀愁愃愆愈愉愍愎意愐愔愕愚感愠愣愤愦愧愫愭愿慆慈慊慌慎慑慕慝慢慥' + '慧慨慬慭慰慵慷憋憎憔憕憙憧憨憩憬憭憷憺憾懂懈懊懋懑懒懔懦懵懿戆戈戊戋戌戍戎戏成我' + '戒戕或戗战戚戛戟戡戢戣戤戥截戬戭戮戳戴户戽戾房所扁扂扃扅扆扇扈扉扊手才扎扑扒打扔' + '托扛扞扣扦执扩扪扫扬扭扮扯扰扳扶批扺扼扽找承技抃抄抉把抑抒抓抔投抖抗折抚抛抟抠抡' + '抢护报抨披抬抱抵抹抻押抽抿拂拃拄担拆拇拈拉拊拌拍拎拐拒拓拔拖拗拘拙招拜拟拢拣拤拥' + '拦拧拨择括拭拮拯拱拳拴拶拷拼拽拾拿持挂指挈按挎挑挓挖挚挛挝挞挟挠挡挣挤挥挦挨挪挫' + '振挲挹挺挽捂捃捅捆捉捋捌捍捎捏捐捕捞损捡换捣捧捩捭据捯捶捷捺捻捽掀掂掇授掉掊掌掎' + '掏掐排掖掘掞掠探掣接控推掩措掬掭掮掰掳掴掷掸掺掼掾揄揆揉揍描提插揕揖揠握揣揩揪揭' + '揳援揶揸揽揿搀搁搂搅搋搌搏搐搒搓搔搛搜搞搠搡搦搪搬搭搴携搽摁摄摅摆摇摈摊摏摒摔摘' + '摛摞摧摩摭摴摸摹摽撂撄撅撇撑撒撕撖撙撞撤撩撬播撮撰撵撷撸撺撼擀擂擅操擎擐擒擘擞擢' + '擤擦擿攀攉攒攘攥攫攮支收攸改攻攽放政故效敉敌敏救敔敕敖教敛敝敞敢散敦敩敫敬数敲整' + '敷文斋斌斐斑斓斗料斛斜斝斟斠斡斤斥斧斩斫断斯新斶方於施旁旃旄旅旆旋旌旎族旐旒旖旗' + '旞无既日旦旧旨早旬旭旮旯旰旱旴旵时旷旸旺旻旿昀昂昃昄昆昇昈昉昊昌明昏昒易昔昕昙昝' + '星映昡昣昤春昧昨昪昫昭是昱昳昴昵昶昺昼昽显晁晃晅晊晋晌晏晐晒晓晔晕晖晗晙晚晞晟晡' + 
'晢晤晦晨晪晫普景晰晱晴晶晷智晾暂暄暅暇暌暑暕暖暗暝暧暨暮暲暴暵暶暹暾暿曈曌曙曛曜' + '曝曦曩曰曲曳更曷曹曼曾替最月有朋服朏朐朓朔朕朗望朝期朦木未末本札术朱朳朴朵朸机朽' + '杀杂权杄杆杈杉杌李杏材村杓杕杖杙杜杞束杠条来杧杨杩杪杭杯杰杲杳杵杷杻杼松板极构枅' + '枇枉枋枍析枕林枘枚果枝枞枢枣枥枧枨枪枫枭枯枰枲枳枵架枷枸枹柁柃柄柈柊柏某柑柒染柔' + '柖柘柙柚柜柝柞柠柢查柩柬柯柰柱柳柴柷柽柿栀栅标栈栉栊栋栌栎栏栐树栒栓栖栗栝栟校栩' + '株栲栳栴样核根栻格栽栾桀桁桂桃桄桅框案桉桊桌桎桐桑桓桔桕桠桡桢档桤桥桦桧桨桩桫桯' + '桲桴桶桷桹梁梃梅梆梌梏梓梗梠梢梣梦梧梨梭梯械梳梴梵梼梽梾梿检棁棂棉棋棍棐棒棓棕棘' + '棚棠棣棤棨棪棫棬森棰棱棵棹棺棻棼棽椀椁椅椆椋植椎椐椑椒椓椟椠椤椪椭椰椴椸椹椽椿楂' + '楒楔楗楙楚楝楞楠楣楦楩楪楫楮楯楷楸楹楼概榃榄榅榆榇榈榉榍榑榔榕榖榛榜榧榨榫榭榰榱' + '榴榷榻槁槃槊槌槎槐槔槚槛槜槟槠槭槱槲槽槿樊樗樘樟模樨横樯樱樵樽樾橄橇橐橑橘橙橛橞' + '橡橥橦橱橹橼檀檄檎檐檑檗檞檠檩檫檬櫆欂欠次欢欣欤欧欲欸欹欺欻款歃歅歆歇歉歌歙止正' + '此步武歧歪歹死歼殁殂殃殄殆殇殉殊残殍殒殓殖殚殛殡殣殪殳殴段殷殿毁毂毅毋毌母每毐毒' + '毓比毕毖毗毙毛毡毪毫毯毳毵毹毽氅氆氇氍氏氐民氓气氕氖氘氙氚氛氟氡氢氤氦氧氨氩氪氮' + '氯氰氲水永氾氿汀汁求汆汇汈汉汊汋汐汔汕汗汛汜汝汞江池污汤汧汨汩汪汫汭汰汲汴汶汹汽' + '汾沁沂沃沄沅沆沇沈沉沌沏沐沓沔沘沙沚沛沟没沣沤沥沦沧沨沩沪沫沭沮沱河沸油沺治沼沽' + '沾沿泂泃泄泅泇泉泊泌泐泓泔法泖泗泙泚泛泜泞泠泡波泣泥注泪泫泮泯泰泱泳泵泷泸泺泻泼' + '泽泾洁洄洇洈洋洌洎洑洒洓洗洘洙洚洛洞洢洣津洧洨洪洫洭洮洱洲洳洴洵洸洹洺活洼洽派洿' + '流浃浅浆浇浈浉浊测浍济浏浐浑浒浓浔浕浙浚浛浜浞浟浠浡浣浥浦浩浪浬浭浮浯浰浲浴海浸' + '浼涂涄涅消涉涌涍涎涐涑涓涔涕涘涛涝涞涟涠涡涢涣涤润涧涨涩涪涫涮涯液涴涵涸涿淀淄淅' + '淆淇淋淌淏淑淖淘淙淜淝淞淟淠淡淤淦淫淬淮淯深淳淴混淹添淼清渊渌渍渎渐渑渔渗渚渝渟' + '渠渡渣渤渥温渫渭港渰渲渴游渺渼湃湄湉湍湎湑湓湔湖湘湛湜湝湟湣湫湮湲湴湾湿溁溃溅溆' + '溇溉溍溏源溘溚溜溞溟溠溢溥溦溧溪溯溱溲溴溵溶溷溹溺溻溽滁滂滃滆滇滉滋滍滏滑滓滔滕' + '滗滘滚滞滟滠满滢滤滥滦滧滨滩滪滫滴滹漂漆漈漉漋漏漓演漕漖漠漤漦漩漪漫漭漯漱漳漴漶' + '漷漹漻漼漾潆潇潋潍潏潖潘潜潞潟潢潦潩潭潮潲潴潵潸潺潼潽潾澂澄澈澉澌澍澎澛澜澡澥澧' + '澪澭澳澴澶澹澼澽激濂濉濋濑濒濞濠濡濩濮濯瀌瀍瀑瀔瀚瀛瀣瀱瀵瀹瀼灈灌灏灞火灭灯灰灵' + '灶灸灼灾灿炀炅炆炉炊炌炎炒炔炕炖炘炙炜炝炟炣炫炬炭炮炯炱炳炷炸点炻炼炽烀烁烂烃烈' + '烊烔烘烙烛烜烝烟烠烤烦烧烨烩烫烬热烯烶烷烹烺烻烽焆焉焊焌焐焓焕焖焗焘焙焚焜焞焦焯' + '焰焱然煁煃煅煊煋煌煎煓煜煞煟煤煦照煨煮煲煳煴煸煺煽熄熇熊熏熔熘熙熛熜熟熠熥熨熬熵' + '熹熻燃燊燋燎燏燔燕燚燠燥燧燮燹爆爇爔爚爝爟爨爪爬爰爱爵父爷爸爹爻爽爿牁牂片版牌牍' + '牒牖牙牚牛牝牟牡牢牤牥牦牧物牮牯牲牵特牺牻牾牿犀犁犄犇犊犋犍犏犒犟犨犬犯犰犴状犷' + '犸犹狁狂狃狄狈狉狍狎狐狒狗狙狝狞狠狡狨狩独狭狮狯狰狱狲狳狴狷狸狺狻狼猁猃猄猇猊猎' + '猕猖猗猛猜猝猞猡猢猥猩猪猫猬献猯猰猱猴猷猹猺猾猿獍獐獒獗獠獬獭獯獴獾玃玄率玉王玎' + '玑玒玓玕玖玘玙玚玛玞玟玠玡玢玤玥玦玩玫玭玮环现玱玲玳玶玷玹玺玻玼玿珀珂珅珇珈珉珊' + '珋珌珍珏珐珑珒珕珖珙珛珝珞珠珢珣珥珦珧珩珪珫班珰珲珵珷珸珹珺珽琀球琄琅理琇琈琉琊' + '琎琏琐琔琚琛琟琡琢琤琥琦琨琪琫琬琭琮琯琰琲琳琴琵琶琼瑀瑁瑂瑃瑄瑅瑆瑑瑓瑔瑕瑖瑗瑙' + '瑚瑛瑜瑝瑞瑟瑢瑧瑨瑬瑭瑰瑱瑳瑶瑷瑾璀璁璃璆璇璈璋璎璐璒璘璜璞璟璠璥璧璨璩璪璬璮璱' + '璲璺瓀瓒瓖瓘瓜瓞瓠瓢瓣瓤瓦瓮瓯瓴瓶瓷瓻瓿甄甍甏甑甓甗甘甚甜生甡甥甦用甩甪甫甬甭甯' + '田由甲申电男甸町画甾畀畅畈畋界畎畏畔畖留畚畛畜畤略畦番畬畯畲畴畸畹畿疁疃疆疍疏疐' + '疑疔疖疗疙疚疝疟疠疡疢疣疤疥疫疬疭疮疯疰疱疲疳疴疵疸疹疼疽疾痂痃痄病症痈痉痊痍痒' + '痓痔痕痘痛痞痢痣痤痦痧痨痪痫痰痱痴痹痼痿瘀瘁瘃瘅瘆瘊瘌瘐瘕瘗瘘瘙瘛瘟瘠瘢瘤瘥瘦瘩' + '瘪瘫瘭瘰瘳瘴瘵瘸瘼瘾瘿癀癃癌癍癔癖癗癜癞癣癫癯癸登白百癿皂的皆皇皈皋皎皑皓皕皖皙' + '皛皞皤皦皭皮皱皲皴皿盂盅盆盈盉益盍盎盏盐监盒盔盖盗盘盛盟盥盦目盯盱盲直盷相盹盼盾' + '省眄眇眈眉眊看眍眙眚真眠眢眦眨眩眬眭眯眵眶眷眸眺眼着睁睃睄睇睎睐睑睚睛睡睢督睥睦' + '睨睫睬睹睽睾睿瞀瞄瞅瞋瞌瞍瞎瞑瞒瞟瞠瞢瞥瞧瞩瞪瞫瞬瞭瞰瞳瞵瞻瞽瞿矍矗矛矜矞矢矣知' + 
'矧矩矫矬短矮矰石矶矸矻矼矾矿砀码砂砄砆砉砌砍砑砒研砖砗砘砚砜砝砟砠砣砥砧砫砬砭砮' + '砰破砵砷砸砹砺砻砼砾础硁硅硇硊硌硍硎硐硒硔硕硖硗硙硚硝硪硫硬硭确硼硿碃碇碈碉碌碍' + '碎碏碑碓碗碘碚碛碜碟碡碣碥碧碨碰碱碲碳碴碶碹碾磁磅磉磊磋磏磐磔磕磙磜磡磨磬磲磴磷' + '磹磻礁礅礌礓礞礴礵示礼社祀祁祃祆祇祈祉祊祋祎祏祐祓祕祖祗祚祛祜祝神祟祠祢祥祧票祭' + '祯祲祷祸祺祼祾禀禁禄禅禊禋福禒禔禘禚禛禤禧禳禹禺离禽禾秀私秃秆秉秋种科秒秕秘租秣' + '秤秦秧秩秫秬秭积称秸移秽秾稀稂稃稆程稌稍税稑稔稗稙稚稞稠稣稳稷稹稻稼稽稿穄穆穑穗' + '穙穜穟穰穴究穷穸穹空穿窀突窃窄窅窈窊窍窎窑窒窕窖窗窘窜窝窟窠窣窥窦窨窬窭窳窸窿立' + '竑竖竘站竞竟章竣童竦竫竭端竹竺竽竿笃笄笆笈笊笋笏笑笔笕笙笛笞笠笤笥符笨笪笫第笮笯' + '笱笳笸笺笼笾筀筅筇等筋筌筏筐筑筒答策筘筚筛筜筝筠筢筤筥筦筮筱筲筵筶筷筹筻筼签简箅' + '箍箐箓箔箕箖算箜管箢箦箧箨箩箪箫箬箭箱箴箸篁篆篇篌篑篓篙篚篝篡篥篦篪篮篯篱篷篼篾' + '簃簇簉簋簌簏簕簖簝簟簠簧簪簰簸簿籀籁籍籥米籴类籼籽粉粑粒粕粗粘粜粝粞粟粢粤粥粪粮' + '粱粲粳粹粼粽精粿糁糅糇糈糊糌糍糒糕糖糗糙糜糟糠糨糯糵系紊素索紧紫累絜絮絷綦綮縠縢' + '縻繁繄繇纂纛纠纡红纣纤纥约级纨纩纪纫纬纭纮纯纰纱纲纳纴纵纶纷纸纹纺纻纼纽纾线绀绁' + '绂练组绅细织终绉绊绋绌绍绎经绐绑绒结绔绕绖绗绘给绚绛络绝绞统绠绡绢绣绤绥绦继绨绩' + '绪绫续绮绯绰绱绲绳维绵绶绷绸绹绺绻综绽绾绿缀缁缂缃缄缅缆缇缈缉缊缌缎缐缑缒缓缔缕' + '编缗缘缙缚缛缜缝缞缟缠缡缢缣缤缥缦缧缨缩缪缫缬缭缮缯缰缱缲缳缴缵缶缸缺罂罄罅罍罐' + '网罔罕罗罘罚罟罡罢罨罩罪置罱署罴罶罹罽罾羁羊羌美羑羓羔羕羖羚羝羞羟羡群羧羯羰羱羲' + '羸羹羼羽羿翀翁翂翃翅翈翊翌翎翔翕翘翙翚翛翟翠翡翥翦翩翮翯翰翱翳翷翻翼翾耀老考耄者' + '耆耇耋而耍耏耐耑耒耔耕耖耗耘耙耜耠耢耤耥耦耧耨耩耪耰耱耳耵耶耷耸耻耽耿聂聃聆聊聋' + '职聍聒联聘聚聩聪聱聿肃肄肆肇肉肋肌肓肖肘肚肛肝肟肠股肢肤肥肩肪肫肭肮肯肱育肴肷肸' + '肺肼肽肾肿胀胁胂胃胄胆胈背胍胎胖胗胙胚胛胜胝胞胠胡胣胤胥胧胨胩胪胫胬胭胯胰胱胲胳' + '胴胶胸胺胼能脂脆脉脊脍脎脏脐脑脒脓脔脖脘脚脞脟脩脬脯脱脲脶脸脾脿腆腈腊腋腌腐腑腒' + '腓腔腕腘腙腚腠腥腧腨腩腭腮腯腰腱腴腹腺腻腼腽腾腿膀膂膈膊膏膑膘膙膛膜膝膦膨膳膺膻' + '臀臂臃臆臊臌臑臜臣臧自臬臭至致臻臼臾舀舁舂舄舅舆舌舍舐舒舔舛舜舞舟舠舢舣舥航舫般' + '舭舯舰舱舲舳舴舵舶舷舸船舻舾艄艅艇艉艋艎艏艘艚艟艨艮良艰色艳艴艺艽艾艿节芃芄芈芊' + '芋芍芎芏芑芒芗芘芙芜芝芟芠芡芣芤芥芦芨芩芪芫芬芭芮芯芰花芳芴芷芸芹芼芽芾苁苄苇苈' + '苉苊苋苌苍苎苏苑苒苓苔苕苗苘苛苜苞苟苠苡苣苤若苦苧苫苯英苴苷苹苻苾茀茁茂范茄茅茆' + '茈茉茋茌茎茏茑茓茔茕茗茚茛茜茝茧茨茫茬茭茯茱茳茴茵茶茸茹茺茼茽荀荁荃荄荆荇草荏荐' + '荑荒荓荔荖荙荚荛荜荞荟荠荡荣荤荥荦荧荨荩荪荫荬荭荮药荷荸荻荼荽莅莆莉莎莒莓莘莙莛' + '莜莝莞莠莨莩莪莫莰莱莲莳莴莶获莸莹莺莼莽莿菀菁菂菅菇菉菊菌菍菏菔菖菘菜菝菟菠菡菥' + '菩菪菰菱菲菹菼菽萁萃萄萆萋萌萍萎萏萑萘萚萜萝萣萤营萦萧萨萩萱萳萸萹萼落葆葎葑葖著' + '葙葚葛葜葡董葩葫葬葭葰葱葳葴葵葶葸葺蒂蒄蒇蒈蒉蒋蒌蒎蒐蒗蒙蒜蒟蒡蒨蒯蒱蒲蒴蒸蒹蒺' + '蒻蒽蒿蓁蓂蓄蓇蓉蓊蓍蓏蓐蓑蓓蓖蓝蓟蓠蓢蓣蓥蓦蓬蓰蓼蓿蔀蔃蔈蔊蔌蔑蔓蔗蔚蔟蔡蔫蔬蔷' + '蔸蔹蔺蔻蔼蔽蕃蕈蕉蕊蕖蕗蕙蕞蕤蕨蕰蕲蕴蕹蕺蕻蕾薁薄薅薇薏薛薜薢薤薨薪薮薯薰薳薷薸' + '薹薿藁藉藏藐藓藕藜藟藠藤藦藨藩藻藿蘅蘑蘖蘘蘧蘩蘸蘼虎虏虐虑虒虓虔虚虞虢虤虫虬虮虱' + '虷虸虹虺虻虼虽虾虿蚀蚁蚂蚄蚆蚊蚋蚌蚍蚓蚕蚜蚝蚣蚤蚧蚨蚩蚪蚬蚯蚰蚱蚲蚴蚶蚺蛀蛃蛄蛆' + '蛇蛉蛊蛋蛎蛏蛐蛑蛔蛘蛙蛛蛞蛟蛤蛩蛭蛮蛰蛱蛲蛳蛴蛸蛹蛾蜀蜂蜃蜇蜈蜉蜊蜍蜎蜐蜒蜓蜕蜗' + '蜘蜚蜜蜞蜡蜢蜣蜥蜩蜮蜱蜴蜷蜻蜾蜿蝇蝈蝉蝌蝎蝓蝗蝘蝙蝠蝣蝤蝥蝮蝰蝲蝴蝶蝻蝼蝽蝾螂螃' + '螅螈螋融螗螟螠螣螨螫螬螭螯螱螳螵螺螽蟀蟆蟊蟋蟏蟑蟒蟛蟠蟥蟪蟫蟮蟹蟾蠃蠊蠋蠓蠕蠖蠡' + '蠢蠲蠹蠼血衃衄衅行衍衎衒衔街衙衠衡衢衣补表衩衫衬衮衰衲衷衽衾衿袁袂袄袅袆袈袋袍袒' + '袖袗袜袢袤袪被袭袯袱袷袼裁裂装裆裈裉裎裒裔裕裘裙裛裟裢裣裤裥裨裰裱裳裴裸裹裼裾褂' + '褊褐褒褓褕褙褚褛褟褡褥褪褫褯褰褴褶襁襄襕襚襜襞襟襦襫襻西要覃覆见观觃规觅视觇览觉' + '觊觋觌觎觏觐觑角觖觚觜觞觟解觥触觫觭觯觱觳觿言訄訇訚訾詈詟詹誉誊誓謇警譬计订讣认' + '讥讦讧讨让讪讫训议讯记讱讲讳讴讵讶讷许讹论讻讼讽设访诀证诂诃评诅识诇诈诉诊诋诌词' + 
'诎诏诐译诒诓诔试诖诗诘诙诚诛诜话诞诟诠诡询诣诤该详诧诨诩诫诬语诮误诰诱诲诳说诵请' + '诸诹诺读诼诽课诿谀谁谂调谄谅谆谇谈谊谋谌谍谎谏谐谑谒谓谔谕谖谗谙谚谛谜谝谞谟谠谡' + '谢谣谤谥谦谧谨谩谪谫谬谭谮谯谰谱谲谳谴谵谶谷谼谿豁豆豇豉豌豕豚象豢豨豪豫豮豳豸豹' + '豺貂貅貆貉貊貌貔貘贝贞负贡财责贤败账货质贩贪贫贬购贮贯贰贱贲贳贴贵贶贷贸费贺贻贼' + '贽贾贿赀赁赂赃资赅赆赇赈赉赊赋赌赍赎赏赐赑赒赓赔赕赖赗赘赙赚赛赜赝赞赟赠赡赢赣赤' + '赦赧赪赫赭走赳赴赵赶起趁趄超越趋趑趔趟趣趯趱足趴趵趸趺趼趾趿跂跃跄跆跋跌跎跏跐跑' + '跖跗跚跛距跞跟跣跤跨跪跬路跱跳践跶跷跸跹跺跻跽踅踉踊踌踏踒踔踝踞踟踢踣踦踩踪踬踮' + '踯踱踵踶踹踺踽蹀蹁蹂蹄蹅蹇蹈蹉蹊蹋蹐蹑蹒蹙蹚蹜蹢蹦蹩蹬蹭蹯蹰蹲蹴蹶蹼蹽蹾蹿躁躅躇' + '躏躐躔躜躞身躬躯躲躺车轧轨轩轪轫转轭轮软轰轱轲轳轴轵轶轷轸轹轺轻轼载轾轿辀辁辂较' + '辄辅辆辇辈辉辊辋辌辍辎辏辐辑辒输辔辕辖辗辘辙辚辛辜辞辟辣辨辩辫辰辱边辽达辿迁迂迄' + '迅过迈迎运近迓返迕还这进远违连迟迢迤迥迦迨迩迪迫迭迮述迳迷迸迹迺追退送适逃逄逅逆' + '选逊逋逍透逐逑递途逖逗通逛逝逞速造逡逢逦逭逮逯逴逵逶逸逻逼逾遁遂遄遆遇遍遏遐遑遒' + '道遗遘遛遢遣遥遨遭遮遴遵遹遽避邀邂邃邈邋邑邓邕邗邘邙邛邝邠邡邢那邦邨邪邬邮邯邰邱' + '邲邳邴邵邶邸邹邺邻邽邾邿郁郃郄郅郇郈郊郎郏郐郑郓郗郚郛郜郝郡郢郤郦郧部郪郫郭郯郴' + '郸都郾郿鄀鄂鄃鄄鄅鄌鄑鄗鄘鄙鄚鄜鄞鄠鄢鄣鄫鄯鄱鄹酂酃酅酆酉酊酋酌配酎酏酐酒酗酚酝' + '酞酡酢酣酤酥酦酩酪酬酮酯酰酱酲酴酵酶酷酸酹酺酽酾酿醅醇醉醋醌醍醐醑醒醚醛醢醨醪醭' + '醮醯醴醵醺醾采釉释里重野量釐金釜鉴銎銮鋆鋈錾鍪鎏鏊鏖鐾鑫钆钇针钉钊钋钌钍钎钏钐钒' + '钓钔钕钖钗钘钙钚钛钜钝钞钟钠钡钢钣钤钥钦钧钨钩钪钫钬钭钮钯钰钱钲钳钴钵钷钹钺钻钼' + '钽钾钿铀铁铂铃铄铅铆铈铉铊铋铌铍铎铏铐铑铒铕铖铗铘铙铚铛铜铝铞铟铠铡铢铣铤铥铧铨' + '铩铪铫铬铭铮铯铰铱铲铳铴铵银铷铸铹铺铻铼铽链铿销锁锂锃锄锅锆锇锈锉锊锋锌锍锎锏锐' + '锑锒锓锔锕锖锗锘错锚锛锜锝锞锟锡锢锣锤锥锦锧锨锩锪锫锬锭键锯锰锱锲锳锴锵锶锷锸锹' + '锺锻锼锽锾锿镀镁镂镃镄镅镆镇镈镉镊镋镌镍镎镏镐镑镒镓镔镕镖镗镘镚镛镜镝镞镠镡镢镣' + '镤镥镦镧镨镩镪镫镬镭镮镯镰镱镲镳镴镵镶长门闩闪闫闭问闯闰闱闲闳间闵闶闷闸闹闺闻闼' + '闽闾闿阀阁阂阃阄阅阆阇阈阉阊阋阌阍阎阏阐阑阒阔阕阖阗阘阙阚阜队阡阪阮阱防阳阴阵阶' + '阻阼阽阿陀陂附际陆陇陈陉陋陌降陎限陑陔陕陛陞陟陡院除陧陨险陪陬陲陴陵陶陷隃隅隆隈' + '隋隍随隐隔隗隘隙障隧隩隰隳隶隹隺隼隽难雀雁雄雅集雇雉雊雌雍雎雏雒雕雠雨雩雪雯雱雳' + '零雷雹雾需霁霄霅霆震霈霉霍霎霏霓霖霜霞霨霪霭霰露霸霹霾青靓靖静靛非靠靡面靥革靬靰' + '靳靴靶靸靺靼靽靿鞁鞅鞋鞍鞑鞒鞔鞘鞠鞡鞣鞧鞨鞫鞬鞭鞮鞯鞲鞳鞴韂韦韧韨韩韪韫韬韭音韵' + '韶页顶顷顸项顺须顼顽顾顿颀颁颂颃预颅领颇颈颉颊颋颌颍颎颏颐频颓颔颖颗题颙颚颛颜额' + '颞颟颠颡颢颤颥颦颧风飏飐飑飒飓飔飕飗飘飙飞食飧飨餍餐餮饔饕饥饧饨饩饪饫饬饭饮饯饰' + '饱饲饳饴饵饶饷饸饹饺饻饼饽饿馁馃馄馅馆馇馈馉馊馋馌馍馏馐馑馒馓馔馕首馗馘香馝馞馥' + '馧馨马驭驮驯驰驱驲驳驴驵驶驷驸驹驺驻驼驽驾驿骀骁骂骃骄骅骆骇骈骉骊骋验骍骎骏骐骑' + '骒骓骕骖骗骘骙骚骛骜骝骞骟骠骡骢骣骤骥骦骧骨骰骱骶骷骸骺骼髀髁髂髃髅髋髌髎髑髓高' + '髡髢髦髫髭髯髹髻髽鬃鬈鬏鬒鬓鬘鬟鬣鬯鬲鬶鬷鬻鬼魁魂魃魄魅魆魇魈魉魋魍魏魑魔鱼鱽鱾' + '鱿鲀鲁鲂鲃鲅鲆鲇鲈鲉鲊鲋鲌鲍鲎鲏鲐鲑鲒鲔鲕鲖鲗鲘鲙鲚鲛鲜鲝鲞鲟鲠鲡鲢鲣鲤鲥鲦鲧鲨' + '鲩鲪鲫鲬鲭鲮鲯鲰鲱鲲鲳鲴鲵鲷鲸鲹鲺鲻鲼鲽鲾鲿鳀鳁鳂鳃鳄鳅鳇鳈鳉鳊鳌鳍鳎鳏鳐鳑鳒鳓' + '鳔鳕鳖鳗鳘鳙鳚鳛鳜鳝鳞鳟鳠鳡鳢鳣鳤鸟鸠鸡鸢鸣鸤鸥鸦鸧鸨鸩鸪鸫鸬鸭鸮鸯鸰鸱鸲鸳鸵鸶' + '鸷鸸鸹鸺鸻鸼鸽鸾鸿鹀鹁鹂鹃鹄鹅鹆鹇鹈鹉鹊鹋鹌鹍鹎鹏鹐鹑鹒鹔鹕鹖鹗鹘鹙鹚鹛鹜鹝鹞鹟' + '鹠鹡鹢鹣鹤鹦鹧鹨鹩鹪鹫鹬鹭鹮鹯鹰鹱鹲鹳鹴鹾鹿麀麂麇麈麋麑麒麓麖麝麟麦麸麹麻麽麾黄' + '黇黉黍黎黏黑黔默黛黜黝黟黠黡黢黥黧黩黪黯黹黻黼黾鼋鼍鼎鼐鼒鼓鼗鼙鼠鼢鼩鼫鼬鼯鼱鼷' + '鼹鼻鼽鼾齁齇齉齐齑齿龀龁龂龃龄龅龆龇龈龉龊龋龌龙龚龛龟龠龢鿍鿎鿏㑇㑊㕮㘎㙍㙘㙦㛃' + '㛚㛹㟃㠇㠓㤘㥄㧐㧑㧟㫰㬊㬎㬚㭎㭕㮾㰀㳇㳘㳚㴔㵐㶲㸆㸌㺄㻬㽏㿠䁖䂮䃅䃎䅟䌹䎃䎖䏝䏡' + '䏲䐃䓖䓛䓨䓫䓬䗖䗛䗪䗴䜣䝙䢺䢼䣘䥽䦃䲟䲠䲢䴓䴔䴕䴖䴗䴘䴙䶮𠅤𠙶𠳐𡎚𡐓𣗋𣲗𣲘𣸣𤧛𤩽' + '𤫉𥔲𥕢𥖨𥻗𦈡𦒍𦙶𦝼𦭜𦰡𧿹𨐈𨙸𨚕𨟠𨭉𨱇𨱏𨱑𨱔𨺙𩽾𩾃𩾌𪟝𪣻𪤗𪨰𪨶𪩘𪾢𫄧𫄨𫄷𫄸𫇭𫌀𫍣𫍯' + 
'𫍲𫍽𫐄𫐐𫐓𫑡𫓧𫓯𫓶𫓹𫔍𫔎𫔶𫖮𫖯𫖳𫗧𫗴𫘜𫘝𫘦𫘧𫘨𫘪𫘬𫚕𫚖𫚭𫛭𫞩𫟅𫟦𫟹𫟼𫠆𫠊𫠜𫢸𫫇𫭟' + '𫭢𫭼𫮃𫰛𫵷𫶇𫷷𫸩𬀩𬀪𬂩𬃊𬇕𬇙𬇹𬉼𬊈𬊤𬌗𬍛𬍡𬍤𬒈𬒔𬒗𬕂𬘓𬘘𬘡𬘩𬘫𬘬𬘭𬘯𬙂𬙊𬙋𬜬𬜯𬞟' + '𬟁𬟽𬣙𬣞𬣡𬣳𬤇𬤊𬤝𬨂𬨎𬩽𬪩𬬩𬬭𬬮𬬱𬬸𬬹𬬻𬬿𬭁𬭊𬭎𬭚𬭛𬭤𬭩𬭬𬭯𬭳𬭶𬭸𬭼𬮱𬮿𬯀𬯎𬱖𬱟' + '𬳵𬳶𬳽𬳿𬴂𬴃𬴊𬶋𬶍𬶏𬶐𬶟𬶠𬶨𬶭𬶮𬷕𬸘𬸚𬸣𬸦𬸪𬹼𬺈𬺓' +) +CN_CHARS_EXT = '吶诶屌囧飚屄' + +CN_CHARS = CN_CHARS_COMMON + CN_CHARS_EXT +IN_CH_CHARS = { c : True for c in CN_CHARS } + +EN_CHARS = string.ascii_letters + string.digits +IN_EN_CHARS = { c : True for c in EN_CHARS } + +VALID_CHARS = CN_CHARS + EN_CHARS + ' ' +IN_VALID_CHARS = { c : True for c in VALID_CHARS } + +# ================================================================================ # +# basic class +# ================================================================================ # +class ChineseChar(object): + """ + 中文字符 + 每个字符对应简体和繁体, + e.g. 简体 = '负', 繁体 = '負' + 转换时可转换为简体或繁体 + """ + + def __init__(self, simplified, traditional): + self.simplified = simplified + self.traditional = traditional + #self.__repr__ = self.__str__ + + def __str__(self): + return self.simplified or self.traditional or None + + def __repr__(self): + return self.__str__() + + +class ChineseNumberUnit(ChineseChar): + """ + 中文数字/数位字符 + 每个字符除繁简体外还有一个额外的大写字符 + e.g. 
'陆' 和 '陸' + """ + + def __init__(self, power, simplified, traditional, big_s, big_t): + super(ChineseNumberUnit, self).__init__(simplified, traditional) + self.power = power + self.big_s = big_s + self.big_t = big_t + + def __str__(self): + return '10^{}'.format(self.power) + + @classmethod + def create(cls, index, value, numbering_type=NUMBERING_TYPES[1], small_unit=False): + + if small_unit: + return ChineseNumberUnit(power=index + 1, + simplified=value[0], traditional=value[1], big_s=value[1], big_t=value[1]) + elif numbering_type == NUMBERING_TYPES[0]: + return ChineseNumberUnit(power=index + 8, + simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1]) + elif numbering_type == NUMBERING_TYPES[1]: + return ChineseNumberUnit(power=(index + 2) * 4, + simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1]) + elif numbering_type == NUMBERING_TYPES[2]: + return ChineseNumberUnit(power=pow(2, index + 3), + simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1]) + else: + raise ValueError( + 'Counting type should be in {0} ({1} provided).'.format(NUMBERING_TYPES, numbering_type)) + + +class ChineseNumberDigit(ChineseChar): + """ + 中文数字字符 + """ + + def __init__(self, value, simplified, traditional, big_s, big_t, alt_s=None, alt_t=None): + super(ChineseNumberDigit, self).__init__(simplified, traditional) + self.value = value + self.big_s = big_s + self.big_t = big_t + self.alt_s = alt_s + self.alt_t = alt_t + + def __str__(self): + return str(self.value) + + @classmethod + def create(cls, i, v): + return ChineseNumberDigit(i, v[0], v[1], v[2], v[3]) + + +class ChineseMath(ChineseChar): + """ + 中文数位字符 + """ + + def __init__(self, simplified, traditional, symbol, expression=None): + super(ChineseMath, self).__init__(simplified, traditional) + self.symbol = symbol + self.expression = expression + self.big_s = simplified + self.big_t = traditional + + +CC, CNU, CND, CM = ChineseChar, ChineseNumberUnit, 
ChineseNumberDigit, ChineseMath + + +class NumberSystem(object): + """ + 中文数字系统 + """ + pass + + +class MathSymbol(object): + """ + 用于中文数字系统的数学符号 (繁/简体), e.g. + positive = ['正', '正'] + negative = ['负', '負'] + point = ['点', '點'] + """ + + def __init__(self, positive, negative, point): + self.positive = positive + self.negative = negative + self.point = point + + def __iter__(self): + for v in self.__dict__.values(): + yield v + + +# class OtherSymbol(object): +# """ +# 其他符号 +# """ +# +# def __init__(self, sil): +# self.sil = sil +# +# def __iter__(self): +# for v in self.__dict__.values(): +# yield v + + +# ================================================================================ # +# basic utils +# ================================================================================ # +def create_system(numbering_type=NUMBERING_TYPES[1]): + """ + 根据数字系统类型返回创建相应的数字系统,默认为 mid + NUMBERING_TYPES = ['low', 'mid', 'high']: 中文数字系统类型 + low: '兆' = '亿' * '十' = $10^{9}$, '京' = '兆' * '十', etc. + mid: '兆' = '亿' * '万' = $10^{12}$, '京' = '兆' * '万', etc. + high: '兆' = '亿' * '亿' = $10^{16}$, '京' = '兆' * '兆', etc. 
+ 返回对应的数字系统 + """ + + # chinese number units of '亿' and larger + all_larger_units = zip( + LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED, LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL) + larger_units = [CNU.create(i, v, numbering_type, False) + for i, v in enumerate(all_larger_units)] + # chinese number units of '十, 百, 千, 万' + all_smaller_units = zip( + SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED, SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL) + smaller_units = [CNU.create(i, v, small_unit=True) + for i, v in enumerate(all_smaller_units)] + # digis + chinese_digis = zip(CHINESE_DIGIS, CHINESE_DIGIS, + BIG_CHINESE_DIGIS_SIMPLIFIED, BIG_CHINESE_DIGIS_TRADITIONAL) + digits = [CND.create(i, v) for i, v in enumerate(chinese_digis)] + digits[0].alt_s, digits[0].alt_t = ZERO_ALT, ZERO_ALT + digits[1].alt_s, digits[1].alt_t = ONE_ALT, ONE_ALT + digits[2].alt_s, digits[2].alt_t = TWO_ALTS[0], TWO_ALTS[1] + + # symbols + positive_cn = CM(POSITIVE[0], POSITIVE[1], '+', lambda x: x) + negative_cn = CM(NEGATIVE[0], NEGATIVE[1], '-', lambda x: -x) + point_cn = CM(POINT[0], POINT[1], '.', lambda x, + y: float(str(x) + '.' 
+ str(y))) + # sil_cn = CM(SIL[0], SIL[1], '-', lambda x, y: float(str(x) + '-' + str(y))) + system = NumberSystem() + system.units = smaller_units + larger_units + system.digits = digits + system.math = MathSymbol(positive_cn, negative_cn, point_cn) + # system.symbols = OtherSymbol(sil_cn) + return system + + +def chn2num(chinese_string, numbering_type=NUMBERING_TYPES[1]): + + def get_symbol(char, system): + for u in system.units: + if char in [u.traditional, u.simplified, u.big_s, u.big_t]: + return u + for d in system.digits: + if char in [d.traditional, d.simplified, d.big_s, d.big_t, d.alt_s, d.alt_t]: + return d + for m in system.math: + if char in [m.traditional, m.simplified]: + return m + + def string2symbols(chinese_string, system): + int_string, dec_string = chinese_string, '' + for p in [system.math.point.simplified, system.math.point.traditional]: + if p in chinese_string: + int_string, dec_string = chinese_string.split(p) + break + return [get_symbol(c, system) for c in int_string], \ + [get_symbol(c, system) for c in dec_string] + + def correct_symbols(integer_symbols, system): + """ + 一百八 to 一百八十 + 一亿一千三百万 to 一亿 一千万 三百万 + """ + + if integer_symbols and isinstance(integer_symbols[0], CNU): + if integer_symbols[0].power == 1: + integer_symbols = [system.digits[1]] + integer_symbols + + if len(integer_symbols) > 1: + if isinstance(integer_symbols[-1], CND) and isinstance(integer_symbols[-2], CNU): + integer_symbols.append( + CNU(integer_symbols[-2].power - 1, None, None, None, None)) + + result = [] + unit_count = 0 + for s in integer_symbols: + if isinstance(s, CND): + result.append(s) + unit_count = 0 + elif isinstance(s, CNU): + current_unit = CNU(s.power, None, None, None, None) + unit_count += 1 + + if unit_count == 1: + result.append(current_unit) + elif unit_count > 1: + for i in range(len(result)): + if isinstance(result[-i - 1], CNU) and result[-i - 1].power < current_unit.power: + result[-i - 1] = CNU(result[-i - 1].power + + 
                                                 current_unit.power, None, None, None, None)
        return result

    def compute_value(integer_symbols):
        """
        Compute the value.
        When current unit is larger than previous unit, current unit * all previous units will be used as all previous units.
        e.g. '两千万' = 2000 * 10000 not 2000 + 10000
        """
        value = [0]
        last_power = 0
        for s in integer_symbols:
            if isinstance(s, CND):
                value[-1] = s.value
            elif isinstance(s, CNU):
                value[-1] *= pow(10, s.power)
                if s.power > last_power:
                    # unit larger than anything seen so far: scale every
                    # previously accumulated group by it
                    value[:-1] = list(map(lambda v: v *
                                          pow(10, s.power), value[:-1]))
                last_power = s.power
                value.append(0)
        return sum(value)

    system = create_system(numbering_type)
    int_part, dec_part = string2symbols(chinese_string, system)
    int_part = correct_symbols(int_part, system)
    int_str = str(compute_value(int_part))
    # decimal digits are read out one by one, with no positional units
    dec_str = ''.join([str(d.value) for d in dec_part])
    if dec_part:
        return '{0}.{1}'.format(int_str, dec_str)
    else:
        return int_str


def num2chn(number_string, numbering_type=NUMBERING_TYPES[1], big=False,
            traditional=False, alt_zero=False, alt_one=False, alt_two=True,
            use_zeros=True, use_units=True):
    """Convert an Arabic digit string into Chinese numerals."""

    def get_value(value_string, use_zeros=True):

        striped_string = value_string.lstrip('0')

        # record nothing if all zeros
        if not striped_string:
            return []

        # record one digits
        elif len(striped_string) == 1:
            if use_zeros and len(value_string) != len(striped_string):
                return [system.digits[0], system.digits[int(striped_string)]]
            else:
                return [system.digits[int(striped_string)]]

        # recursively record multiple digits
        else:
            result_unit = next(u for u in reversed(
                system.units) if u.power < len(striped_string))
            result_string = value_string[:-result_unit.power]
            return get_value(result_string) + [result_unit] + get_value(striped_string[-result_unit.power:])

    system = create_system(numbering_type)

    int_dec = number_string.split('.')
    if len(int_dec) == 1:
        int_string = int_dec[0]
        dec_string = ""
    elif len(int_dec) ==
            2:
        int_string = int_dec[0]
        dec_string = int_dec[1]
    else:
        raise ValueError(
            "invalid input num string with more than one dot: {}".format(number_string))

    if use_units and len(int_string) > 1:
        result_symbols = get_value(int_string)
    else:
        # digit-by-digit reading (e.g. phone numbers, IDs)
        result_symbols = [system.digits[int(c)] for c in int_string]
    dec_symbols = [system.digits[int(c)] for c in dec_string]
    if dec_string:
        result_symbols += [system.math.point] + dec_symbols

    if alt_two:
        # use the alternative form of 2 (两/兩) before units other than 十
        liang = CND(2, system.digits[2].alt_s, system.digits[2].alt_t,
                    system.digits[2].big_s, system.digits[2].big_t)
        for i, v in enumerate(result_symbols):
            if isinstance(v, CND) and v.value == 2:
                next_symbol = result_symbols[i +
                                             1] if i < len(result_symbols) - 1 else None
                previous_symbol = result_symbols[i - 1] if i > 0 else None
                if isinstance(next_symbol, CNU) and isinstance(previous_symbol, (CNU, type(None))):
                    if next_symbol.power != 1 and ((previous_symbol is None) or (previous_symbol.power != 1)):
                        result_symbols[i] = liang

    # if big is True, '两' will not be used and `alt_two` has no impact on output
    if big:
        attr_name = 'big_'
        if traditional:
            attr_name += 't'
        else:
            attr_name += 's'
    else:
        if traditional:
            attr_name = 'traditional'
        else:
            attr_name = 'simplified'

    result = ''.join([getattr(s, attr_name) for s in result_symbols])

    # if not use_zeros:
    #     result = result.strip(getattr(system.digits[0], attr_name))

    if alt_zero:
        result = result.replace(
            getattr(system.digits[0], attr_name), system.digits[0].alt_s)

    if alt_one:
        result = result.replace(
            getattr(system.digits[1], attr_name), system.digits[1].alt_s)

    # values < 1: prefix a zero digit before a leading decimal point
    for i, p in enumerate(POINT):
        if result.startswith(p):
            return CHINESE_DIGIS[0] + result

    # ^10, 11, .., 19
    if len(result) >= 2 and result[1] in [SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED[0],
                                          SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL[0]] and \
            result[0] in [CHINESE_DIGIS[1], BIG_CHINESE_DIGIS_SIMPLIFIED[1],
                          BIG_CHINESE_DIGIS_TRADITIONAL[1]]:
        # drop the leading 一 of 一十X so 10..19 read as 十X
        result = result[1:]

    return result


# ================================================================================ #
#                         different types of rewriters
# ================================================================================ #
class Cardinal:
    """
    CARDINAL rewriter: cardinal number <-> Chinese text.
    """

    def __init__(self, cardinal=None, chntext=None):
        self.cardinal = cardinal  # Arabic digit string
        self.chntext = chntext    # Chinese numeral string

    def chntext2cardinal(self):
        return chn2num(self.chntext)

    def cardinal2chntext(self):
        return num2chn(self.cardinal)

class Digit:
    """
    DIGIT rewriter: digit-by-digit reading, no positional units.
    """

    def __init__(self, digit=None, chntext=None):
        self.digit = digit
        self.chntext = chntext

    # def chntext2digit(self):
    #     return chn2num(self.chntext)

    def digit2chntext(self):
        return num2chn(self.digit, alt_two=False, use_units=False)


class TelePhone:
    """
    TELEPHONE rewriter: phone numbers are read digit by digit.
    """

    def __init__(self, telephone=None, raw_chntext=None, chntext=None):
        self.telephone = telephone
        self.raw_chntext = raw_chntext
        self.chntext = chntext

    # def chntext2telephone(self):
    #     sil_parts = self.raw_chntext.split('')
    #     self.telephone = '-'.join([
    #         str(chn2num(p)) for p in sil_parts
    #     ])
    #     return self.telephone

    def telephone2chntext(self, fixed=False):
        # NOTE(review): the join separator and replace target below are empty
        # strings, making both no-ops; upstream variants use a silence marker
        # (e.g. '<SIL>'/'<SP>') here that appears to have been lost in
        # transit -- confirm against the original source before relying on
        # raw_chntext.
        if fixed:
            # fixed-line numbers: groups separated by '-'
            sil_parts = self.telephone.split('-')
            self.raw_chntext = ''.join([
                num2chn(part, alt_two=False, use_units=False) for part in sil_parts
            ])
            self.chntext = self.raw_chntext.replace('', '')
        else:
            # mobile numbers: optional '+86' prefix, groups separated by spaces
            sp_parts = self.telephone.strip('+').split()
            self.raw_chntext = ''.join([
                num2chn(part, alt_two=False, use_units=False) for part in sp_parts
            ])
            self.chntext = self.raw_chntext.replace('', '')
        return self.chntext


class Fraction:
    """
    FRACTION rewriter: 'a/b' <-> 'b分之a'.
    """

    def __init__(self, fraction=None, chntext=None):
        self.fraction = fraction
        self.chntext = chntext

    def chntext2fraction(self):
        denominator, numerator = self.chntext.split('分之')
        return chn2num(numerator) + '/' + 
chn2num(denominator) + + def fraction2chntext(self): + numerator, denominator = self.fraction.split('/') + return num2chn(denominator) + '分之' + num2chn(numerator) + + +class Date: + """ + DATE类 + """ + + def __init__(self, date=None, chntext=None): + self.date = date + self.chntext = chntext + + # def chntext2date(self): + # chntext = self.chntext + # try: + # year, other = chntext.strip().split('年', maxsplit=1) + # year = Digit(chntext=year).digit2chntext() + '年' + # except ValueError: + # other = chntext + # year = '' + # if other: + # try: + # month, day = other.strip().split('月', maxsplit=1) + # month = Cardinal(chntext=month).chntext2cardinal() + '月' + # except ValueError: + # day = chntext + # month = '' + # if day: + # day = Cardinal(chntext=day[:-1]).chntext2cardinal() + day[-1] + # else: + # month = '' + # day = '' + # date = year + month + day + # self.date = date + # return self.date + + def date2chntext(self): + date = self.date + try: + year, other = date.strip().split('年', 1) + year = Digit(digit=year).digit2chntext() + '年' + except ValueError: + other = date + year = '' + if other: + try: + month, day = other.strip().split('月', 1) + month = Cardinal(cardinal=month).cardinal2chntext() + '月' + except ValueError: + day = date + month = '' + if day: + day = Cardinal(cardinal=day[:-1]).cardinal2chntext() + day[-1] + else: + month = '' + day = '' + chntext = year + month + day + self.chntext = chntext + return self.chntext + + +class Money: + """ + MONEY类 + """ + + def __init__(self, money=None, chntext=None): + self.money = money + self.chntext = chntext + + # def chntext2money(self): + # return self.money + + def money2chntext(self): + money = self.money + pattern = re.compile(r'(\d+(\.\d+)?)') + matchers = pattern.findall(money) + if matchers: + for matcher in matchers: + money = money.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext()) + self.chntext = money + return self.chntext + + +class Percentage: + """ + PERCENTAGE类 + """ + + 
    def __init__(self, percentage=None, chntext=None):
        self.percentage = percentage
        self.chntext = chntext

    def chntext2percentage(self):
        # NOTE(review): str.strip('百分之') strips any of the characters
        # 百/分/之 from both ends, not the literal prefix.  Works for inputs
        # produced by normalize_nsw, but fragile for general use.
        return chn2num(self.chntext.strip().strip('百分之')) + '%'

    def percentage2chntext(self):
        return '百分之' + num2chn(self.percentage.strip().strip('%'))


def normalize_nsw(raw_text):
    """Rewrite non-standard words (dates, money, phone numbers, fractions,
    percentages, cardinals, digit sequences) in raw_text into Chinese
    readings.  '^'/'$' sentinels are added so the non-digit-anchored
    patterns can match at string boundaries, and stripped before return."""
    text = '^' + raw_text + '$'

    # normalize dates
    pattern = re.compile(r"\D+((([089]\d|(19|20)\d{2})年)?(\d{1,2}月(\d{1,2}[日号])?)?)")
    matchers = pattern.findall(text)
    if matchers:
        # print('date')
        for matcher in matchers:
            text = text.replace(matcher[0], Date(date=matcher[0]).date2chntext(), 1)

    # normalize money amounts
    pattern = re.compile(r"\D+((\d+(\.\d+)?)[多余几]?" + CURRENCY_UNITS + r"(\d" + CURRENCY_UNITS + r"?)?)")
    matchers = pattern.findall(text)
    if matchers:
        # print('money')
        for matcher in matchers:
            text = text.replace(matcher[0], Money(money=matcher[0]).money2chntext(), 1)

    # normalize mobile / fixed-line phone numbers
    # mobile prefixes, see http://www.jihaoba.com/news/show/13680
    # China Mobile: 139、138、137、136、135、134、159、158、157、150、151、152、188、187、182、183、184、178、198
    # China Unicom: 130、131、132、156、155、186、185、176
    # China Telecom: 133、153、189、180、181、177
    pattern = re.compile(r"\D((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})\D")
    matchers = pattern.findall(text)
    if matchers:
        # print('telephone')
        for matcher in matchers:
            text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(), 1)
    # fixed-line numbers (optional area code)
    pattern = re.compile(r"\D((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})\D")
    matchers = pattern.findall(text)
    if matchers:
        # print('fixed telephone')
        for matcher in matchers:
            text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(fixed=True), 1)

    # normalize fractions
    pattern = re.compile(r"(\d+/\d+)")
    matchers = pattern.findall(text)
    if matchers:
        # print('fraction')
        for matcher in matchers:
            text = text.replace(matcher, Fraction(fraction=matcher).fraction2chntext(), 1)

    # normalize percentages (fold the full-width percent sign first)
    text = text.replace('%', '%')
    pattern = 
re.compile(r"(\d+(\.\d+)?%)")
    matchers = pattern.findall(text)
    if matchers:
        # print('percentage')
        for matcher in matchers:
            text = text.replace(matcher[0], Percentage(percentage=matcher[0]).percentage2chntext(), 1)

    # normalize number + classifier/quantifier
    pattern = re.compile(r"(\d+(\.\d+)?)[多余几]?" + COM_QUANTIFIERS)
    matchers = pattern.findall(text)
    if matchers:
        # print('cardinal+quantifier')
        for matcher in matchers:
            text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1)

    # normalize long digit sequences (4+ digits, e.g. IDs: read digit by digit)
    pattern = re.compile(r"(\d{4,32})")
    matchers = pattern.findall(text)
    if matchers:
        # print('digit')
        for matcher in matchers:
            text = text.replace(matcher, Digit(digit=matcher).digit2chntext(), 1)

    # normalize remaining plain cardinals
    pattern = re.compile(r"(\d+(\.\d+)?)")
    matchers = pattern.findall(text)
    if matchers:
        # print('cardinal')
        for matcher in matchers:
            text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1)


    # restore P2P, O2O, B2C, B2B etc
    pattern = re.compile(r"(([a-zA-Z]+)二([a-zA-Z]+))")
    matchers = pattern.findall(text)
    if matchers:
        # print('particular')
        for matcher in matchers:
            text = text.replace(matcher[0], matcher[1]+'2'+matcher[2], 1)

    return text.lstrip('^').rstrip('$')


def remove_erhua(text):
    """
    Remove the erhua suffix 儿 unless protected by the whitelist:
    他女儿在那边儿 -> 他女儿在那边
    """

    new_str=''
    while re.search('儿',text):
        a = re.search('儿',text).span()
        remove_er_flag = 0

        if ER_WHITELIST_PATTERN.search(text):
            b = ER_WHITELIST_PATTERN.search(text).span()
            if b[0] <= a[0]:
                # a whitelisted word precedes (or contains) this 儿: keep it
                remove_er_flag = 1

        if remove_er_flag == 0 :
            # drop this 儿 and continue scanning the remainder
            new_str = new_str + text[0:a[0]]
            text = text[a[1]:]
        else:
            # copy through the end of the whitelisted word unchanged
            new_str = new_str + text[0:b[1]]
            text = text[b[1]:]

    text = new_str + text
    return text


def remove_space(text):
    """Remove whitespace between tokens, except between two tokens that
    touch with English characters at the boundary."""
    tokens = text.split()
    new = []
    for k,t in enumerate(tokens):
        if k != 0:
            if IN_EN_CHARS.get(tokens[k-1][-1]) and IN_EN_CHARS.get(t[0]):
                new.append(' ')
        new.append(t)
    return ''.join(new)


class TextNorm:
    def
__init__(self,
                 to_banjiao:bool = False,
                 to_upper:bool = False,
                 to_lower:bool = False,
                 remove_fillers:bool = False,
                 remove_erhua:bool = False,
                 check_chars:bool = False,
                 remove_space:bool = False,
                 cc_mode:str = '',
                 ) :
        # pipeline switches; see __call__ for the order they are applied in
        self.to_banjiao = to_banjiao
        self.to_upper = to_upper
        self.to_lower = to_lower
        self.remove_fillers = remove_fillers
        self.remove_erhua = remove_erhua
        self.check_chars = check_chars
        self.remove_space = remove_space

        self.cc = None
        if cc_mode:
            from opencc import OpenCC  # Open Chinese Convert: pip install opencc
            self.cc = OpenCC(cc_mode)

    def __call__(self, text):
        """Apply the configured normalization pipeline to one sentence;
        returns '' when check_chars is on and an illegal char is found."""
        if self.cc:
            text = self.cc.convert(text)

        if self.to_banjiao:
            # full-width -> half-width characters
            text = text.translate(QJ2BJ_TRANSFORM)

        if self.to_upper:
            text = text.upper()

        if self.to_lower:
            text = text.lower()

        if self.remove_fillers:
            for c in FILLER_CHARS:
                text = text.replace(c, '')

        if self.remove_erhua:
            text = remove_erhua(text)

        # non-standard-word normalization is always applied
        text = normalize_nsw(text)

        # strip punctuation
        text = text.translate(PUNCS_TRANSFORM)

        if self.check_chars:
            for c in text:
                if not IN_VALID_CHARS.get(c):
                    print(f'WARNING: illegal char {c} in: {text}', file=sys.stderr)
                    return ''

        if self.remove_space:
            text = remove_space(text)

        return text


if __name__ == '__main__':
    p = argparse.ArgumentParser()

    # normalizer options
    p.add_argument('--to_banjiao', action='store_true', help='convert quanjiao chars to banjiao')
    p.add_argument('--to_upper', action='store_true', help='convert to upper case')
    p.add_argument('--to_lower', action='store_true', help='convert to lower case')
    p.add_argument('--remove_fillers', action='store_true', help='remove filler chars such as "呃, 啊"')
    p.add_argument('--remove_erhua', action='store_true', help='remove erhua chars such as "他女儿在那边儿 -> 他女儿在那边"')
    p.add_argument('--check_chars', action='store_true' , help='skip sentences containing illegal chars')
    p.add_argument('--remove_space', action='store_true' , help='remove
whitespace')
    p.add_argument('--cc_mode', choices=['', 't2s', 's2t'], default='', help='convert between traditional to simplified')

    # I/O options
    p.add_argument('--log_interval', type=int, default=10000, help='log interval in number of processed lines')
    p.add_argument('--has_key', action='store_true', help="will be deprecated, set --format ark instead")
    p.add_argument('--format', type=str, choices=['txt', 'ark', 'tsv'], default='txt', help='input format')
    p.add_argument('ifile', help='input filename, assume utf-8 encoding')
    p.add_argument('ofile', help='output filename')

    args = p.parse_args()

    # legacy flag: --has_key implies KALDI archive format
    if args.has_key:
        args.format = 'ark'

    normalizer = TextNorm(
        to_banjiao = args.to_banjiao,
        to_upper = args.to_upper,
        to_lower = args.to_lower,
        remove_fillers = args.remove_fillers,
        remove_erhua = args.remove_erhua,
        check_chars = args.check_chars,
        remove_space = args.remove_space,
        cc_mode = args.cc_mode,
    )

    ndone = 0
    with open(args.ifile, 'r', encoding = 'utf8') as istream, open(args.ofile, 'w+', encoding = 'utf8') as ostream:
        if args.format == 'tsv':
            # TSV: normalize the TEXT column, pass other columns through
            reader = csv.DictReader(istream, delimiter = '\t')
            assert('TEXT' in reader.fieldnames)
            print('\t'.join(reader.fieldnames), file=ostream)

            for item in reader:
                text = item['TEXT']

                if text:
                    text = normalizer(text)

                # rows whose text normalizes to '' are dropped entirely
                if text:
                    item['TEXT'] = text
                    print('\t'.join([ item[f] for f in reader.fieldnames ]), file = ostream)

                ndone += 1
                if ndone % args.log_interval == 0:
                    print(f'text norm: {ndone} lines done.', file = sys.stderr, flush = True)
        else:
            for l in istream:
                key, text = '', ''
                if args.format == 'ark':  # KALDI archive, line format: "key text"
                    cols = l.strip().split(maxsplit=1)
                    key, text = cols[0], cols[1] if len(cols) == 2 else ''
                else:
                    text = l.strip()

                if text:
                    text = normalizer(text)

                if text:
                    if args.format == 'ark':
                        print(key + '\t' + text, file = ostream)
                    else:
                        print(text, file = ostream)

                ndone += 1
                if ndone %
 args.log_interval == 0:
                    print(f'text norm: {ndone} lines done.', file = sys.stderr, flush = True)
    print(f'text norm: {ndone} lines done in total.', file = sys.stderr, flush = True)

diff --git a/utils/tokenizer.py b/utils/tokenizer.py
new file mode 100644
index 0000000..fe01047
--- /dev/null
+++ b/utils/tokenizer.py
@@ -0,0 +1,97 @@
import os
import subprocess
from enum import Enum
from typing import List

from utils.logger import logger


class TokenizerType(str, Enum):
    # how a language is split into scoring tokens
    word = "word"              # per-character splitting (see Tokenizer.tokenize)
    whitespace = "whitespace"  # split on whitespace


class LangType(str, Enum):
    # languages with dedicated normalization paths in Tokenizer.norm
    zh = "zh"
    en = "en"


# language code -> tokenizer kind; zh/ja/ti are tokenized per character,
# everything else on whitespace
TOKENIZER_MAPPING = dict()
TOKENIZER_MAPPING['zh'] = TokenizerType.word
TOKENIZER_MAPPING['en'] = TokenizerType.whitespace
TOKENIZER_MAPPING['ru'] = TokenizerType.whitespace
TOKENIZER_MAPPING['ar'] = TokenizerType.whitespace
TOKENIZER_MAPPING['tr'] = TokenizerType.whitespace
TOKENIZER_MAPPING['es'] = TokenizerType.whitespace
TOKENIZER_MAPPING['pt'] = TokenizerType.whitespace
TOKENIZER_MAPPING['id'] = TokenizerType.whitespace
TOKENIZER_MAPPING['he'] = TokenizerType.whitespace
TOKENIZER_MAPPING['ja'] = TokenizerType.word
TOKENIZER_MAPPING['pl'] = TokenizerType.whitespace
TOKENIZER_MAPPING['de'] = TokenizerType.whitespace
TOKENIZER_MAPPING['fr'] = TokenizerType.whitespace
TOKENIZER_MAPPING['nl'] = TokenizerType.whitespace
TOKENIZER_MAPPING['el'] = TokenizerType.whitespace
TOKENIZER_MAPPING['vi'] = TokenizerType.whitespace
TOKENIZER_MAPPING['th'] = TokenizerType.whitespace
TOKENIZER_MAPPING['it'] = TokenizerType.whitespace
TOKENIZER_MAPPING['fa'] = TokenizerType.whitespace
TOKENIZER_MAPPING['ti'] = TokenizerType.word


class Tokenizer:
    @classmethod
    def norm_and_tokenize(cls, sentences: List[str], lang: LangType = None):
        """Normalize, then tokenize, a batch of sentences."""
        sentences = cls.norm(sentences, lang)
        return cls.tokenize(sentences, lang)

    @classmethod
    def tokenize(cls, sentences: List[str], lang: LangType = None):
        """Split each sentence into tokens per the language's tokenizer
        kind; exits the process for unmapped languages."""
        # LangType is a str Enum, so it hashes/compares equal to the plain
        # string keys of TOKENIZER_MAPPING
        tokenizer = TOKENIZER_MAPPING.get(lang, None)
        if tokenizer == 
TokenizerType.word: + return [[ch for ch in sentence] for sentence in sentences] + elif tokenizer == TokenizerType.whitespace: + return [sentence.split() for sentence in sentences] + else: + logger.error("找不到对应的分词器") + exit(-1) + + @classmethod + def norm(cls, sentences: List[str], lang: LangType = None): + if lang == LangType.zh: + from utils.speechio import textnorm_zh as textnorm + + normalizer = textnorm.TextNorm( + to_banjiao=True, + to_upper=True, + to_lower=False, + remove_fillers=True, + remove_erhua=False, # 这里同批量识别不同,改成了 False + check_chars=False, + remove_space=False, + cc_mode="", + ) + return [normalizer(sentence) for sentence in sentences] + elif lang == LangType.en: + pwd = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + with open('./predict.txt', 'w', encoding='utf-8') as fp: + for idx, sentence in enumerate(sentences): + fp.write('%s\t%s\n' % (idx, sentence)) + subprocess.run( + f'PYTHONPATH={pwd}/utils/speechio python {pwd}/utils/speechio/textnorm_en.py --has_key --to_upper ./predict.txt ./predict_norm.txt', + shell=True, + check=True, + ) + sentence_norm = [] + with open('./predict_norm.txt', 'r', encoding='utf-8') as fp: + for line in fp.readlines(): + line_split_result = line.strip().split('\t', 1) + if len(line_split_result) >= 2: + sentence_norm.append(line_split_result[1]) + else: + sentence_norm.append("") + # 有可能没有 norm 后就没了 + return sentence_norm + else: + punc = "!?。"#$%&'()*+,-/:;<=>[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘'‛“”„‟…‧﹏.`! 
#$%^&*()_+-=|';\":/.,?><~·!#¥%……&*()——+-=“:’;、。,?》《{}" + return [sentence.translate(str.maketrans(dict.fromkeys(punc, " "))).lower() for sentence in sentences] diff --git a/utils/update_submit.py b/utils/update_submit.py new file mode 100644 index 0000000..9bdad4b --- /dev/null +++ b/utils/update_submit.py @@ -0,0 +1,26 @@ +import json +import os +import sys + +from utils.logger import logger +from utils.request import requests_retry_session + +lb_headers = {"Content-Type": "application/json"} +if os.getenv("LEADERBOARD_API_TOKEN"): + lb_headers["Authorization"] = "Bearer " + os.getenv("LEADERBOARD_API_TOKEN") + + +def change_product_available() -> None: + logger.info("更改为产品不可用...") + submit_id = str(os.getenv("SUBMIT_ID", -1)) + try: + requests_retry_session().post( + os.getenv( + "UPDATE_SUBMIT_URL", "http://contest.4pd.io:8080/submit/update" + ), + data=json.dumps({submit_id: {"product_avaliable": 0}}), + headers=lb_headers, + ) + except Exception as e: + logger.error(f"change product avaliable error, {e}") + sys.exit(1)