初始化项目，由ModelHub XC社区提供模型

Model: cross-encoder/quora-distilroberta-base Source: Original Platform
2026-05-13 16:50:07 +08:00
commit 2b5793ee83
25 changed files with 318858 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,10 @@
 *.bin.* filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tar.gz filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 model.safetensors filter=lfs diff=lfs merge=lfs -text
--- a/CEBinaryClassificationEvaluator_Quora-dev_results.csv
+++ b/CEBinaryClassificationEvaluator_Quora-dev_results.csv
@@ -0,0 +1,15 @@
 epoch,steps,Accuracy,Accuracy_Threshold,F1,F1_Threshold,Precision,Recall,Average_Precision
 0,5000,0.8452570933975277,0.2983477711677551,0.7900497512437811,0.18507549166679382,0.7322310783215862,0.8577822362836639,0.8396229747981989
 0,10000,0.857347650861486,0.5213649272918701,0.8031484845246999,0.33029448986053467,0.7457997089562112,0.8700517015201791,0.8545970007991148
 0,15000,0.8609721132779746,0.1738070547580719,0.8078387142252925,0.04706580936908722,0.7435281904885487,0.8843274944054326,0.8683121532515892
 0,20000,0.8651104920072489,0.47281914949417114,0.8140117360812937,0.28083980083465576,0.7588898525585429,0.8777683463230187,0.8688431110199832
 0,25000,0.8627572962592303,0.3951731324195862,0.8135375846099039,0.1231781542301178,0.7556084971213024,0.8810865035882398,0.8680829287785565
 0,30000,0.8675989288902113,0.23542040586471558,0.8190337283500455,0.15778020024299622,0.7763721830499102,0.8666563778069295,0.8753982019828903
 0,-1,0.8682751345649292,0.18975578248500824,0.8190918322936311,0.1217166930437088,0.7727366043933483,0.8713635311366618,0.8752323608217697
 1,5000,0.8672202537123691,0.11032938957214355,0.8186121022354911,0.06648033857345581,0.7749362376783622,0.867505208735242,0.8734906535795933
 1,10000,0.868789050877715,0.39354807138442993,0.820622596328278,0.14097964763641357,0.7744299116619873,0.8726753607531446,0.8753506581928413
 1,15000,0.8691406778285683,0.14086419343948364,0.8205202207871857,0.04024771973490715,0.7704607046070461,0.8775368469789335,0.8762103576779169
 1,20000,0.8707094749939142,0.12691470980644226,0.8241213305824012,0.04960266500711441,0.7761112626124897,0.8784628443552743,0.8740756841801574
 1,25000,0.8700062210922074,0.09521728754043579,0.8238741182854042,0.05193943902850151,0.7754323845839575,0.878771510147388,0.8754125303894846
 1,30000,0.8710611019447675,0.13264580070972443,0.8236196879453356,0.06295235455036163,0.7822043309272626,0.8696658692800371,0.8759877493381109
 1,-1,0.8706824267669254,0.06451858580112457,0.8241568175266647,0.03969372808933258,0.7730683431352667,0.882475499652751,0.8747959164698776
--- a/README.md
+++ b/README.md
@@ -0,0 +1,32 @@
 ---
 license: apache-2.0
 datasets:
 - sentence-transformers/quora-duplicates
 language:
 - en
 base_model:
 - distilbert/distilroberta-base
 pipeline_tag: text-ranking
 library_name: sentence-transformers
 tags:
 - transformers
 ---
 # Cross-Encoder for Quora Duplicate Questions Detection
 This model was trained using the [SentenceTransformers](https://sbert.net) [Cross-Encoder](https://www.sbert.net/examples/applications/cross-encoder/README.html) class.
 ## Training Data
 This model was trained on the [Quora Duplicate Questions](https://www.quora.com/q/quoradata/First-Quora-Dataset-Release-Question-Pairs) dataset. The model predicts a score between 0 and 1 indicating how likely it is that the two given questions are duplicates.
 Note: The model is not suitable for estimating the similarity of questions. For example, the two questions "How to learn Java" and "How to learn Python" will receive a rather low score, because they are not duplicates.
 ## Usage and Performance
 Pre-trained models can be used like this:
 ```python
 from sentence_transformers import CrossEncoder
 model = CrossEncoder('cross-encoder/quora-distilroberta-base')
 scores = model.predict([('Question 1', 'Question 2'), ('Question 3', 'Question 4')])
 ```
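 You can also use this model without sentence_transformers, using just the Transformers ``AutoModel`` classes (the model is a sequence-classification model, so ``AutoModelForSequenceClassification`` loads it together with its classification head).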
--- a/config.json
+++ b/config.json
@@ -0,0 +1,28 @@
 {
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 1,
  "type_vocab_size": 1,
  "vocab_size": 50265
 }
--- a/flax_model.msgpack
+++ b/flax_model.msgpack
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:81ee85c07ec9df3edd54b02c4fc70881ee5e1903d0d1ab0b79e3eb4e876cf816
 size 328480487
--- a/merges.txt
+++ b/merges.txt
--- a/model.safetensors
+++ b/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:73fd14ad7d08f3ef30eb25841c8f4ba89e91230f48159279233b37015ccb33fb
 size 328493408
--- a/onnx/model.onnx
+++ b/onnx/model.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:4cc6fc9e972b5074b6cb41e3cc1667347909a8c240b6913d143bb10ec5e5df26
 size 328643805
--- a/onnx/model_O1.onnx
+++ b/onnx/model_O1.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:b111bc4bb35a5bf188201de27033b14a82e55e3dad241541dd051f4e49b329fc
 size 328592269
--- a/onnx/model_O2.onnx
+++ b/onnx/model_O2.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:d16a73b270d98e13a530eb2c48c8aadc1a3a2fcb2c09bb8ce4ab57d70c84356c
 size 328501701
--- a/onnx/model_O3.onnx
+++ b/onnx/model_O3.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:081739b944db5ac3d0f40a2ffbe38289b9261380f6c45d535ca0a2bce370e64f
 size 328501632
--- a/onnx/model_O4.onnx
+++ b/onnx/model_O4.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:9d7bd67835cb0c03f5fd1b394885683e1543bd9c85d93ec13c3b4cba50b88fca
 size 164333682
--- a/onnx/model_qint8_arm64.onnx
+++ b/onnx/model_qint8_arm64.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:845ea205de16c9c7f0587dd29b7e45a8c31f8e1ea2766e69932a28af2bcb8ed5
 size 82821511
--- a/onnx/model_qint8_avx512.onnx
+++ b/onnx/model_qint8_avx512.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:845ea205de16c9c7f0587dd29b7e45a8c31f8e1ea2766e69932a28af2bcb8ed5
 size 82821511
--- a/onnx/model_qint8_avx512_vnni.onnx
+++ b/onnx/model_qint8_avx512_vnni.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:845ea205de16c9c7f0587dd29b7e45a8c31f8e1ea2766e69932a28af2bcb8ed5
 size 82821511
--- a/onnx/model_quint8_avx2.onnx
+++ b/onnx/model_quint8_avx2.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:9bab6d0016bcfc2c24c9bc0f747a9a6ceb53f262738ea1df3144aa7a764c1ce5
 size 82821509
--- a/openvino/openvino_model.bin
+++ b/openvino/openvino_model.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:0cee454a46a5a2a7a8c9cf648f2aab3e51a038cf8e1550d911c3eedecf6b9e9b
 size 328480948
--- a/openvino/openvino_model.xml
+++ b/openvino/openvino_model.xml
--- a/openvino/openvino_model_qint8_quantized.bin
+++ b/openvino/openvino_model_qint8_quantized.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:9c0dadd29f6adc34563787ddd2e9692a8ac8f9bf0e2b4f89dce0197f649f120c
 size 82817624
--- a/openvino/openvino_model_qint8_quantized.xml
+++ b/openvino/openvino_model_qint8_quantized.xml
--- a/pytorch_model.bin
+++ b/pytorch_model.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:e1642652001f9d60e227169da750327d26d6d37b531f90d79b5cd1516a94b20a
 size 328525929
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,51 @@
 {
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,59 @@
 {
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50264": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "extra_special_tokens": {},
  "full_tokenizer_file": null,
  "mask_token": "<mask>",
  "model_max_length": 512,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "unk_token": "<unk>"
 }
--- a/vocab.json
+++ b/vocab.json