初始化项目，由ModelHub XC社区提供模型

Model: cross-encoder/quora-roberta-large Source: Original Platform
2026-05-13 17:04:38 +08:00
commit 76d3bebabb
25 changed files with 365767 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,10 @@
 *.bin.* filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tar.gz filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 model.safetensors filter=lfs diff=lfs merge=lfs -text
--- a/CEBinaryClassificationEvaluator_Quora-dev_results.csv
+++ b/CEBinaryClassificationEvaluator_Quora-dev_results.csv
@@ -0,0 +1,15 @@
 epoch,steps,Accuracy,Accuracy_Threshold,F1,F1_Threshold,Precision,Recall,Average_Precision
 0,5000,0.8683833274728842,0.5240041017532349,0.8198757763975154,0.3768240511417389,0.7744990392533626,0.8709005324484914,0.8731992606250942
 0,10000,0.8685997132887939,0.6672844886779785,0.8178615982974976,0.4671083688735962,0.7796432318992654,0.8600200632764874,0.8729835294247004
 0,15000,0.867139109031403,0.1811354160308838,0.8167750946447605,0.10655976086854935,0.7798287478944413,0.8573964040435219,0.8630894015038928
 0,20000,0.8652457331421926,0.5867656469345093,0.810907497992261,0.042630940675735474,0.7694492552823,0.8570877382514083,0.8491018647322772
 0,25000,0.8699791728652186,0.19235554337501526,0.8189171463965605,0.11564554274082184,0.7757299647960241,0.8671965429431283,0.8779901051208325
 0,30000,0.8738470693246058,0.1700415164232254,0.8265335858953811,0.12816089391708374,0.7827916925412268,0.8754533528821669,0.880091749112012
 0,-1,0.8736036352817073,0.24405449628829956,0.8273541780947156,0.17230889201164246,0.7863200333657723,0.8729068600972297,0.8822842182205346
 1,5000,0.8749830948581321,0.18700861930847168,0.8258481907771007,0.1650511920452118,0.8056065164746459,0.847133266455745,0.8808449128337521
 1,10000,0.8773362906061508,0.056389499455690384,0.8286927441475971,0.05023118108510971,0.8076021678628973,0.8509144224091365,0.8806352233982097
 1,15000,0.8772551459251846,0.08140122890472412,0.8288128056914184,0.040914058685302734,0.7972057880105495,0.8630295547495949,0.8724934608981681
 1,20000,0.876146168618647,0.02961307018995285,0.8269898751016186,0.008039627224206924,0.7934482025101043,0.8634925534377652,0.8820982567462179
 1,25000,0.8766871331584215,0.018407132476568222,0.8281409975135293,0.006738942116498947,0.7869900618528042,0.8738328574735705,0.8403278253386028
 1,30000,0.8772551459251846,0.03199715167284012,0.8317026152111446,0.014339910820126534,0.7866382276042383,0.8822440003086658,0.8802174694024198
 1,-1,0.8785534608206432,0.03839807212352753,0.8313170836773521,0.012577023357152939,0.7772184185796751,0.8935103017208118,0.8790503615896275
--- a/README.md
+++ b/README.md
@@ -0,0 +1,32 @@
 ---
 license: apache-2.0
 datasets:
 - sentence-transformers/quora-duplicates
 language:
 - en
 base_model:
 - FacebookAI/roberta-large
 pipeline_tag: text-ranking
 library_name: sentence-transformers
 tags:
 - transformers
 ---
 # Cross-Encoder for Quora Duplicate Questions Detection
 This model was trained using the [SentenceTransformers](https://sbert.net) [Cross-Encoder](https://www.sbert.net/examples/applications/cross-encoder/README.html) class.
 ## Training Data
 This model was trained on the [Quora Duplicate Questions](https://www.quora.com/q/quoradata/First-Quora-Dataset-Release-Question-Pairs) dataset. The model predicts a score between 0 and 1 indicating how likely it is that the two given questions are duplicates.
 Note: The model is not suitable for estimating the similarity of questions. For example, the two questions "How to learn Java" and "How to learn Python" will receive a rather low score, as they are not duplicates.
 ## Usage and Performance
 Pre-trained models can be used like this:
 ```python
 from sentence_transformers import CrossEncoder
 model = CrossEncoder('cross-encoder/quora-roberta-large')
 scores = model.predict([('Question 1', 'Question 2'), ('Question 3', 'Question 4')])
 ```
 You can also use this model without sentence_transformers, using just the Transformers ``AutoModel`` class.
--- a/config.json
+++ b/config.json
@@ -0,0 +1,29 @@
 {
  "_num_labels": 3,
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "type_vocab_size": 1,
  "vocab_size": 50265
 }
--- a/flax_model.msgpack
+++ b/flax_model.msgpack
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:6255235e35f928554c2a339d9cbfc3c34992670ca1b713ab56bac32079d8bb63
 size 1421457127
--- a/merges.txt
+++ b/merges.txt
--- a/model.safetensors
+++ b/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:494d0c28d907f5c1a899243a841e0303556f1c566faec6d885e233f00991d11a
 size 1421495516
--- a/onnx/model.onnx
+++ b/onnx/model.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:8ff7d89da57f09f513f786c533532ffba32f743d70ad1190c85493b48da2289a
 size 1422070180
--- a/onnx/model_O1.onnx
+++ b/onnx/model_O1.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:d2a4033a3e82bf3496f1093de9b5c34e875f583ad28b7d82255b289eee75bc24
 size 1421867443
--- a/onnx/model_O2.onnx
+++ b/onnx/model_O2.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:5350eb94c12b098b01d67d953097686f992b9a6f17bd8081b69792d32a39fd94
 size 1421515824
--- a/onnx/model_O3.onnx
+++ b/onnx/model_O3.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:391b4012b24321ce8a45c77cbf6d92d9bcde3a100a3d69ae2e86c96f05b5d9a7
 size 1421515535
--- a/onnx/model_O4.onnx
+++ b/onnx/model_O4.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:227c1a085f70223fc7fd99edb670b1a77e1cc68f2582fc31eff34a63d4c04ca7
 size 711066308
--- a/onnx/model_qint8_arm64.onnx
+++ b/onnx/model_qint8_arm64.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:d3f234afa2e224b4913f291e26fd8d89db614ae0b4ff1313f4b44d96ce92ce2d
 size 358595115
--- a/onnx/model_qint8_avx512.onnx
+++ b/onnx/model_qint8_avx512.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:d3f234afa2e224b4913f291e26fd8d89db614ae0b4ff1313f4b44d96ce92ce2d
 size 358595115
--- a/onnx/model_qint8_avx512_vnni.onnx
+++ b/onnx/model_qint8_avx512_vnni.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:d3f234afa2e224b4913f291e26fd8d89db614ae0b4ff1313f4b44d96ce92ce2d
 size 358595115
--- a/onnx/model_quint8_avx2.onnx
+++ b/onnx/model_quint8_avx2.onnx
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:53ad36c580f67cab3188e0425473fd93ea6af2424b7185b0b7c8da1a9a95bb3c
 size 358595114
--- a/openvino/openvino_model.bin
+++ b/openvino/openvino_model.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:b9e2595c9f5a8203ff78793c537b34d7e11d4b2630c020f66a1284daa48e21d5
 size 1421447348
--- a/openvino/openvino_model.xml
+++ b/openvino/openvino_model.xml
--- a/openvino/openvino_model_qint8_quantized.bin
+++ b/openvino/openvino_model_qint8_quantized.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:69cca64d73dafe56512c3e4732bcf5d312c416a86466a69066e3a1bc9e2fde87
 size 358122168
--- a/openvino/openvino_model_qint8_quantized.xml
+++ b/openvino/openvino_model_qint8_quantized.xml
--- a/pytorch_model.bin
+++ b/pytorch_model.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:a01c9ec97da7dcd86815855a2d91b43c96db6518c7da0f917bcbe00ced5d13e3
 size 1421616585
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,51 @@
 {
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,59 @@
 {
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50264": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "extra_special_tokens": {},
  "full_tokenizer_file": null,
  "mask_token": "<mask>",
  "model_max_length": 512,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "unk_token": "<unk>"
 }
--- a/vocab.json
+++ b/vocab.json