初始化项目,由ModelHub XC社区提供模型

Model: Muennighoff/SGPT-2.7B-weightedmean-nli-bitfit
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-13 16:13:28 +08:00
commit 95b3ffdb31
20 changed files with 150632 additions and 0 deletions

27
.gitattributes vendored Normal file
View File

@@ -0,0 +1,27 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bin.* filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

9
1_Pooling/config.json Normal file
View File

@@ -0,0 +1,9 @@
{
"word_embedding_dimension": 2560,
"pooling_mode_cls_token": false,
"pooling_mode_mean_tokens": false,
"pooling_mode_max_tokens": false,
"pooling_mode_mean_sqrt_len_tokens": false,
"pooling_mode_weightedmean_tokens": true,
"pooling_mode_lasttoken": false
}

72
README.md Normal file
View File

@@ -0,0 +1,72 @@
---
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- feature-extraction
- sentence-similarity
---
# SGPT-2.7B-weightedmean-nli-bitfit
## Usage
For usage instructions, refer to our codebase: https://github.com/Muennighoff/sgpt
## Evaluation Results
For evaluation results, refer to the `eval` folder of this repository or our paper: https://arxiv.org/abs/2202.08904
## Training
The model was trained with the parameters:
**DataLoader**:
`sentence_transformers.datasets.NoDuplicatesDataLoader.NoDuplicatesDataLoader` of length 70456 with parameters:
```
{'batch_size': 8}
```
**Loss**:
`sentence_transformers.losses.MultipleNegativesRankingLoss.MultipleNegativesRankingLoss` with parameters:
```
{'scale': 20.0, 'similarity_fct': 'cos_sim'}
```
**Parameters of the `fit()` method**:
```
{
"epochs": 1,
"evaluation_steps": 7045,
"evaluator": "sentence_transformers.evaluation.EmbeddingSimilarityEvaluator.EmbeddingSimilarityEvaluator",
"max_grad_norm": 1,
"optimizer_class": "<class 'transformers.optimization.AdamW'>",
"optimizer_params": {
"lr": 0.0002
},
"scheduler": "WarmupLinear",
"steps_per_epoch": null,
"warmup_steps": 7046,
"weight_decay": 0.01
}
```
## Full Model Architecture
```
SentenceTransformer(
(0): Transformer({'max_seq_length': 75, 'do_lower_case': False}) with Transformer model: GPTNeoModel
(1): Pooling({'word_embedding_dimension': 2560, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': True, 'pooling_mode_lasttoken': False})
)
```
## Citing & Authors
```bibtex
@article{muennighoff2022sgpt,
title={SGPT: GPT Sentence Embeddings for Semantic Search},
author={Muennighoff, Niklas},
journal={arXiv preprint arXiv:2202.08904},
year={2022}
}
```

82
config.json Normal file
View File

@@ -0,0 +1,82 @@
{
"_name_or_path": "EleutherAI/gpt-neo-2.7B",
"activation_function": "gelu_new",
"architectures": [
"GPTNeoModel"
],
"attention_dropout": 0,
"attention_layers": [
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local",
"global",
"local"
],
"attention_types": [
[
[
"global",
"local"
],
16
]
],
"bos_token_id": 50256,
"embed_dropout": 0,
"eos_token_id": 50256,
"gradient_checkpointing": false,
"hidden_size": 2560,
"initializer_range": 0.02,
"intermediate_size": null,
"layer_norm_epsilon": 1e-05,
"max_position_embeddings": 2048,
"model_type": "gpt_neo",
"num_heads": 20,
"num_layers": 32,
"resid_dropout": 0,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"task_specific_params": {
"text-generation": {
"do_sample": true,
"max_length": 50,
"temperature": 0.9
}
},
"tokenizer_class": "GPT2Tokenizer",
"torch_dtype": "float32",
"transformers_version": "4.20.0.dev0",
"use_cache": true,
"vocab_size": 50257,
"window_size": 256
}

View File

@@ -0,0 +1,7 @@
{
"__version__": {
"sentence_transformers": "2.1.0",
"transformers": "4.20.0.dev0",
"pytorch": "1.10.2"
}
}

View File

@@ -0,0 +1,7 @@
{
"askubuntu": 57.48,
"cqadupstack": 14.04,
"twitterpara": 73.4,
"scidocs": 74.69,
"avg": 54.9025
}

View File

@@ -0,0 +1,66 @@
{
"askubuntu": {
"map_askubuntu_title": 57.48,
"p@1_askubuntu_title": 56.99,
"p@5_askubuntu_title": 43.23,
"mrr_askubuntu_title": 70.74
},
"cqadupstack": {
"map@100_cqadupstack_unix": 14.82,
"ndcg@10_cqadupstack_unix": 16.39,
"map@100_cqadupstack_gaming": 26.14,
"ndcg@10_cqadupstack_gaming": 28.7,
"map@100_cqadupstack_wordpress": 4.64,
"ndcg@10_cqadupstack_wordpress": 5.88,
"map@100_cqadupstack_stats": 15.42,
"ndcg@10_cqadupstack_stats": 16.15,
"map@100_cqadupstack_tex": 8.28,
"ndcg@10_cqadupstack_tex": 8.96,
"map@100_cqadupstack_english": 15.02,
"ndcg@10_cqadupstack_english": 16.54,
"map@100_cqadupstack_programmers": 13.27,
"ndcg@10_cqadupstack_programmers": 14.41,
"map@100_cqadupstack_mathematica": 11.74,
"ndcg@10_cqadupstack_mathematica": 13.47,
"map@100_cqadupstack_physics": 16.81,
"ndcg@10_cqadupstack_physics": 18.61,
"map@100_cqadupstack_gis": 15.47,
"ndcg@10_cqadupstack_gis": 16.67,
"map@100_cqadupstack_webmasters": 9.72,
"ndcg@10_cqadupstack_webmasters": 10.48,
"map@100_cqadupstack_android": 17.12,
"ndcg@10_cqadupstack_android": 19.1,
"map@100_cqadupstack_avg": 14.04,
"ndcg@10_cqadupstack_avg": 15.45
},
"twitterpara": {
"ap_twitter_twitterurl": 75.84,
"spearman_twitter_twitterurl": 70.81,
"ap_twitter_pit": 70.96,
"spearman_twitter_pit": 56.64,
"ap_twitter_avg": 73.4,
"spearman_twitter_avg": 63.73
},
"scidocs": {
"map_scidocs_cite_euclidean": 72.29,
"ndcg_scidocs_cite_euclidean": 86.43,
"map_scidocs_cite_cosine": 72.29,
"ndcg_scidocs_cite_cosine": 86.43,
"map_scidocs_cocite_euclidean": 75.36,
"ndcg_scidocs_cocite_euclidean": 88.17,
"map_scidocs_cocite_cosine": 75.36,
"ndcg_scidocs_cocite_cosine": 88.17,
"map_scidocs_coview_euclidean": 76.46,
"ndcg_scidocs_coview_euclidean": 87.8,
"map_scidocs_coview_cosine": 76.46,
"ndcg_scidocs_coview_cosine": 87.8,
"map_scidocs_coread_euclidean": 74.65,
"ndcg_scidocs_coread_euclidean": 87.0,
"map_scidocs_coread_cosine": 74.65,
"ndcg_scidocs_coread_cosine": 87.0,
"map_scidocs_euclidean_avg": 74.69,
"ndcg_scidocs_euclidean_avg": 87.35,
"map_scidocs_cosine_avg": 74.69,
"ndcg_scidocs_cosine_avg": 87.35
}
}

1
eval/quora.json Normal file
View File

@@ -0,0 +1 @@
{"SGPT-2.7B-weightedmean-nli-bitfit": {"quora": {"NDCG@1": 0.7461, "NDCG@3": 0.79099, "NDCG@5": 0.80989, "NDCG@10": 0.82645, "NDCG@100": 0.84542, "NDCG@1000": 0.8478}}}

View File

@@ -0,0 +1,12 @@
epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
0,440,0.8547642811744708,0.8604042254467369,0.8564294919751527,0.8580900991676403,0.8592109600985026,0.8606819249266364,0.7727095196030622,0.7702266970220912
0,880,0.8605577862452674,0.8664787776815549,0.8594251818914206,0.8613580463896765,0.8625107129203857,0.8646695024116025,0.7777588243182069,0.7747923460803676
0,1320,0.8641797068951016,0.8701385809189678,0.8600269510402324,0.8623718727121046,0.8630083477192438,0.8655171844912587,0.7819169473167653,0.7801439440796124
0,1760,0.863861676991937,0.8699711768888497,0.8610538565702486,0.8639273844617363,0.8638166020097834,0.8668464202840234,0.7762041862089968,0.7743392283299438
0,2200,0.8667562252351253,0.8722428457163393,0.8609922973987619,0.8640152137038429,0.8637144738269167,0.866880911411029,0.7797478785593531,0.7772643228752733
0,2640,0.8658548494423817,0.8702345916613825,0.8583041242377912,0.8613687812725296,0.8606700693927242,0.8638960499205391,0.7781537908033099,0.7761412829543439
0,3080,0.8643810926871549,0.8692556763950754,0.857059374227981,0.8600999939200575,0.8594169577592663,0.8625821753483399,0.7756975962910497,0.7737551514144106
0,3520,0.8661238263202532,0.8701186430665476,0.8576723473616406,0.8614191549727733,0.8600335298933423,0.8639555339473548,0.777608544440925,0.7758958610767906
0,3960,0.8659908588458113,0.8699200451809654,0.8570869831042444,0.860628569017929,0.8594235992474281,0.8631523750300969,0.7746604455122261,0.7731850887434243
0,4400,0.8662536580670237,0.8702266564863804,0.8571446774934243,0.8608170966035958,0.8594735292127258,0.8633222114462352,0.7763396949906898,0.7751198872316742
0,-1,0.8662583413659,0.8702579991508459,0.8571471752177104,0.8608379873119169,0.8594701305965878,0.8633306420570356,0.7763449731290442,0.7751015402439239
1 epoch steps cosine_pearson cosine_spearman euclidean_pearson euclidean_spearman manhattan_pearson manhattan_spearman dot_pearson dot_spearman
2 0 440 0.8547642811744708 0.8604042254467369 0.8564294919751527 0.8580900991676403 0.8592109600985026 0.8606819249266364 0.7727095196030622 0.7702266970220912
3 0 880 0.8605577862452674 0.8664787776815549 0.8594251818914206 0.8613580463896765 0.8625107129203857 0.8646695024116025 0.7777588243182069 0.7747923460803676
4 0 1320 0.8641797068951016 0.8701385809189678 0.8600269510402324 0.8623718727121046 0.8630083477192438 0.8655171844912587 0.7819169473167653 0.7801439440796124
5 0 1760 0.863861676991937 0.8699711768888497 0.8610538565702486 0.8639273844617363 0.8638166020097834 0.8668464202840234 0.7762041862089968 0.7743392283299438
6 0 2200 0.8667562252351253 0.8722428457163393 0.8609922973987619 0.8640152137038429 0.8637144738269167 0.866880911411029 0.7797478785593531 0.7772643228752733
7 0 2640 0.8658548494423817 0.8702345916613825 0.8583041242377912 0.8613687812725296 0.8606700693927242 0.8638960499205391 0.7781537908033099 0.7761412829543439
8 0 3080 0.8643810926871549 0.8692556763950754 0.857059374227981 0.8600999939200575 0.8594169577592663 0.8625821753483399 0.7756975962910497 0.7737551514144106
9 0 3520 0.8661238263202532 0.8701186430665476 0.8576723473616406 0.8614191549727733 0.8600335298933423 0.8639555339473548 0.777608544440925 0.7758958610767906
10 0 3960 0.8659908588458113 0.8699200451809654 0.8570869831042444 0.860628569017929 0.8594235992474281 0.8631523750300969 0.7746604455122261 0.7731850887434243
11 0 4400 0.8662536580670237 0.8702266564863804 0.8571446774934243 0.8608170966035958 0.8594735292127258 0.8633222114462352 0.7763396949906898 0.7751198872316742
12 0 -1 0.8662583413659 0.8702579991508459 0.8571471752177104 0.8608379873119169 0.8594701305965878 0.8633306420570356 0.7763449731290442 0.7751015402439239

View File

@@ -0,0 +1,2 @@
epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
-1,-1,0.8391456868332672,0.8466346397745724,0.8352041704150074,0.8363291003222771,0.8369317879915439,0.8385788509219374,0.7345451515355464,0.7176283688660806
1 epoch steps cosine_pearson cosine_spearman euclidean_pearson euclidean_spearman manhattan_pearson manhattan_spearman dot_pearson dot_spearman
2 -1 -1 0.8391456868332672 0.8466346397745724 0.8352041704150074 0.8363291003222771 0.8369317879915439 0.8385788509219374 0.7345451515355464 0.7176283688660806

50001
merges.txt Normal file

File diff suppressed because it is too large Load Diff

14
modules.json Normal file
View File

@@ -0,0 +1,14 @@
[
{
"idx": 0,
"name": "0",
"path": "",
"type": "sentence_transformers.models.Transformer"
},
{
"idx": 1,
"name": "1",
"path": "1_Pooling",
"type": "sentence_transformers.models.Pooling"
}
]

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3328d67e21cfc12696ed04be3b111723b5d0e712ecc53de8cacf360745de42c5
size 9996965077

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:020dafb349b69af0f5e674afb6950199a2b705093f1a618500e9a55809240b52
size 742637183

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1faacf954a349388fab25a3b20e1f0a1e09d87d1eb840569b3af5e4333b3785a
size 32846

View File

@@ -0,0 +1,4 @@
{
"max_seq_length": 75,
"do_lower_case": false
}

1
special_tokens_map.json Normal file
View File

@@ -0,0 +1 @@
{"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>", "pad_token": "<|endoftext|>"}

100316
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

1
tokenizer_config.json Normal file
View File

@@ -0,0 +1 @@
{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "pad_token": null, "add_bos_token": false, "tokenizer_class": "GPT2Tokenizer"}

1
vocab.json Normal file

File diff suppressed because one or more lines are too long