From 04e52feb63e0ddfc3cfba5df4dd6a7472691ddd5 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Fri, 10 Apr 2026 23:59:01 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: CloneBO/OracleLM Source: Original Platform --- .gitattributes | 35 +++++++ README.md | 201 ++++++++++++++++++++++++++++++++++++++++ config.json | 29 ++++++ generation_config.json | 6 ++ model.safetensors | 3 + special_tokens_map.json | 9 ++ tokenizer.json | 179 +++++++++++++++++++++++++++++++++++ tokenizer_config.json | 59 ++++++++++++ vocab.txt | 29 ++++++ 9 files changed, 550 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 vocab.txt diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..cacd7c3 --- /dev/null +++ b/README.md @@ -0,0 +1,201 @@ +--- +library_name: transformers +tags: [] +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + +This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated. + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + diff --git a/config.json b/config.json new file mode 100644 index 0000000..a4df228 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "/scratch/yk2516/repos/bighat/bighat_antibody_design/exp/llm_heavy_chain_model_llama2_oracle/2024-05-07_13-35-09", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 512, + "initializer_range": 0.02, + "intermediate_size": 2048, + "max_position_embeddings": 2048, + "model_type": "llama", + "num_attention_heads": 4, + "num_hidden_layers": 12, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_clone_attention": false, + "vocab_size": 29 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..69b7806 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..45574ff --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41614f50af075f49bdc72a7a70cc0fcd1412c9a72d41db3efd2f5d16e643400 +size 201508760 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5cc733 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,9 @@ +{ + "bos_token": "[CLS]", + "cls_token": "[CLS]", + "eos_token": "[SEP]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..d7ead1a --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,179 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "BertNormalizer", + "clean_text": true, + "handle_chinese_chars": true, + "strip_accents": null, + "lowercase": false + }, + "pre_tokenizer": { + "type": "BertPreTokenizer" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 2 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 3 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": { + "type": "WordPiece", + "prefix": "##", + "cleanup": true + }, + "model": { + "type": "WordPiece", + "unk_token": "[UNK]", + "continuing_subword_prefix": "##", + "max_input_chars_per_word": 100, + "vocab": { + "[PAD]": 0, + "[UNK]": 1, + "[CLS]": 2, + "[SEP]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "D": 7, + "E": 8, + "F": 9, + "G": 10, + "H": 11, + "I": 12, + "K": 13, + "L": 14, + "M": 15, + "N": 16, + "P": 17, + "Q": 18, + "R": 19, + "S": 20, + "T": 21, + "V": 22, + "W": 23, + "Y": 24, + "-": 25, + "[ClSep]": 26, + "[AbHC]": 27, + "[AbLC]": 28 + } + } +} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8bf09c7 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,59 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "[CLS]", + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "do_basic_tokenize": true, + "do_lower_case": false, + "eos_token": "[SEP]", + "mask_token": "[MASK]", + "model_max_length": 1000000000000000019884624838656, + "never_split": null, + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "strip_accents": null, + "tokenize_chinese_chars": true, + "tokenizer_class": "BertTokenizer", + "unk_token": "[UNK]" +} diff --git a/vocab.txt b/vocab.txt new file mode 100644 index 0000000..672a492 --- /dev/null +++ b/vocab.txt @@ -0,0 +1,29 @@ +[PAD] +[UNK] +[CLS] +[SEP] +[MASK] +A +C +D +E +F +G +H +I +K +L +M +N +P +Q +R +S +T +V +W +Y +- +[ClSep] +[AbHC] +[AbLC]