From 6c3d9bb7c76ce95e1eeaadedf473a7e2a68d9158 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sat, 6 Jun 2026 15:08:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: osunlp/TableLlama Source: Original Platform --- .gitattributes | 49 ++++++++++++++++++++ README.md | 78 ++++++++++++++++++++++++++++++++ added_tokens.json | 3 ++ config.json | 29 ++++++++++++ configuration.json | 1 + generation_config.json | 10 ++++ pytorch_model-00001-of-00002.bin | 3 ++ pytorch_model-00002-of-00002.bin | 3 ++ pytorch_model.bin.index.json | 3 ++ special_tokens_map.json | 24 ++++++++++ tokenizer.model | 3 ++ tokenizer_config.json | 37 +++++++++++++++ trainer_state.json | 3 ++ training_args.bin | 3 ++ 14 files changed, 249 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 added_tokens.json create mode 100644 config.json create mode 100644 configuration.json create mode 100644 generation_config.json create mode 100644 pytorch_model-00001-of-00002.bin create mode 100644 pytorch_model-00002-of-00002.bin create mode 100644 pytorch_model.bin.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json create mode 100644 trainer_state.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7cc7a77 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,49 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs 
merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +trainer_state.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..63c5e91 --- /dev/null +++ b/README.md @@ -0,0 
+1,78 @@ +--- +license: cc-by-4.0 +language: +- en +datasets: +- osunlp/TableInstruct +--- + +--- +# TableLlama: Towards Open Large Generalist Models for Tables + +Project Page: [https://osu-nlp-group.github.io/TableLlama/](https://osu-nlp-group.github.io/TableLlama/) + +Paper: [https://arxiv.org/abs/2311.09206](https://arxiv.org/abs/2311.09206) + +Dataset: [https://huggingface.co/datasets/osunlp/TableInstruct/](https://huggingface.co/datasets/osunlp/TableInstruct/) + +Code: [https://osu-nlp-group.github.io/TableLlama/](https://osu-nlp-group.github.io/TableLlama/) + + +## Introduction +We introduce TableLlama, an open-source large generalist model specifically tailored for various table-based tasks. The TableLlama model is trained on 🤗 [TableInstruct Dataset](https://huggingface.co/datasets/osunlp/TableInstruct), a meticulously curated instruction tuning dataset for tables. TableLlama is tuned on 2.6 million table-based task examples and can handle up to 8K context! + + +## Model + +[TableLlama-7B](https://huggingface.co/osunlp/TableLlama/) + + +## Data +The models are trained on the 🤗 [TableInstruct Dataset](https://huggingface.co/datasets/osunlp/TableInstruct), which includes a comprehensive table-based instruction tuning dataset that covers a variety of real-world tables and realistic tasks. We include 14 datasets of 11 tasks in total. Check out the dataset card for more details. + + +## Training Procedure +The models are fine-tuned on the TableInstruct dataset using the fully fine-tuned version of LongLoRA (7B) as the base model, which replaces the vanilla attention mechanism of the original Llama-2 (7B) with shift short attention. Training takes 9 days on a cluster of 48 A100 (80GB) GPUs. Check out our paper for more details. + +## Evaluation +The models are evaluated on 8 in-domain datasets of 8 tasks and 6 out-of-domain datasets of 4 tasks. + + +## Usage +You can use the models through Hugging Face's Transformers library. 
+Check our GitHub repo for more advanced usage: [https://osu-nlp-group.github.io/TableLlama/](https://osu-nlp-group.github.io/TableLlama/) + +## Prompt Format +``` +Below is an instruction that describes a task, paired with an input that provides further context. Write a response that +appropriately completes the request. + +### Instruction: +{instruction} + +### Input: +{input} + +### Question: +{question} + +### Response: +``` + +## Limitations +We've tried our best to build table generalist models. However, we acknowledge that the models' performance may vary based on the complexity and specifics of the table tasks and datasets. Moreover, not all table-based tasks can be covered comprehensively. + + +## Citation +If you use the models, data, or code from this project, please cite the original paper: + +``` +@misc{zhang2023tablellama, + title={TableLlama: Towards Open Large Generalist Models for Tables}, + author={Tianshu Zhang and Xiang Yue and Yifei Li and Huan Sun}, + year={2023}, + eprint={2311.09206}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..e41416d --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32000 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000..2668095 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "/ML-A800/models/Llama-2-7b-longlora-8k-ft", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 2.0, + "type": "linear" + }, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": 
"bfloat16", + "transformers_version": "4.33.2", + "use_cache": true, + "vocab_size": 32001 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..31de93e --- /dev/null +++ b/generation_config.json @@ -0,0 +1,10 @@ +{ + "bos_token_id": 1, + "do_sample": true, + "eos_token_id": 2, + "max_length": 4096, + "pad_token_id": 0, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.33.2" +} diff --git a/pytorch_model-00001-of-00002.bin b/pytorch_model-00001-of-00002.bin new file mode 100644 index 0000000..e6a449d --- /dev/null +++ b/pytorch_model-00001-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c4dfc8c2b016491360b709247e752a8eacec523b7a3778a06a7278a3042acb1 +size 9976628314 diff --git a/pytorch_model-00002-of-00002.bin b/pytorch_model-00002-of-00002.bin new file mode 100644 index 0000000..5c1cf78 --- /dev/null +++ b/pytorch_model-00002-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803ed0fec29e651d70fc7143939c86da50dadf898020671d6721ea608e2535f4 +size 3500318979 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000..3432508 --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e54729ec2da172881345ee8c2cc7da607561c76d5520a867bc12dfc1559dcb4 +size 23950 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..21edfc8 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "<s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": 
"</s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "[PAD]", + "unk_token": { + "content": "<unk>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..af2109d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "<s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "</s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 8192, + "pad_token": null, + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": { + "__type": "AddedToken", + "content": "<unk>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "use_default_system_prompt": true +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..e0367e5 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52357768c1583a7fb454adb192d832c5c1ec66cc4845de9cd5ba00e82b77681 +size 5376676 diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..1b64aec --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0979fe69575aec776d34fc3dbbf0a8a3e60abaef20681f59539da403f77752ba +size 5499