commit 3ff3e9e48f102ead6a36e080d1900def4f204549 Author: ModelHub XC Date: Thu May 14 23:25:11 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: AI-ModelScope/Vicuna-7B Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..97c8e80 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,32 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..bdca894 --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ +--- +license: Apache License 2.0 +tasks: +- text-generation +language: +- en +library_name: 
transformers +inference: false +widgets: + - task: text-generation + version: 1 + inputs: + - type: text + name: text + title: 输入文字 + validator: + max_words: 128 + examples: + - name: 1 + title: 示例1 + inputs: + - name: text + data: 你好 + inferencespec: + cpu: 4 + memory: 24000 + gpu: 1 + gpu_memory: 16000 +--- +**NOTE: This model has delta files applied and can be used directly.** + + +# Vicuna Model Card + +## Model details + +``` +pip install fschat +``` + +```python +from modelscope.utils.constant import Tasks +from modelscope.pipelines import pipeline +pipe = pipeline(task=Tasks.text_generation, model='AI-ModelScope/Vicuna-7B', model_revision='v1.0.1', device='cuda') +inputs = '你好' +result = pipe(inputs) +print(result) + +``` + +**Model type:** +Vicuna is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. +It is an auto-regressive language model, based on the transformer architecture. + +**Model date:** +Vicuna was trained between March 2023 and April 2023. + +**Organizations developing the model:** +The Vicuna team with members from UC Berkeley, CMU, Stanford, and UC San Diego. + +**Paper or resources for more information:** +https://vicuna.lmsys.org/ + +**License:** +Apache License 2.0 + +**Where to send questions or comments about the model:** +https://github.com/lm-sys/FastChat/issues + +## Intended use +**Primary intended uses:** +The primary use of Vicuna is research on large language models and chatbots. + +**Primary intended users:** +The primary intended users of the model are researchers and hobbyists in natural language processing, machine learning, and artificial intelligence. + +## Training dataset +70K conversations collected from ShareGPT.com. + +## Evaluation dataset +A preliminary evaluation of the model quality is conducted by creating a set of 80 diverse questions and utilizing GPT-4 to judge the model outputs. See https://vicuna.lmsys.org/ for more details. 
+ +## Major updates of weights v1.1 +- Refactor the tokenization and separator. In Vicuna v1.1, the separator has been changed from `"###"` to the EOS token `"</s>"`. This change makes it easier to determine the generation stop criteria and enables better compatibility with other libraries. +- Fix the supervised fine-tuning loss computation for better model quality. \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..06fde26 --- /dev/null +++ b/config.json @@ -0,0 +1,23 @@ +{ + "_name_or_path": "/Users/tdo/Desktop/7b-hf", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 2048, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.29.0.dev0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..3be9d92 --- /dev/null +++ b/configuration.json @@ -0,0 +1,11 @@ +{ + "framework": "pytorch", + "task": "text-generation", + "model": { + "type": "Vicuna7b" + }, + "pipeline": { + "type": "Vicuna7b-text-generation-pipe" + }, + "allow_remote": true +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2c05748 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.29.0.dev0" +} diff --git a/ms_wrapper.py b/ms_wrapper.py new file mode 100644 index 0000000..924a42d --- /dev/null +++ b/ms_wrapper.py @@ -0,0 +1,77 @@ +import os +from typing import Any, Dict, Union + +import torch +from transformers import AutoModelForCausalLM, 
# Prompt template wrapped around every raw user message before tokenization.
# NOTE(review): this is the "### Human / ### Assistant" layout; confirm it
# matches the conversation format the shipped weights were fine-tuned with.
Vicuna_PROMPT_FORMAT = "### Human:\n{prompt} \n ### Assistant:\n"


@PIPELINES.register_module(Tasks.text_generation,
                           module_name='Vicuna7b-text-generation-pipe')
class Vicuna7bTextGenerationPipeline(TextGenerationPipeline):
    """Text-generation pipeline that delegates directly to the model.

    Pre- and post-processing are pass-throughs: prompt formatting,
    tokenization and decoding all happen inside
    ``Vicuna7bTextGeneration.infer``.
    """

    def __init__(self, model: Union[Model, str], *args, **kwargs):
        """Build the pipeline.

        Args:
            model: Either an already constructed model object, or a string
                that is handed to ``Vicuna7bTextGeneration`` as ``model_dir``.
            *args: Accepted for signature compatibility; not forwarded.
            **kwargs: Forwarded to the parent pipeline constructor.
        """
        if isinstance(model, str):
            model = Vicuna7bTextGeneration(model)
        super().__init__(model=model, **kwargs)

    def preprocess(self, inputs, **preprocess_params) -> Any:
        """Return ``inputs`` untouched; the model does its own tokenization."""
        return inputs

    def _sanitize_parameters(self, **pipeline_parameters):
        """Route every call-time parameter to ``forward``.

        Returns:
            A ``(preprocess, forward, postprocess)`` tuple of parameter dicts
            in which only the forward dict is populated.
        """
        return {}, pipeline_parameters, {}

    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
        """Run the wrapped model on ``inputs`` with the given parameters."""
        return self.model(inputs, **forward_params)

    def postprocess(self, input, **kwargs) -> Dict[str, Any]:
        """Return the model output unchanged."""
        return input


@MODELS.register_module(Tasks.text_generation, module_name='Vicuna7b')
class Vicuna7bTextGeneration(TorchModel):
    """ModelScope model wrapper around a causal LM loaded with transformers."""

    def __init__(self, model_dir=None, *args, **kwargs):
        """Load tokenizer and half-precision weights from ``model_dir``.

        Args:
            model_dir: Directory holding the tokenizer files and checkpoint.
            *args: Forwarded to ``TorchModel.__init__``.
            **kwargs: Forwarded to ``TorchModel.__init__``.
        """
        super().__init__(model_dir, *args, **kwargs)
        self.logger = get_logger()
        self.tokenizer = LlamaTokenizer.from_pretrained(model_dir,
                                                        use_fast=False)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_dir,
            low_cpu_mem_usage=True,
            device_map="auto",
            torch_dtype=torch.float16)
        # Inference only: switch to eval mode once at load time.
        self.model = self.model.eval()

    def forward(self, input: Dict, *args, **kwargs) -> Dict[str, Any]:
        """Generate a reply and wrap it as ``{'text': reply}``.

        Args:
            input: The user message; it is passed as-is to ``infer``.
            *args: Ignored.
            **kwargs: Generation options forwarded to ``infer``.
        """
        return {'text': self.infer(input, **kwargs)}

    def quantize(self, bits: int):
        """Replace the wrapped model with ``self.model.quantize(bits)``.

        NOTE(review): this relies on the loaded model exposing a ``quantize``
        method; confirm the architecture in use provides one, otherwise this
        call raises ``AttributeError``.

        Returns:
            ``self``, so calls can be chained.
        """
        self.model = self.model.quantize(bits)
        return self

    def infer(self, input, max_new_tokens=1024, **kwargs):
        """Generate the assistant reply for a single user message.

        Args:
            input: User message inserted into ``Vicuna_PROMPT_FORMAT``.
            max_new_tokens: Upper bound on the number of generated tokens.
            **kwargs: Extra keyword arguments passed to ``generate``.

        Returns:
            The decoded continuation only (prompt tokens removed), with
            special tokens skipped and surrounding whitespace stripped.
        """
        kwargs['max_new_tokens'] = max_new_tokens
        device = self.model.device

        # Use a fresh local name instead of rebinding the ``input`` parameter.
        prompt = Vicuna_PROMPT_FORMAT.format(prompt=input)
        encoded = self.tokenizer(prompt, return_tensors="pt")
        input_ids = encoded.input_ids.to(device)

        # Hand the tokenizer's attention mask to generate() explicitly so it
        # does not have to derive one from the pad token id on its own. A
        # caller-supplied mask still takes precedence.
        attention_mask = encoded.get('attention_mask')
        if attention_mask is not None:
            kwargs.setdefault('attention_mask', attention_mask.to(device))

        output_ids = self.model.generate(input_ids, **kwargs)

        # generate() returns prompt + continuation; keep the continuation.
        prompt_length = len(input_ids[0])
        new_token_ids = output_ids[0][prompt_length:]

        reply = self.tokenizer.decode(new_token_ids,
                                      skip_special_tokens=True)
        return reply.strip()
"rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..400e3de --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,33 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +}