Initialize the project; model provided by the ModelHub XC community

Model: ClueAI/ChatYuan-7B
Source: Original Platform
ModelHub XC
2026-04-12 13:16:58 +08:00
commit 4f9cb0a0a7
12 changed files with 321 additions and 0 deletions

ms_wrapper.py Normal file

@@ -0,0 +1,68 @@
import os
from typing import Any, Dict, Union

import torch
from modelscope.models.base import Model, TorchModel
from modelscope.models.builder import MODELS
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
from transformers import AutoTokenizer, LlamaForCausalLM

# Default to the first GPU unless the caller has already pinned devices.
if 'CUDA_VISIBLE_DEVICES' not in os.environ:
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'


@PIPELINES.register_module(
    Tasks.text_generation, module_name='ChatYuan-7B-text-generation-pipe')
class ChatYuan7BTextGenerationPipeline(Pipeline):

    def __init__(self, model: Union[Model, str], *args, **kwargs):
        # Accept either an already-built model or a model directory path.
        model = ChatYuan7BTextGeneration(model) if isinstance(model, str) else model
        super().__init__(model=model, **kwargs)

    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        # The raw prompt string is passed through unchanged.
        return inputs

    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
        # Delegate generation to the underlying model.
        return self.model(inputs)

    def postprocess(self, inputs, **kwargs) -> Dict[str, Any]:
        # The model already returns {'text': ...}; nothing to reformat.
        return inputs


@MODELS.register_module(Tasks.text_generation, module_name='ChatYuan-7B')
class ChatYuan7BTextGeneration(TorchModel):

    def __init__(self, model_dir=None, *args, **kwargs):
        super().__init__(model_dir, *args, **kwargs)
        self.logger = get_logger()
        # Load the tokenizer and the fp16 weights, sharding across available GPUs.
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        self.model = LlamaForCausalLM.from_pretrained(
            model_dir, torch_dtype=torch.float16, device_map='auto')
        self.model = self.model.eval()

    def forward(self, inputs: Dict) -> Dict[str, Any]:
        return {'text': self.infer(inputs)}

    def quantize(self, bits: int):
        # Only works with checkpoints whose model class exposes a quantize() method.
        self.model = self.model.quantize(bits)
        return self

    def infer(self, prompt):
        device = self.model.device
        input_ids = self.tokenizer(prompt, return_tensors='pt').input_ids.to(device)
        generate_ids = self.model.generate(
            input_ids, max_new_tokens=1024, do_sample=True, temperature=0.7)
        # Decode without special tokens so the generated text lines up with the
        # prompt before the prompt echo is stripped off.
        output = self.tokenizer.batch_decode(
            generate_ids, skip_special_tokens=True)[0]
        return output[len(prompt):]
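
For reference, a minimal invocation sketch. It assumes the repository's configuration.json wires Tasks.text_generation to the 'ChatYuan-7B-text-generation-pipe' pipeline registered above; the model id is taken from the commit header and the prompt is illustrative only.

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Assumption: the repo's configuration.json selects the
# 'ChatYuan-7B-text-generation-pipe' pipeline for this task.
pipe = pipeline(task=Tasks.text_generation, model='ClueAI/ChatYuan-7B')

# The pipeline passes the raw prompt string through and returns {'text': ...}.
result = pipe('Write a short poem about spring.')
print(result['text'])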