from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


class EndpointHandler:
    """Serve chat-style text generation from a causal language model.

    The handler loads a tokenizer and model once at construction time and is
    then called with a request payload of the form::

        {"inputs": "<user message>", "parameters": {...}}

    Recognised ``parameters`` keys are ``max_new_tokens``, ``temperature``,
    ``top_p`` and ``do_sample``; all are optional.
    """

    # System message placed ahead of every user prompt.
    SYSTEM_PROMPT = "You are a cybersecurity expert assistant."

    # Generation defaults applied when the request does not override them.
    DEFAULT_MAX_NEW_TOKENS = 512
    DEFAULT_TEMPERATURE = 0.7
    DEFAULT_TOP_P = 0.9

    def __init__(self, path=""):
        """Load the tokenizer and the half-precision model found at ``path``.

        Args:
            path: Directory or model identifier handed to ``from_pretrained``.
        """
        # NOTE: trust_remote_code=True lets the checkpoint's repository run
        # its own Python code while loading; only use trusted checkpoints.
        self.tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            path,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        # Inference only: switch off dropout and other train-time behaviour.
        self.model.eval()

    def __call__(self, data):
        """Generate a reply to ``data["inputs"]``.

        Args:
            data: Request payload. ``"inputs"`` holds the user message
                (defaults to an empty string) and ``"parameters"`` an optional
                mapping of generation overrides. The payload is not modified.

        Returns:
            ``{"generated_text": <reply>}`` containing only the newly
            generated text, with the prompt and special tokens removed.
        """
        # Read without mutating the caller's payload; tolerate an explicit
        # ``"parameters": null`` in the request.
        inputs = data.get("inputs", "")
        parameters = data.get("parameters") or {}

        # Build chat format
        messages = [
            {"role": "system", "content": self.SYSTEM_PROMPT},
            {"role": "user", "content": inputs},
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # The rendered template already carries the special tokens it needs,
        # so tokenizing with the defaults could add them a second time.
        model_inputs = self.tokenizer(
            text,
            return_tensors="pt",
            add_special_tokens=False,
        ).to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **model_inputs,
                **self._generation_kwargs(parameters),
            )

        # generate() returns prompt + continuation; keep the continuation only.
        prompt_length = model_inputs["input_ids"].shape[1]
        response = self.tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        )
        return {"generated_text": response}

    def _generation_kwargs(self, parameters):
        """Translate request parameters into keyword arguments for generate()."""
        temperature = parameters.get("temperature", self.DEFAULT_TEMPERATURE)
        do_sample = bool(parameters.get("do_sample", True))

        # Sampling requires a strictly positive temperature; treat a request
        # for temperature <= 0 as a request for greedy decoding instead of
        # letting generate() reject it.
        if temperature is not None and temperature <= 0:
            do_sample = False

        kwargs = {
            "max_new_tokens": parameters.get(
                "max_new_tokens", self.DEFAULT_MAX_NEW_TOKENS
            ),
            "do_sample": do_sample,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if do_sample:
            # Sampling knobs are only meaningful (and only valid) when sampling.
            kwargs["temperature"] = temperature
            kwargs["top_p"] = parameters.get("top_p", self.DEFAULT_TOP_P)
        return kwargs