初始化项目,由ModelHub XC社区提供模型
Model: QuantFactory/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO-GGUF Source: Original Platform
This commit is contained in:
49
.gitattributes
vendored
Normal file
49
.gitattributes
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q5_1.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f263fc8711e0d69c5180b34be0b3548db90550ae7f380b4bd7bbaeb96ad47da0
|
||||
size 3179132864
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:060a60a2bd191d1dc841da8c8ec3acff074408584158c3323ae26e709ee34ec0
|
||||
size 4321957824
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e4bdc8662e7b326673d7ac0a81d2a0d77bbfa539bd03b1f989fbf79611e9fcf8
|
||||
size 4018919360
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1400bd9f4ebb9136483de68e0a744463883fd25ca9a13e49b05c7d979375defa
|
||||
size 3664500672
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:29bcd9037d3bf55eb20c4bd33d487f20e93c43d1466e332eb57a7dfb9594c000
|
||||
size 4661213120
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:674fa290d00b650c3b4b6cd23621fe4009c59e58ae472fcb9ce272067be6fb64
|
||||
size 5130254272
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7f45fa79bc6c9847ef9fbad08c3bb5a0f2dbb56d2e2200a5d37b260a57274e55
|
||||
size 4920735680
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:021212612fe6ecc796ed0144f05505be950acd3706fa2fb0ad211ed4b66c8408
|
||||
size 4692670400
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a9ae2d3754da808cad627ed7c2c926b99f1139e5505dd09a1f2283a8e6809543
|
||||
size 5599295424
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a9d05e61fb11a442e54fd134cee3a59a05e4af776dd8ebb53978e678aacecef3
|
||||
size 6068336576
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d9d2dd2738cf9eb1b270bb6f0bb605bf2ed087c8c7562fc3f6f2a8c1f00a0c11
|
||||
size 5732988864
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b8148df766908989aec769d6ec80988f190a979542eb231ee98d63a2549cfbeb
|
||||
size 5599295424
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:20e509e950f6864e6dc720403c1724e614418e8d53697c076876d960429a873d
|
||||
size 6596007872
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:06bbf0f2812008618b4604c465724da64e41de14d09001d62cd3198d4414a139
|
||||
size 8540772288
|
||||
247
README.md
Normal file
247
README.md
Normal file
@@ -0,0 +1,247 @@
|
||||
|
||||
---
|
||||
|
||||
base_model: EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code
|
||||
language:
|
||||
- en
|
||||
license: apache-2.0
|
||||
tags:
|
||||
- text-generation-inference
|
||||
- transformers
|
||||
- unsloth
|
||||
- llama
|
||||
- trl
|
||||
|
||||
---
|
||||
|
||||
[](https://hf.co/QuantFactory)
|
||||
|
||||
|
||||
# QuantFactory/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO-GGUF
|
||||
This is quantized version of [EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO](https://huggingface.co/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO) created using llama.cpp
|
||||
|
||||
# Original Model Card
|
||||
|
||||
|
||||
# Agent LLama
|
||||
|
||||
Experimental and revolutionary fine-tune with a DPO dataset to allow Llama 3.1 8B to be an agentic coder. It is fine-tuned with a code dataset for the Coder Agent.
|
||||
It has some built-in agent features:
|
||||
- search
|
||||
- calculator
|
||||
- ReAct. [Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629)
|
||||
- fine tuned ReAct for better responses
|
||||
|
||||
Other noticeable features:
|
||||
- Self learning using unsloth. (in progress)
|
||||
- can be used in RAG applications
|
||||
- Memory. [**please use Langchain memory , section Message persistence**](https://python.langchain.com/docs/tutorials/chatbot/)
|
||||
|
||||
It is a perfect fit for LangChain or LlamaIndex.
|
||||
|
||||
Context Window: 128K
|
||||
|
||||
### Installation
|
||||
```bash
|
||||
pip install --upgrade "transformers>=4.43.2" torch==2.3.1 accelerate vllm==0.5.3.post1
|
||||
```
|
||||
|
||||
Developers can easily integrate EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K into their projects using popular libraries like Transformers and vLLM. The following sections illustrate the usage with simple hands-on examples:
|
||||
|
||||
Optional: to use the built-in tools, please add to the system prompt: "Environment: ipython. Tools: brave_search, wolfram_alpha. Cutting Knowledge Date: December 2023. Today Date: 4 October 2024\n"
|
||||
|
||||
#### ToT - Tree of Thought
|
||||
- Use system prompt:
|
||||
```python
|
||||
"Imagine three different experts are answering this question.
|
||||
All experts will write down 1 step of their thinking,
|
||||
then share it with the group.
|
||||
Then all experts will go on to the next step, etc.
|
||||
If any expert realises they're wrong at any point then they leave.
|
||||
The question is..."
|
||||
```
|
||||
#### ReAct
|
||||
example from langchain agent - [langchain React agent](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/react/agent.py)
|
||||
- Use system prompt:
|
||||
```python
|
||||
"""
|
||||
Answer the following questions as best you can. You have access to the following tools:
|
||||
|
||||
{tools}
|
||||
|
||||
Use the following format:
|
||||
|
||||
Question: the input question you must answer
|
||||
Thought: you should always think about what to do
|
||||
Action: the action to take, should be one of [{tool_names}]
|
||||
Action Input: the input to the action
|
||||
Observation: the result of the action
|
||||
... (this Thought/Action/Action Input/Observation can repeat N times)
|
||||
Thought: I now know the final answer
|
||||
Final Answer: the final answer to the original input question
|
||||
|
||||
Begin!
|
||||
|
||||
Question: {input}
|
||||
Thought:{agent_scratchpad}
|
||||
"""
|
||||
```
|
||||
|
||||
### Conversational Use-case
|
||||
#### Use with [Transformers](https://github.com/huggingface/transformers)
|
||||
##### Using `transformers.pipeline()` API , best use for 4bit for fast response.
|
||||
```python
|
||||
import transformers
|
||||
import torch
|
||||
from langchain_community.llms import HuggingFaceEndpoint
|
||||
from langchain_community.chat_models.huggingface import ChatHuggingFace
|
||||
|
||||
from transformers import BitsAndBytesConfig
|
||||
|
||||
quantization_config = BitsAndBytesConfig(
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype="float16",
|
||||
bnb_4bit_use_double_quant=True,
|
||||
)
|
||||
|
||||
model_id = "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code"
|
||||
pipeline = transformers.pipeline(
|
||||
"text-generation",
|
||||
model=model_id,
|
||||
model_kwargs={"quantization_config": quantization_config}, #for fast response. For full 16bit inference, remove this code.
|
||||
device_map="auto",
|
||||
)
|
||||
messages = [
|
||||
{"role": "system", "content": """
|
||||
Environment: ipython. Tools: brave_search, wolfram_alpha. Cutting Knowledge Date: December 2023. Today Date: 4 October 2024\n
|
||||
You are a coding assistant with expert with everything\n
|
||||
Ensure any code you provide can be executed \n
|
||||
with all required imports and variables defined. List the imports. Structure your answer with a description of the code solution. \n
|
||||
write only the code. do not print anything else.\n
|
||||
debug code if error occurs. \n
|
||||
Here is the user question: {question}
|
||||
"""},
|
||||
{"role": "user", "content": "Create a bar plot showing the market capitalization of the top 7 publicly listed companies using matplotlib"}
|
||||
]
|
||||
outputs = pipeline(messages, max_new_tokens=128, do_sample=True, temperature=0.01, top_k=100, top_p=0.95)
|
||||
print(outputs[0]["generated_text"][-1])
|
||||
```
|
||||
|
||||
# Example:
|
||||
Please go to Colab for sample of the code using Langchain [Colab](https://colab.research.google.com/drive/129SEHVRxlr24r73yf34BKnIHOlD3as09?authuser=1)
|
||||
|
||||
# Unsloth Fast
|
||||
|
||||
```python
|
||||
%%capture
|
||||
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
|
||||
!pip install unsloth
|
||||
# Get latest Unsloth
|
||||
!pip install --upgrade --no-deps "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
|
||||
!pip install langchain_experimental
|
||||
|
||||
from unsloth import FastLanguageModel
|
||||
from google.colab import userdata
|
||||
|
||||
|
||||
# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
|
||||
fourbit_models = [
|
||||
"unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
|
||||
"unsloth/gemma-7b-it-bnb-4bit",
|
||||
] # More models at https://huggingface.co/unsloth
|
||||
|
||||
model, tokenizer = FastLanguageModel.from_pretrained(
|
||||
model_name = "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code",
|
||||
max_seq_length = 128000,
|
||||
load_in_4bit = True,
|
||||
token =userdata.get('HF_TOKEN')
|
||||
)
|
||||
def chatbot(query):
|
||||
messages = [
|
||||
{"from": "system", "value":
|
||||
"""
|
||||
Environment: ipython. Tools: brave_search, wolfram_alpha. Cutting Knowledge Date: December 2023. Today Date: 4 October 2024\n
|
||||
You are a coding assistant with expert with everything\n
|
||||
Ensure any code you provide can be executed \n
|
||||
with all required imports and variables defined. List the imports. Structure your answer with a description of the code solution. \n
|
||||
write only the code. do not print anything else.\n
|
||||
use ipython for search tool. \n
|
||||
debug code if error occurs. \n
|
||||
Here is the user question: {question}
|
||||
"""
|
||||
},
|
||||
{"from": "human", "value": query},
|
||||
]
|
||||
inputs = tokenizer.apply_chat_template(messages, tokenize = True, add_generation_prompt = True, return_tensors = "pt").to("cuda")
|
||||
|
||||
text_streamer = TextStreamer(tokenizer)
|
||||
_ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 2048, use_cache = True)
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Execute code (Make sure to use virtual environments)
|
||||
```bash
|
||||
python3 -m venv env
|
||||
source env/bin/activate
|
||||
```
|
||||
|
||||
## Execution code responses from Llama
|
||||
#### Please use execute python code function for local. For langchain, please use Python REPL() to execute code
|
||||
|
||||
Execute-code function, run locally in Python:
|
||||
```python
|
||||
def execute_Python_code(code):
|
||||
# A string stream to capture the outputs of exec
|
||||
output = io.StringIO()
|
||||
try:
|
||||
# Redirect stdout to the StringIO object
|
||||
with contextlib.redirect_stdout(output):
|
||||
# Allow imports
|
||||
exec(code, globals())
|
||||
except Exception as e:
|
||||
# If an error occurs, capture it as part of the output
|
||||
print(f"Error: {e}", file=output)
|
||||
return output.getvalue()
|
||||
```
|
||||
|
||||
Langchain python Repl
|
||||
- Install
|
||||
|
||||
```bash
|
||||
!pip install langchain_experimental
|
||||
```
|
||||
|
||||
Code:
|
||||
```python
|
||||
from langchain_core.tools import Tool
|
||||
from langchain_experimental.utilities import PythonREPL
|
||||
|
||||
python_repl = PythonREPL()
|
||||
|
||||
# You can create the tool to pass to an agent
|
||||
repl_tool = Tool(
|
||||
name="python_repl",
|
||||
description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
|
||||
func=python_repl.run,
|
||||
)
|
||||
repl_tool(outputs[0]["generated_text"][-1])
|
||||
```
|
||||
|
||||
# Safety inputs/ outputs procedures
|
||||
For all inputs, please use Llama-Guard: meta-llama/Llama-Guard-3-8B for safety classification.
|
||||
Go to model card [Llama-Guard](https://huggingface.co/meta-llama/Llama-Guard-3-8B)
|
||||
|
||||
|
||||
|
||||
# Uploaded model
|
||||
|
||||
- **Developed by:** EpistemeAI
|
||||
- **License:** apache-2.0
|
||||
- **Finetuned from model :** EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code
|
||||
|
||||
This llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
|
||||
|
||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
||||
{"framework": "pytorch", "task": "others", "allow_remote": true}
|
||||
Reference in New Issue
Block a user