From c2b455f5307068b0f88a490ee1f49944fe6086a0 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Wed, 17 Jun 2026 11:40:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: iandennismiller/LLama-2-MedText-13b-GGUF Source: Original Platform --- .gitattributes | 36 +++++++++++ LLama-2-MedText-13b-Q3_K_L.gguf | 3 + LLama-2-MedText-13b-Q4_K_S.gguf | 3 + LLama-2-MedText-13b-Q6_K.gguf | 3 + LLama-2-MedText-13b-f16.gguf | 3 + LLama-2-MedText-13b-q8_0.gguf | 3 + README.md | 111 ++++++++++++++++++++++++++++++++ config.json | 26 ++++++++ 8 files changed, 188 insertions(+) create mode 100644 .gitattributes create mode 100644 LLama-2-MedText-13b-Q3_K_L.gguf create mode 100644 LLama-2-MedText-13b-Q4_K_S.gguf create mode 100644 LLama-2-MedText-13b-Q6_K.gguf create mode 100644 LLama-2-MedText-13b-f16.gguf create mode 100644 LLama-2-MedText-13b-q8_0.gguf create mode 100644 README.md create mode 100644 config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..f15b49c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot 
filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +*.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/LLama-2-MedText-13b-Q3_K_L.gguf b/LLama-2-MedText-13b-Q3_K_L.gguf new file mode 100644 index 0000000..a91f974 --- /dev/null +++ b/LLama-2-MedText-13b-Q3_K_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93371f4e513bdffdc3a7cc164068f9b18ce3287326af67675def93abeab8e2fb +size 6929559424 diff --git a/LLama-2-MedText-13b-Q4_K_S.gguf b/LLama-2-MedText-13b-Q4_K_S.gguf new file mode 100644 index 0000000..57ae23a --- /dev/null +++ b/LLama-2-MedText-13b-Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b10593fe55fac46ea7749a5990eec53b0b3829a618f0b661aefcb4ec681a017 +size 7414331264 diff --git a/LLama-2-MedText-13b-Q6_K.gguf b/LLama-2-MedText-13b-Q6_K.gguf new file mode 100644 index 0000000..dbb2e7a --- /dev/null +++ b/LLama-2-MedText-13b-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34602cc7158d458c82cec54c838e9bdf7691b95ce9256657b6a493b07886f91b +size 10679140224 diff --git a/LLama-2-MedText-13b-f16.gguf b/LLama-2-MedText-13b-f16.gguf new file mode 100644 index 0000000..2817b0e --- 
/dev/null +++ b/LLama-2-MedText-13b-f16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0923d434aa32a017fc7abcc0b94940975375233113f7b345bfec4c9119783061 +size 26033303392 diff --git a/LLama-2-MedText-13b-q8_0.gguf b/LLama-2-MedText-13b-q8_0.gguf new file mode 100644 index 0000000..ea942bb --- /dev/null +++ b/LLama-2-MedText-13b-q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f12f95b748feba8d616514c759e969098339b41da11ad9c173a655a3b48ec19 +size 13831319392 diff --git a/README.md b/README.md new file mode 100644 index 0000000..effff06 --- /dev/null +++ b/README.md @@ -0,0 +1,111 @@ +--- +base_model: https://huggingface.co/truehealth/LLama-2-MedText-13b +inference: false +license: cc-by-nc-4.0 +model_creator: truehealth +model_name: LLama 2 MedText 13b +model_type: llama +prompt_template: '[INST]{prompt}[\INST]' +quantized_by: iandennismiller +pipeline_tag: text-generation +tags: + - medical +--- +# LLama-2-MedText-13b-GGUF + +Quantized GGUF of https://huggingface.co/truehealth/LLama-2-MedText-13b + +## Usage + +Interactive [llama.cpp](https://github.com/ggerganov/llama.cpp/) session: + +```bash +llama-cpp \ + --instruct \ + --color \ + --in-prefix "[INST] " \ + --in-suffix "[\INST] " \ + --model LLama-2-MedText-13b-q8_0.gguf + +== Running in interactive mode. == + - Press Ctrl+C to interject at any time. + - Press Return to return control to LLaMa. + - To return control without starting a new line, end your input with '/'. + - If you want to submit another line, end your input with '\'. + + +> [INST] How confident are you in your knowledge and abilities? +[\INST] [RSP] As an AI language model, I can provide information to the best of my ability based on the resources I was trained on, which were primarily before . While I strive to provide useful and accurate responses, my knowledge is not infinite, and I might not be able to provide professional medical advice or predictions in all cases. 
Additionally, healthcare decisions should always be evaluated in the context of an individual's unique circumstances and should be evaluated by a healthcare professional. +``` + +## Model card from truehealth/Llama-2-MedText-Delta-Preview + +Trained on https://huggingface.co/datasets/BI55/MedText. + +These are PEFT delta weights and need to be merged into LLama-2-13b to be used for inference. + +library_name: peft + +Training procedure + +The following bitsandbytes quantization config was used during training: + +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 + +Framework versions + +- PEFT 0.5.0.dev0 + +## Setup Notes + +### Download torch model + +This example demonstrates using `hfdownloader` to download a torch model from Hugging Face to `./storage` + +```bash +./hfdownloader -m truehealth/LLama-2-MedText-13b +``` + +If necessary, install `hfdownloader` from https://github.com/bodaay/HuggingFaceModelDownloader + +```bash +bash <(curl -sSL https://raw.githubusercontent.com/bodaay/HuggingFaceModelDownloader/master/scripts/gist_gethfd.sh) -h +``` + +### Quantize torch model with llama.cpp + +Quantize directly to q8_0 + +```bash +llama.cpp/convert.py --outtype q8_0 --outfile LLama-2-MedText-13b-q8_0.gguf ./models/Storage/truehealth_LLama-2-MedText-13b/pytorch_model-00001-of-00003.bin +``` + +Alternatively, first convert to f32 GGUF + +```bash +llama.cpp/convert.py --outtype f32 --outfile LLama-2-MedText-13b-f32.gguf ./models/Storage/truehealth_LLama-2-MedText-13b/pytorch_model-00001-of-00003.bin +``` + +Then quantize the f32 GGUF to lower bit resolutions + +```bash +llama.cpp/build/bin/quantize LLama-2-MedText-13b-f32.gguf LLama-2-MedText-13b-Q3_K_L.gguf Q3_K_L +llama.cpp/build/bin/quantize LLama-2-MedText-13b-f32.gguf LLama-2-MedText-13b-Q6_K.gguf Q6_K +``` + +### 
Distributing the model through Hugging Face + +```bash +mkvirtualenv -p `which python3.11` -a . ${PWD##*/} +python -m pip install huggingface_hub +huggingface-cli login +huggingface-cli lfs-enable-largefiles . +``` diff --git a/config.json b/config.json new file mode 100644 index 0000000..ade60a3 --- /dev/null +++ b/config.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "iandennismiller/LLama-2-MedText-13b", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 40, + "num_key_value_heads": 40, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.31.0", + "use_cache": true, + "vocab_size": 32000 + } \ No newline at end of file