From fb8377d81ea5879f04bbe903be3d186f287ddef9 Mon Sep 17 00:00:00 2001 From: Robin Davidsson Date: Wed, 15 May 2024 17:38:49 +0000 Subject: [PATCH] Create README.md --- README.md | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..9549f1b --- /dev/null +++ b/README.md @@ -0,0 +1,93 @@ +--- +tags: +- moe +- llama +- '3' +- llama 3 +- 4x8b +--- +# GGUF files of [Llama-3-Magenta-Instruct-4x8B-MoE](https://huggingface.co/RDson/Llama-3-Magenta-Instruct-4x8B-MoE) + + +# Llama-3-Magenta-Instruct-4x8B-MoE +This is an experimental MoE created from [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct), [nvidia/Llama3-ChatQA-1.5-8B](https://huggingface.co/nvidia/Llama3-ChatQA-1.5-8B), [Salesforce/SFR-Iterative-DPO-LLaMA-3-8B-R](https://huggingface.co/Salesforce/SFR-Iterative-DPO-LLaMA-3-8B-R) and [Muhammad2003/Llama3-8B-OpenHermes-DPO](https://huggingface.co/Muhammad2003/Llama3-8B-OpenHermes-DPO) using Mergekit. 
+ +Mergekit YAML file: +```yaml +base_model: Meta-Llama-3-8B-Instruct +experts: + - source_model: Meta-Llama-3-8B-Instruct + positive_prompts: + - "explain" + - "chat" + - "assistant" + - "think" + - "roleplay" + - "versatile" + - "helpful" + - "factual" + - "integrated" + - "adaptive" + - "comprehensive" + - "balanced" + negative_prompts: + - "specialized" + - "narrow" + - "focused" + - "limited" + - "specific" + - source_model: ChatQA-1.5-8B + positive_prompts: + - "python" + - "math" + - "solve" + - "code" + - "programming" + negative_prompts: + - "sorry" + - "cannot" + - "factual" + - "concise" + - "straightforward" + - "objective" + - "dry" + - source_model: SFR-Iterative-DPO-LLaMA-3-8B-R + positive_prompts: + - "chat" + - "assistant" + - "AI" + - "instructive" + - "clear" + - "directive" + - "helpful" + - "informative" + - source_model: Llama3-8B-OpenHermes-DPO + positive_prompts: + - "analytical" + - "accurate" + - "logical" + - "knowledgeable" + - "precise" + - "calculate" + - "compute" + - "solve" + - "work" + - "python" + - "code" + - "javascript" + - "programming" + - "algorithm" + - "tell me" + - "assistant" + negative_prompts: + - "creative" + - "abstract" + - "imaginative" + - "artistic" + - "emotional" + - "mistake" + - "inaccurate" +gate_mode: hidden +dtype: float16 +``` +Some inspiration for the Mergekit YAML file comes from [LoneStriker/Umbra-MoE-4x10.7-2.4bpw-h6-exl2](https://huggingface.co/LoneStriker/Umbra-MoE-4x10.7-2.4bpw-h6-exl2). \ No newline at end of file