初始化项目,由ModelHub XC社区提供模型
Model: RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf Source: Original Platform
This commit is contained in:
57
.gitattributes
vendored
Normal file
57
.gitattributes
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q3_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q5_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q5_1.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2-0.5B-NashMD.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
3
Qwen2-0.5B-NashMD.IQ3_M.gguf
Normal file
3
Qwen2-0.5B-NashMD.IQ3_M.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7cb50ecf5b5c5e6599f7273982b60d0d99239e3370d6a46c7e6ee679091866b8
|
||||
size 342749920
|
||||
3
Qwen2-0.5B-NashMD.IQ3_S.gguf
Normal file
3
Qwen2-0.5B-NashMD.IQ3_S.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:80e0a807b22718f063783c447204bb00b4e47e2c4dbcf9c842f46f46bc1eef7f
|
||||
size 338605024
|
||||
3
Qwen2-0.5B-NashMD.IQ3_XS.gguf
Normal file
3
Qwen2-0.5B-NashMD.IQ3_XS.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1f4bbe07def81c0763f98f0c8bcb8ce89eb254ebd7c509e2b8c8b3eae9a4bae0
|
||||
size 338605024
|
||||
3
Qwen2-0.5B-NashMD.IQ4_NL.gguf
Normal file
3
Qwen2-0.5B-NashMD.IQ4_NL.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3301f78251aef16b03b35ee0da4097b78c1046090fb65669482e5d3facac9515
|
||||
size 354302944
|
||||
3
Qwen2-0.5B-NashMD.IQ4_XS.gguf
Normal file
3
Qwen2-0.5B-NashMD.IQ4_XS.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2068704d3e3696af59a76d6111f9f54d6bf7979a1133fbf996b0a6e158be2c86
|
||||
size 351442912
|
||||
3
Qwen2-0.5B-NashMD.Q2_K.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q2_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e04069d8775aebccef486287557d5c8418a0cb5f037be8ce3c1ef9212aae93ad
|
||||
size 338605024
|
||||
3
Qwen2-0.5B-NashMD.Q3_K.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q3_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8c99a6047861b376307d4e09d9b9e98b3bbfe43a70a9bffa28496359fe787f97
|
||||
size 355464160
|
||||
3
Qwen2-0.5B-NashMD.Q3_K_L.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q3_K_L.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:521194a101b7cdddbee9609a84dee7c8f516bfcb8484a8e9c6969a8d5b64f173
|
||||
size 369355744
|
||||
3
Qwen2-0.5B-NashMD.Q3_K_M.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q3_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8c99a6047861b376307d4e09d9b9e98b3bbfe43a70a9bffa28496359fe787f97
|
||||
size 355464160
|
||||
3
Qwen2-0.5B-NashMD.Q3_K_S.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q3_K_S.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:da91ab6bf0c2cf0fee8699e6fb154f0b56e70137eb16c9018dc592c97a3c48dc
|
||||
size 338260960
|
||||
3
Qwen2-0.5B-NashMD.Q4_0.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q4_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9125e1523d4e25e80adfa2487c29b32494d9396b33c231d0c2f4f64164a1a418
|
||||
size 352152544
|
||||
3
Qwen2-0.5B-NashMD.Q4_1.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q4_1.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:81b9397abf50eabca053bcd65c46feb8f69b09017072960603dc5d0f46421d42
|
||||
size 374516704
|
||||
3
Qwen2-0.5B-NashMD.Q4_K.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q4_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c8cfba81e8fb28795e511682ab15f1f7fdc53011ddb35c3bcf3f33a519a90982
|
||||
size 397805536
|
||||
3
Qwen2-0.5B-NashMD.Q4_K_M.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q4_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c8cfba81e8fb28795e511682ab15f1f7fdc53011ddb35c3bcf3f33a519a90982
|
||||
size 397805536
|
||||
3
Qwen2-0.5B-NashMD.Q4_K_S.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q4_K_S.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4a734941d0d0f91c15231dc45fbab12ef92e521187f7787561380cb2e6ce37ae
|
||||
size 385469408
|
||||
3
Qwen2-0.5B-NashMD.Q5_0.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q5_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:eb1ccd8aca01cb87db98f77656b8aa0472d2bbcf93f27fe86b1ea92f06ed8efc
|
||||
size 396880864
|
||||
3
Qwen2-0.5B-NashMD.Q5_1.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q5_1.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d9bcbfa90031d621996f2f69b37c31e71da1aed02df8405d24e2054cd5bc0f60
|
||||
size 419245024
|
||||
3
Qwen2-0.5B-NashMD.Q5_K.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q5_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1556009cb9e36dda8bf09464560c00fcbd91746d4bd3918485a060214ea6c4bc
|
||||
size 420083680
|
||||
3
Qwen2-0.5B-NashMD.Q5_K_M.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q5_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1556009cb9e36dda8bf09464560c00fcbd91746d4bd3918485a060214ea6c4bc
|
||||
size 420083680
|
||||
3
Qwen2-0.5B-NashMD.Q5_K_S.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q5_K_S.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:deb4d6afba9388dfdf51fba0bd602c046ffa1f678b3e221810a58ed96fa210fb
|
||||
size 412707808
|
||||
3
Qwen2-0.5B-NashMD.Q6_K.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q6_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2cc3a2da986cb096f3d140110ee287e3e7813b4a1ab296567d7491b9b5ce2b33
|
||||
size 505734112
|
||||
3
Qwen2-0.5B-NashMD.Q8_0.gguf
Normal file
3
Qwen2-0.5B-NashMD.Q8_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ebf51e54b6ccb5c229b7a081066cf3e091f3fe4fd2176eb828f682f7a323fe67
|
||||
size 531065824
|
||||
112
README.md
Normal file
112
README.md
Normal file
@@ -0,0 +1,112 @@
|
||||
Quantization made by Richard Erkhov.
|
||||
|
||||
[Github](https://github.com/RichardErkhov)
|
||||
|
||||
[Discord](https://discord.gg/pvy7H8DZMG)
|
||||
|
||||
[Request more models](https://github.com/RichardErkhov/quant_request)
|
||||
|
||||
|
||||
Qwen2-0.5B-NashMD - GGUF
|
||||
- Model creator: https://huggingface.co/qgallouedec/
|
||||
- Original model: https://huggingface.co/qgallouedec/Qwen2-0.5B-NashMD/
|
||||
|
||||
|
||||
| Name | Quant method | Size |
|
||||
| ---- | ---- | ---- |
|
||||
| [Qwen2-0.5B-NashMD.Q2_K.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q2_K.gguf) | Q2_K | 0.32GB |
|
||||
| [Qwen2-0.5B-NashMD.IQ3_XS.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.IQ3_XS.gguf) | IQ3_XS | 0.32GB |
|
||||
| [Qwen2-0.5B-NashMD.IQ3_S.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.IQ3_S.gguf) | IQ3_S | 0.32GB |
|
||||
| [Qwen2-0.5B-NashMD.Q3_K_S.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q3_K_S.gguf) | Q3_K_S | 0.32GB |
|
||||
| [Qwen2-0.5B-NashMD.IQ3_M.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.IQ3_M.gguf) | IQ3_M | 0.32GB |
|
||||
| [Qwen2-0.5B-NashMD.Q3_K.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q3_K.gguf) | Q3_K | 0.33GB |
|
||||
| [Qwen2-0.5B-NashMD.Q3_K_M.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q3_K_M.gguf) | Q3_K_M | 0.33GB |
|
||||
| [Qwen2-0.5B-NashMD.Q3_K_L.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q3_K_L.gguf) | Q3_K_L | 0.34GB |
|
||||
| [Qwen2-0.5B-NashMD.IQ4_XS.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.IQ4_XS.gguf) | IQ4_XS | 0.33GB |
|
||||
| [Qwen2-0.5B-NashMD.Q4_0.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q4_0.gguf) | Q4_0 | 0.33GB |
|
||||
| [Qwen2-0.5B-NashMD.IQ4_NL.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.IQ4_NL.gguf) | IQ4_NL | 0.33GB |
|
||||
| [Qwen2-0.5B-NashMD.Q4_K_S.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q4_K_S.gguf) | Q4_K_S | 0.36GB |
|
||||
| [Qwen2-0.5B-NashMD.Q4_K.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q4_K.gguf) | Q4_K | 0.37GB |
|
||||
| [Qwen2-0.5B-NashMD.Q4_K_M.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q4_K_M.gguf) | Q4_K_M | 0.37GB |
|
||||
| [Qwen2-0.5B-NashMD.Q4_1.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q4_1.gguf) | Q4_1 | 0.35GB |
|
||||
| [Qwen2-0.5B-NashMD.Q5_0.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q5_0.gguf) | Q5_0 | 0.37GB |
|
||||
| [Qwen2-0.5B-NashMD.Q5_K_S.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q5_K_S.gguf) | Q5_K_S | 0.38GB |
|
||||
| [Qwen2-0.5B-NashMD.Q5_K.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q5_K.gguf) | Q5_K | 0.39GB |
|
||||
| [Qwen2-0.5B-NashMD.Q5_K_M.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q5_K_M.gguf) | Q5_K_M | 0.39GB |
|
||||
| [Qwen2-0.5B-NashMD.Q5_1.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q5_1.gguf) | Q5_1 | 0.39GB |
|
||||
| [Qwen2-0.5B-NashMD.Q6_K.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q6_K.gguf) | Q6_K | 0.47GB |
|
||||
| [Qwen2-0.5B-NashMD.Q8_0.gguf](https://huggingface.co/RichardErkhov/qgallouedec_-_Qwen2-0.5B-NashMD-gguf/blob/main/Qwen2-0.5B-NashMD.Q8_0.gguf) | Q8_0 | 0.49GB |
|
||||
|
||||
|
||||
|
||||
|
||||
Original model description:
|
||||
---
|
||||
base_model: Qwen/Qwen2-0.5B-Instruct
|
||||
library_name: transformers
|
||||
model_name: Qwen2-0.5B-NashMD
|
||||
tags:
|
||||
- generated_from_trainer
|
||||
- trl
|
||||
- nash-md
|
||||
licence: license
|
||||
---
|
||||
|
||||
# Model Card for Qwen2-0.5B-NashMD
|
||||
|
||||
This model is a fine-tuned version of [Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct).
|
||||
It has been trained using [TRL](https://github.com/huggingface/trl).
|
||||
|
||||
## Quick start
|
||||
|
||||
```python
|
||||
from transformers import pipeline
|
||||
|
||||
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
||||
generator = pipeline("text-generation", model="qgallouedec/Qwen2-0.5B-NashMD", device="cuda")
|
||||
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
||||
print(output["generated_text"])
|
||||
```
|
||||
|
||||
## Training procedure
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/trl/runs/5r7w3wt4)
|
||||
|
||||
This model was trained with Nash-MD, a method introduced in [Nash Learning from Human Feedback](https://huggingface.co/papers/2312.00886).
|
||||
|
||||
### Framework versions
|
||||
|
||||
- TRL: 0.12.0.dev0
|
||||
- Transformers: 4.46.0.dev0
|
||||
- PyTorch: 2.4.1
|
||||
- Datasets: 3.0.2
|
||||
- Tokenizers: 0.20.0
|
||||
|
||||
## Citations
|
||||
|
||||
Cite Nash-MD as:
|
||||
|
||||
```bibtex
|
||||
@inproceedings{munos2024nash,
|
||||
title = {Nash Learning from Human Feedback},
|
||||
author = {R{\'{e}}mi Munos and Michal Valko and Daniele Calandriello and Mohammad Gheshlaghi Azar and Mark Rowland and Zhaohan Daniel Guo and Yunhao Tang and Matthieu Geist and Thomas Mesnard and C{\^{o}}me Fiegel and Andrea Michi and Marco Selvi and Sertan Girgin and Nikola Momchev and Olivier Bachem and Daniel J. Mankowitz and Doina Precup and Bilal Piot},
|
||||
year = 2024,
|
||||
booktitle = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024},
|
||||
publisher = {OpenReview.net},
|
||||
url = {https://openreview.net/forum?id=Y5AmNYiyCQ}
|
||||
}
|
||||
```
|
||||
|
||||
Cite TRL as:
|
||||
|
||||
```bibtex
|
||||
@misc{vonwerra2022trl,
|
||||
title = {{TRL: Transformer Reinforcement Learning}},
|
||||
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
|
||||
year = 2020,
|
||||
journal = {GitHub repository},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/huggingface/trl}}
|
||||
}
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user