"""Gradio web UI that serves text generation from a local MLX language model."""

from mlx_lm import load, generate

import gradio as gr

# Load the model and tokenizer from the current directory once at startup,
# so every request reuses the same weights instead of reloading them.
model, tokenizer = load("./")


def predict(text):
    """Generate a model completion for the given prompt.

    Args:
        text: The user's prompt string.

    Returns:
        The generated completion text (up to 1024 tokens).
    """
    # verbose=True echoes generation progress to the server console only;
    # it does not affect the returned response.
    response = generate(model, tokenizer, max_tokens=1024, prompt=text, verbose=True)
    return response


# Simple single-input / single-output UI; multi-line boxes suit code prompts.
demo = gr.Interface(
    fn=predict,
    inputs=[gr.Textbox(lines=10, placeholder="Write a hello world in python")],
    outputs=[gr.Textbox(lines=10)],
)

if __name__ == "__main__":
    # Bind on all interfaces for LAN access; share=False keeps the app local
    # (no public Gradio tunnel).
    demo.launch(server_name="0.0.0.0", server_port=8502, share=False)