// build the input after conv2d (inp_raw --> patches)
// returns tensor with shape [n_embd, n_patches]
ggml_tensor * build_inp();
ggml_tensor*build_inp_raw(intchannels=3);
// apply a normalization layer (variant selected by `type`) to `cur`,
// using weight `mw` and bias `mb` (NOTE(review): either looks nullable for
// norms without affine params — TODO confirm), epsilon `norm_eps`;
// `il` is the layer index
ggml_tensor * build_norm(
        ggml_tensor * cur,
        ggml_tensor * mw,
        ggml_tensor * mb,
        norm_type     type,
        float         norm_eps,
        int           il) const;
// build a feed-forward block on `cur`: up projection (`up` + optional `up_b`),
// optional gate projection (`gate` + `gate_b`), activation selected by
// `type_op`, then down projection (`down` + optional `down_b`);
// `il` is the layer index
ggml_tensor * build_ffn(
        ggml_tensor * cur,
        ggml_tensor * up,
        ggml_tensor * up_b,
        ggml_tensor * gate,
        ggml_tensor * gate_b,
        ggml_tensor * down,
        ggml_tensor * down_b,
        ffn_op_type   type_op,
        int           il) const;
// build an attention block from pre-computed Q/K/V tensors, applying the
// attention mask `kq_mask` and scale `kq_scale`, followed by the output
// projection `wo` (+ optional bias `wo_b`); `il` is the layer index
ggml_tensor * build_attn(
        ggml_tensor * wo,
        ggml_tensor * wo_b,
        ggml_tensor * q_cur,
        ggml_tensor * k_cur,
        ggml_tensor * v_cur,
        ggml_tensor * kq_mask,
        float         kq_scale,
        int           il) const;
// implementation of the 2D RoPE without adding a new op in ggml
// this is not efficient (use double the memory), but works on all backends
// TODO: there was a more efficient implementation which relied on ggml_view and ggml_rope_ext_inplace, but the inplace rope does not work well with non-contiguous tensors; we should fix that and revert to the original implementation from https://github.com/ggml-org/llama.cpp/pull/13065