退回到 b7516 版本

This commit is contained in:
2026-01-16 18:12:13 +08:00
parent 9d7890f8c6
commit 7e0d40b535
380 changed files with 18454 additions and 38808 deletions

View File

@@ -21,7 +21,7 @@ int main(int argc, char ** argv) {
params.prompt = "Hello my name is";
params.n_predict = 32;
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_BATCHED, print_usage)) {
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON, print_usage)) {
return 1;
}
@@ -68,7 +68,7 @@ int main(int argc, char ** argv) {
auto sparams = llama_sampler_chain_default_params();
sparams.no_perf = false;
std::vector<llama_sampler_seq_config> sampler_configs;
std::vector<llama_sampler *> samplers;
for (int32_t i = 0; i < n_parallel; ++i) {
llama_sampler * smpl = llama_sampler_chain_init(sparams);
@@ -78,12 +78,7 @@ int main(int argc, char ** argv) {
llama_sampler_chain_add(smpl, llama_sampler_init_temp (params.sampling.temp));
llama_sampler_chain_add(smpl, llama_sampler_init_dist (params.sampling.seed));
sampler_configs.push_back({ i, smpl });
}
if (params.sampling.backend_sampling) {
ctx_params.samplers = sampler_configs.data();
ctx_params.n_samplers = sampler_configs.size();
samplers.push_back(smpl);
}
llama_context * ctx = llama_init_from_model(model, ctx_params);
@@ -185,7 +180,7 @@ int main(int argc, char ** argv) {
continue;
}
const llama_token new_token_id = llama_sampler_sample(sampler_configs[i].sampler, ctx, i_batch[i]);
const llama_token new_token_id = llama_sampler_sample(samplers[i], ctx, i_batch[i]);
// is it an end of generation? -> mark the stream as finished
if (llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_predict) {
@@ -241,15 +236,15 @@ int main(int argc, char ** argv) {
__func__, n_decode, (t_main_end - t_main_start) / 1000000.0f, n_decode / ((t_main_end - t_main_start) / 1000000.0f));
LOG("\n");
llama_perf_sampler_print(sampler_configs[0].sampler);
llama_perf_sampler_print(samplers[0]);
llama_perf_context_print(ctx);
fprintf(stderr, "\n");
llama_batch_free(batch);
for (auto & sampler_config : sampler_configs) {
llama_sampler_free(sampler_config.sampler);
for (auto & sampler_config : samplers) {
llama_sampler_free(sampler_config);
}
llama_free(ctx);