Support loading weights when physical experts are different from logical experts (#6386)

This commit is contained in:
fzyzcjy
2025-05-20 12:05:53 +08:00
committed by GitHub
parent d0443275f0
commit c471d39eb9
2 changed files with 37 additions and 1 deletions

View File

@@ -15,7 +15,7 @@ import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from typing import List, Optional
import torch
import torch.distributed
@@ -163,6 +163,19 @@ class ExpertLocationMetadata:
logical_to_all_physical_map_num_valid=logical_to_all_physical_map_num_valid,
)
# -------------------------------- usage ------------------------------------
def logical_to_all_physical(
    self, layer_id: int, logical_expert_id: int
) -> List[int]:
    """Return all physical expert ids backing one logical expert.

    Reads the row ``[layer_id, logical_expert_id]`` of
    ``self.logical_to_all_physical_map`` and drops the ``-1`` entries,
    which pad unused slots in the fixed-width mapping tensor.
    """
    mapped = self.logical_to_all_physical_map[layer_id, logical_expert_id]
    # -1 marks an empty slot; everything else is a valid physical expert id.
    return list(filter(lambda pid: pid != -1, mapped.tolist()))
_global_expert_location_metadata: Optional[ExpertLocationMetadata] = None