Support loading weights when physical experts are different from logical experts (#6386)
This commit is contained in:
@@ -15,7 +15,7 @@ import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from typing import List, Optional
|
||||
|
||||
import torch
|
||||
import torch.distributed
|
||||
@@ -163,6 +163,19 @@ class ExpertLocationMetadata:
|
||||
logical_to_all_physical_map_num_valid=logical_to_all_physical_map_num_valid,
|
||||
)
|
||||
|
||||
# -------------------------------- usage ------------------------------------
|
||||
|
||||
def logical_to_all_physical(
    self, layer_id: int, logical_expert_id: int
) -> List[int]:
    """Return the physical expert ids recorded for one logical expert.

    Reads the entry of ``self.logical_to_all_physical_map`` selected by
    ``(layer_id, logical_expert_id)``, converts it to a plain Python list,
    and drops every ``-1`` value.

    Args:
        layer_id: First index into ``logical_to_all_physical_map``.
        logical_expert_id: Second index into ``logical_to_all_physical_map``.

    Returns:
        The remaining ids in their stored order; an empty list when every
        entry is ``-1``.
    """
    # NOTE(review): -1 presumably marks unused slots in the map row —
    # confirm against where the map is built.
    candidates = self.logical_to_all_physical_map[
        layer_id, logical_expert_id
    ].tolist()
    return [
        physical_expert_id
        for physical_expert_id in candidates
        if physical_expert_id != -1
    ]
|
||||
|
||||
|
||||
# Module-level slot for an ExpertLocationMetadata instance; initialised to None.
_global_expert_location_metadata: Optional[ExpertLocationMetadata] = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user