目录
llama-3-8b.layers=32 llama-3-70b.layers=80
llama-3-8b.layers=32 llama-3-70b.layers=80
shard_mappings = {
"llama-3-8b": {
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
"TinygradDynamicShardInferenceEngine": Shard(model_id="llama3-8b-sfr", start_layer=0, end_layer=0, n_layers=32),
},
"llama-3-70b": {
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Met