[{"title":"( 13 个子文件 48KB ) 本项目提供了基于910B的huggingface LLM模型的Tensor Parallel(TP)部署教程,同时也可以","children":[{"title":"LLM-TP-Inference-on-910B-main","children":[{"title":"utils.py <span style='color:#111;'> 2.25KB </span>","children":null,"spread":false},{"title":".github","children":[{"title":"FUNDING.yml <span style='color:#111;'> 787B </span>","children":null,"spread":false}],"spread":true},{"title":"main.py <span style='color:#111;'> 4.81KB </span>","children":null,"spread":false},{"title":"models","children":[{"title":"__init__.py <span style='color:#111;'> 279B </span>","children":null,"spread":false},{"title":"llama","children":[{"title":"__init__.py <span style='color:#111;'> 1B </span>","children":null,"spread":false},{"title":"llama_mp.py <span style='color:#111;'> 62.29KB </span>","children":null,"spread":false},{"title":"llama_tokenizer.py <span style='color:#111;'> 21.55KB </span>","children":null,"spread":false}],"spread":true},{"title":"base_mp.py <span style='color:#111;'> 12.74KB </span>","children":null,"spread":false},{"title":"qwen","children":[{"title":"__init__.py <span style='color:#111;'> 1B </span>","children":null,"spread":false},{"title":"qwen_generation_utils.py <span style='color:#111;'> 14.26KB </span>","children":null,"spread":false},{"title":"configuration_qwen.py <span style='color:#111;'> 2.29KB </span>","children":null,"spread":false},{"title":"qwen_mp.py <span style='color:#111;'> 55.74KB </span>","children":null,"spread":false}],"spread":true}],"spread":true},{"title":"README.md <span style='color:#111;'> 5.49KB </span>","children":null,"spread":false}],"spread":true}],"spread":true}]