.coveragerc .gitignore .gitmodules .pre-commit-config.yaml LICENSE README.md __init__.py flagopen.png pyproject.toml run.py setup.py version.py ./__init__.py ./run.py ./setup.py ./version.py .github/CODEOWNERS .github/workflows/all-tests-metax.yml .github/workflows/all-tests-nvidia.yml .github/workflows/format.yml .github/workflows/functional-tests-metax.yml .github/workflows/functional-tests-nvidia.yml .github/workflows/unit-tests-nvidia.yml build_backend/__init__.py build_backend/flagscale_build_backend.py docker/Dockerfile.cuda docker/Dockerfile.metax docker/Dockerfile.ssh docker/README.md examples/aquila/__init__.py examples/aquila/conf/inference.yaml examples/aquila/conf/train.yaml examples/aquila/conf/train_auto_tuner.yaml examples/aquila/conf/train_cloud.yaml examples/aquila/conf/train_hetero.yaml examples/aquila/conf/inference/7b.yaml examples/aquila/conf/train/34b.yaml examples/aquila/conf/train/3b.yaml examples/aquila/conf/train/70b.yaml examples/aquila/conf/train/7b.yaml examples/aquila/conf/train/hetero.yaml examples/aquila/tokenizer/merges.txt examples/aquila/tokenizer/special_tokens.txt examples/aquila/tokenizer/vocab.json examples/aquila/tokenizer_hf/special_tokens_map.json examples/aquila/tokenizer_hf/tokenizer.json examples/aquila/tokenizer_hf/tokenizer_config.json examples/aquila/tokenizer_hf/vocab.json examples/aquila/utils/__init__.py examples/aquila/utils/convo_dataset.py examples/aquila/utils/convo_prompt.py examples/aquila/utils/cyg_conversation.py examples/cloud/conf/serve.yaml examples/cloud/conf/serve/cloud_model.yaml examples/deepseek_r1/conf/hostfile.txt examples/deepseek_r1/conf/serve.yaml examples/deepseek_r1/conf/serve/671b.yaml examples/deepseek_r1_distill_qwen/conf/serve.yaml examples/deepseek_r1_distill_qwen/conf/serve/32b.yaml examples/deepseek_v3/conf/serve.yaml examples/deepseek_v3/conf/serve_auto_tuner.yaml examples/deepseek_v3/conf/train.yaml examples/deepseek_v3/conf/train_11b_a2b.yaml examples/deepseek_v3/conf/train_auto_tuner.yaml examples/deepseek_v3/conf/train_hetero.yaml examples/deepseek_v3/conf/serve/671b.yaml examples/deepseek_v3/conf/train/11b_a2b.yaml examples/deepseek_v3/conf/train/16b_a3b.yaml examples/deepseek_v3/conf/train/hetero.yaml examples/emu3/conf/README.md examples/emu3/conf/chat.yaml examples/emu3/conf/compress.yaml examples/emu3/conf/gen.yaml examples/emu3/conf/compress/fp8_dynamic.yaml examples/emu3/conf/compress/model.yaml examples/emu3/conf/compress/w4a16.yaml examples/emu3/conf/inference/i2t.yaml examples/emu3/conf/inference/t2i.yaml examples/ernie45/conf/hostfile.txt examples/ernie45/conf/serve.yaml examples/ernie45/conf/serve/300b.yaml examples/grok2/conf/serve.yaml examples/grok2/conf/serve/270b.yaml examples/kimi_k2/conf/hostfile.txt examples/kimi_k2/conf/serve.yaml examples/kimi_k2/conf/serve/1t.yaml examples/llama2/conf/train.yaml examples/llama2/conf/train/7b.yaml examples/llama3/conf/train.yaml examples/llama3/conf/train_hetero.yaml examples/llama3/conf/train/70b.yaml examples/llama3/conf/train/70b_finetune.yaml examples/llama3/conf/train/8b.yaml examples/llama3/conf/train/8b_hetero.yaml examples/llava1_5/conf/train.yaml examples/llava1_5/conf/train/7b.yaml examples/llava_onevision/conf/compress.yaml examples/llava_onevision/conf/hostfile.txt examples/llava_onevision/conf/serve.yaml examples/llava_onevision/conf/train.yaml examples/llava_onevision/conf/compress/fp8_dynamic.yaml examples/llava_onevision/conf/compress/model.yaml examples/llava_onevision/conf/compress/w4a16.yaml examples/llava_onevision/conf/serve/7b.yaml examples/llava_onevision/conf/train/1_5b.yaml examples/llava_onevision/conf/train/7b.yaml examples/minicpm_o_2.6/conf/serve.yaml examples/minicpm_o_2.6/conf/serve/7b.yaml examples/minicpm_v_4/conf/serve.yaml examples/minicpm_v_4/conf/serve/4b.yaml examples/mixtral/conf/train.yaml examples/mixtral/conf/train/8x7b.yaml examples/openjourney/conf/inference.yaml examples/openjourney/conf/inference/t2i.yaml examples/qwen2/conf/rl.yaml examples/qwen2/conf/rl/7b.yaml examples/qwen2_5/__init__.py examples/qwen2_5/models.py examples/qwen2_5/conf/hostfile.txt examples/qwen2_5/conf/hostfile_nodes_envs.txt examples/qwen2_5/conf/rl.yaml examples/qwen2_5/conf/serve.yaml examples/qwen2_5/conf/serve_auto_tuner.yaml examples/qwen2_5/conf/serve_disagg_xpyd.yaml examples/qwen2_5/conf/serve_multiple_instance.yaml examples/qwen2_5/conf/serve_multiple_models.yaml examples/qwen2_5/conf/serve_nodes_envs.yaml examples/qwen2_5/conf/serve_ssh.yaml examples/qwen2_5/conf/train.yaml examples/qwen2_5/conf/rl/0_5b.yaml examples/qwen2_5/conf/serve/72b.yaml examples/qwen2_5/conf/serve/72b_nodes_envs.yaml examples/qwen2_5/conf/serve/7b.yaml examples/qwen2_5/conf/serve/7b_multiple_instance.yaml examples/qwen2_5/conf/serve/multiple_models.yaml examples/qwen2_5/conf/train/1_5b.yaml examples/qwen2_5/utils/__init__.py examples/qwen2_5/utils/convo_dataset.py examples/qwen2_5/utils/convo_prompt.py examples/qwen2_5/utils/cyg_conversation.py examples/qwen2_5_vl/conf/serve.yaml examples/qwen2_5_vl/conf/train.yaml examples/qwen2_5_vl/conf/serve/32b_instruct.yaml examples/qwen2_5_vl/conf/train/32b.yaml examples/qwen2_5_vl/conf/train/7b.yaml examples/qwen3/conf/serve.yaml examples/qwen3/conf/serve_atmb.yaml examples/qwen3/conf/train.yaml examples/qwen3/conf/train_auto_tuner.yaml examples/qwen3/conf/serve/0_6b.yaml examples/qwen3/conf/serve/8b.yaml examples/qwen3/conf/train/0_6b.yaml examples/qwen3/conf/train/14b.yaml examples/qwen3/conf/train/235b_a22b.yaml examples/qwen3/conf/train/30b_a3b.yaml examples/qwen3/conf/train/32b.yaml examples/qwq/conf/serve.yaml examples/qwq/conf/serve/32b.yaml examples/robobrain/compress.yaml examples/robobrain/conf/model.yaml examples/robobrain/conf/serve.yaml examples/robobrain/conf/w8a16.yaml examples/robobrain/conf/serve/7b.yaml examples/robobrain2/conf/serve.yaml examples/robobrain2/conf/serve/32b.yaml examples/robobrain2/conf/serve/7b.yaml flag_scale.egg-info/PKG-INFO flag_scale.egg-info/SOURCES.txt flag_scale.egg-info/dependency_links.txt flag_scale.egg-info/entry_points.txt flag_scale.egg-info/requires.txt flag_scale.egg-info/top_level.txt flagscale/__init__.py flagscale/cli.py flagscale/logger.py flagscale/patches_utils.py flagscale/utils.py flagscale/agent/__init__.py flagscale/agent/collaboration/__init__.py flagscale/agent/collaboration/collaborator.py flagscale/backends/Megatron-LM/examples/multimodal/mlp_converter.py.patch flagscale/backends/Megatron-LM/megatron/__init__.py.patch flagscale/backends/Megatron-LM/megatron/core/model_parallel_config.py.patch flagscale/backends/Megatron-LM/megatron/core/optimizer_param_scheduler.py.patch flagscale/backends/Megatron-LM/megatron/core/parallel_state.py.patch flagscale/backends/Megatron-LM/megatron/core/timers.py.patch flagscale/backends/Megatron-LM/megatron/core/datasets/blended_dataset.py.patch flagscale/backends/Megatron-LM/megatron/core/datasets/blended_megatron_dataset_builder.py.patch flagscale/backends/Megatron-LM/megatron/core/datasets/gpt_dataset.py.patch flagscale/backends/Megatron-LM/megatron/core/datasets/utils.py.patch flagscale/backends/Megatron-LM/megatron/core/dist_checkpointing/exchange_utils.py.patch flagscale/backends/Megatron-LM/megatron/core/dist_checkpointing/mapping.py.patch flagscale/backends/Megatron-LM/megatron/core/dist_checkpointing/serialization.py.patch flagscale/backends/Megatron-LM/megatron/core/dist_checkpointing/validation.py.patch flagscale/backends/Megatron-LM/megatron/core/dist_checkpointing/strategies/filesystem_async.py.patch flagscale/backends/Megatron-LM/megatron/core/dist_checkpointing/strategies/torch.py.patch flagscale/backends/Megatron-LM/megatron/core/distributed/finalize_model_grads.py.patch flagscale/backends/Megatron-LM/megatron/core/extensions/transformer_engine.py.patch flagscale/backends/Megatron-LM/megatron/core/models/common/embeddings/rotary_pos_embedding.py.patch flagscale/backends/Megatron-LM/megatron/core/models/common/language_module/language_module.py.patch flagscale/backends/Megatron-LM/megatron/core/models/gpt/gpt_layer_specs.py.patch flagscale/backends/Megatron-LM/megatron/core/models/multimodal/llava_model.py.patch flagscale/backends/Megatron-LM/megatron/core/optimizer/__init__.py.patch flagscale/backends/Megatron-LM/megatron/core/optimizer/clip_grads.py.patch flagscale/backends/Megatron-LM/megatron/core/optimizer/optimizer.py.patch flagscale/backends/Megatron-LM/megatron/core/optimizer/optimizer_config.py.patch flagscale/backends/Megatron-LM/megatron/core/pipeline_parallel/p2p_communication.py.patch flagscale/backends/Megatron-LM/megatron/core/pipeline_parallel/schedules.py.patch flagscale/backends/Megatron-LM/megatron/core/transformer/attention.py.patch flagscale/backends/Megatron-LM/megatron/core/transformer/module.py.patch flagscale/backends/Megatron-LM/megatron/core/transformer/transformer_block.py.patch flagscale/backends/Megatron-LM/megatron/core/transformer/transformer_config.py.patch flagscale/backends/Megatron-LM/megatron/core/transformer/transformer_layer.py.patch flagscale/backends/Megatron-LM/megatron/core/transformer/moe/experts.py.patch flagscale/backends/Megatron-LM/megatron/core/transformer/moe/moe_utils.py.patch flagscale/backends/Megatron-LM/megatron/inference/text_generation/sampling.py.patch flagscale/backends/Megatron-LM/megatron/inference/text_generation/tokenization.py.patch flagscale/backends/Megatron-LM/megatron/training/arguments.py.patch flagscale/backends/Megatron-LM/megatron/training/checkpointing.py.patch flagscale/backends/Megatron-LM/megatron/training/global_vars.py.patch flagscale/backends/Megatron-LM/megatron/training/initialize.py.patch flagscale/backends/Megatron-LM/megatron/training/utils.py.patch flagscale/backends/Megatron-LM/megatron/training/tokenizer/gpt2_tokenization.py.patch flagscale/backends/Megatron-LM/megatron/training/tokenizer/tokenization_utils.py.patch flagscale/backends/Megatron-LM/megatron/training/tokenizer/tokenizer.py.patch flagscale/backends/Megatron-LM/tasks/main.py.patch flagscale/backends/Megatron-LM/tasks/aquila/datasets.py.patch flagscale/backends/Megatron-LM/tasks/aquila/evaluate.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/test_parallel_state.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/test_utils.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/data/__init__.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/data/test_builder.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/dist_checkpointing/__init__.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/dist_checkpointing/test_flattened_resharding.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/dist_checkpointing/test_optimizer.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/dist_checkpointing/test_serialization.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/dist_checkpointing/utils.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/dist_checkpointing/models/test_bert_model.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/dist_checkpointing/models/test_mlp_glu.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/dist_checkpointing/models/test_moe_experts.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/distributed/test_grad_sync_with_expert_parallel.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/export/trtllm/test_distributed_fp8.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/export/trtllm/test_single_device_fp8.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/models/test_bert_model.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/models/test_llava_model.py.patch flagscale/backends/Megatron-LM/tests/unit_tests/transformer/test_transformer_block.py.patch flagscale/backends/Megatron-LM/tools/preprocess_data.py.patch flagscale/backends/Megatron-LM/tools/checkpoint/saver_legacy.py.patch flagscale/backends/llama.cpp/README.md flagscale/backends/sglang/python/sglang/__init__.py.patch flagscale/backends/sglang/python/sglang/srt/model_executor/model_runner.py.patch flagscale/backends/vllm/vllm/sampling_params.py.patch flagscale/backends/vllm/vllm/sequence.py.patch flagscale/backends/vllm/vllm/core/block_manager.py.patch flagscale/backends/vllm/vllm/core/scheduler.py.patch flagscale/backends/vllm/vllm/distributed/kv_transfer/kv_connector/factory.py.patch flagscale/backends/vllm/vllm/distributed/kv_transfer/kv_connector/p2p_connector.py.patch flagscale/backends/vllm/vllm/distributed/kv_transfer/kv_connector/v1/p2p/flagcx_p2p_nccl_engine.py.patch flagscale/backends/vllm/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py.patch flagscale/backends/vllm/vllm/distributed/kv_transfer/kv_pipe/flagcx_p2p_nccl_pipe.py.patch flagscale/backends/vllm/vllm/distributed/kv_transfer/kv_pipe/p2p_nccl_pipe.py.patch flagscale/backends/vllm/vllm/engine/llm_engine.py.patch flagscale/backends/vllm/vllm/engine/output_processor/single_step.py.patch flagscale/backends/vllm/vllm/inputs/data.py.patch flagscale/backends/vllm/vllm/inputs/preprocess.py.patch flagscale/backends/vllm/vllm/model_executor/sampling_metadata.py.patch flagscale/backends/vllm/vllm/model_executor/layers/logits_processor.py.patch flagscale/backends/vllm/vllm/model_executor/models/llava_onevision.py.patch flagscale/backends/vllm/vllm/model_executor/models/minicpmv.py.patch flagscale/backends/vllm/vllm/model_executor/models/registry.py.patch flagscale/backends/vllm/vllm/model_executor/models/siglip.py.patch flagscale/backends/vllm/vllm/v1/worker/gpu_model_runner.py.patch flagscale/backends/vllm/vllm/worker/model_runner.py.patch flagscale/compress/__init__.py flagscale/compress/adapter.py flagscale/compress/combined_algo.py flagscale/compress/compressor.py flagscale/compress/compressor_emu3.py flagscale/compress/compressor_llava_ov.py flagscale/compress/algo/__init__.py flagscale/compress/algo/algo_base.py flagscale/inference/__init__.py flagscale/inference/arguments.py flagscale/inference/diffusion_entrypoint.py flagscale/inference/inference_aquila.py flagscale/inference/inference_emu3.py flagscale/inference/inference_engine.py flagscale/inference/processing_emu3.py flagscale/inference/runtime_context.py flagscale/runner/__init__.py flagscale/runner/runner_base.py flagscale/runner/runner_compress.py flagscale/runner/runner_inference.py flagscale/runner/runner_rl.py flagscale/runner/runner_serve.py flagscale/runner/runner_train.py flagscale/runner/utils.py flagscale/runner/auto_tuner/__init__.py flagscale/runner/auto_tuner/generate.py flagscale/runner/auto_tuner/memory_model.py flagscale/runner/auto_tuner/platform.py flagscale/runner/auto_tuner/tuner.py flagscale/runner/auto_tuner/utils.py flagscale/runner/auto_tuner/prune/history.py flagscale/runner/auto_tuner/prune/memory.py flagscale/runner/auto_tuner/prune/pruner.py flagscale/runner/auto_tuner/record/recorder.py flagscale/runner/auto_tuner/search/algorithm.py flagscale/runner/auto_tuner/search/searcher.py flagscale/runner/estimator/__init__.py flagscale/runner/estimator/meta_attention.py flagscale/runner/estimator/meta_base.py flagscale/runner/estimator/meta_functional.py flagscale/runner/estimator/meta_gpt.py flagscale/runner/estimator/meta_mlp.py flagscale/runner/estimator/meta_modules.py flagscale/runner/estimator/meta_registry.py flagscale/runner/estimator/meta_tensor.py flagscale/runner/estimator/meta_transformer_layer.py flagscale/runner/estimator/utils.py flagscale/serve/README.md flagscale/serve/__init__.py flagscale/serve/arguments.py flagscale/serve/core.py flagscale/serve/dag_utils.py flagscale/serve/engine.py flagscale/serve/run_disagg_xpyd_router.py flagscale/serve/run_fs_serve_vllm.py flagscale/serve/run_inference_engine.py flagscale/serve/run_serve.py flagscale/serve/utils.py flagscale/serve/args_mapping/README.md flagscale/serve/args_mapping/common_args.yaml flagscale/serve/args_mapping/gen_template_conf.py flagscale/serve/args_mapping/gen_template_funcs.py flagscale/serve/args_mapping/mapping.py flagscale/serve/args_mapping/mapping.yaml flagscale/serve/args_mapping/mapping_funcs/llama_cpp.py flagscale/serve/metric/__init__.py flagscale/serve/metric/serve_metric.py flagscale/train/__init__.py flagscale/train/arguments.py flagscale/train/extra_valid.py flagscale/train/global_vars.py flagscale/train/spiky_loss.py flagscale/train/stablelm2_scheduler.py flagscale/train/theoretical_memory_usage.py flagscale/train/train.py flagscale/train/train_aquila_sft.py flagscale/train/train_gpt.py flagscale/train/train_llava.py flagscale/train/train_llava_onevision.py flagscale/train/train_qwen2_5_vl.py flagscale/train/datasets/concated_indexed_dataset.py flagscale/train/datasets/sft_dataset.py flagscale/train/dualpipev/dualpipev_schedules.py flagscale/train/dualpipev/fb_overlap/gpt_model.py flagscale/train/dualpipev/fb_overlap/transformer_block.py flagscale/train/dualpipev/fb_overlap/transformer_layer.py flagscale/train/dualpipev/fb_overlap/modules/attention.py flagscale/train/dualpipev/fb_overlap/modules/token_dispatcher.py flagscale/train/dualpipev/fb_overlap/modules/utils.py flagscale/train/dualpipev/fb_overlap/overlap_funcs/bwd.py flagscale/train/dualpipev/fb_overlap/overlap_funcs/fwd.py flagscale/train/dualpipev/fb_overlap/overlap_funcs/fwdbwd.py flagscale/train/hetero/__init__.py flagscale/train/hetero/p2p_communication.py flagscale/train/hetero/parallel_context.py flagscale/train/models/__init__.py flagscale/train/models/llava_onevision/__init__.py flagscale/train/models/llava_onevision/clip_vit_model.py flagscale/train/models/llava_onevision/config.py flagscale/train/models/llava_onevision/dataloader_provider.py flagscale/train/models/llava_onevision/dataset_helpers.py flagscale/train/models/llava_onevision/layer_specs.py flagscale/train/models/llava_onevision/llava_onevision_model.py flagscale/train/models/qwen2_5_vl/QuickStart.md flagscale/train/models/qwen2_5_vl/__init__.py flagscale/train/models/qwen2_5_vl/language_module.py flagscale/train/models/qwen2_5_vl/layer_specs.py flagscale/train/models/qwen2_5_vl/qwen2_5_vl_model.py flagscale/train/models/qwen2_5_vl/tensor_parallel.py flagscale/train/models/qwen2_5_vl/transformer_config.py flagscale/train/models/qwen2_5_vl/vision_attention.py flagscale/train/models/qwen2_5_vl/vision_transformer_block.py flagscale/train/models/qwen2_5_vl/vit_model.py flagscale/transforms/__init__.py flagscale/transforms/hook.py flagscale/transforms/log_io_transformation.py flagscale/transforms/state_scope_transformation.py flagscale/transforms/state_store.py flagscale/transforms/transformation.py hardware/LICENSE hardware/LICENSE_CN hardware/README.md hardware/README_CN.md hardware/patch_history.yaml hardware/BI_V150/FlagScale/diff.patch.encrypted hardware/BI_V150/FlagScale/diff.yaml hardware/BI_V150/Megatron-LM/diff.patch.encrypted hardware/BI_V150/Megatron-LM/diff.yaml hardware/BI_V150/vllm/diff.patch.encrypted hardware/BI_V150/vllm/diff.yaml hardware/Cambricon_MLU/FlagScale/diff.patch hardware/Cambricon_MLU/FlagScale/diff.yaml hardware/Cambricon_MLU/vllm/diff.patch hardware/Cambricon_MLU/vllm/diff.yaml hardware/Huawei_Atlas800TA3/FlagScale/diff.patch.encrypted hardware/Huawei_Atlas800TA3/FlagScale/diff.yaml hardware/Huawei_Atlas800TA3/Megatron-LM/diff.patch.encrypted hardware/Huawei_Atlas800TA3/Megatron-LM/diff.yaml hardware/Huawei_Atlas800TA3/vllm/diff.patch.encrypted hardware/Huawei_Atlas800TA3/vllm/diff.yaml hardware/Hygon_BW1000/FlagScale/diff.patch.encrypted hardware/Hygon_BW1000/FlagScale/diff.yaml hardware/Hygon_BW1000/Megatron-LM/diff.patch hardware/Hygon_BW1000/Megatron-LM/diff.yaml hardware/Hygon_BW1000/vllm/diff.patch.encrypted hardware/Hygon_BW1000/vllm/diff.yaml hardware/Kunlunxin_R310p/FlagScale/diff.patch.encrypted hardware/Kunlunxin_R310p/FlagScale/diff.yaml hardware/Kunlunxin_R310p/Megatron-LM/diff.patch.encrypted hardware/Kunlunxin_R310p/Megatron-LM/diff.yaml hardware/Kunlunxin_R310p/vllm/diff.patch.encrypted hardware/Kunlunxin_R310p/vllm/diff.yaml hardware/MUSA_S5000/FlagScale/diff.patch hardware/MUSA_S5000/FlagScale/diff.yaml hardware/MUSA_S5000/Megatron-LM/diff.patch hardware/MUSA_S5000/Megatron-LM/diff.yaml hardware/MUSA_S5000/vllm/diff.patch hardware/MUSA_S5000/vllm/diff.yaml hardware/Metax_C550/FlagScale/diff.patch hardware/Metax_C550/FlagScale/diff.yaml hardware/Metax_C550/Megatron-LM/diff.patch hardware/Metax_C550/Megatron-LM/diff.yaml hardware/Metax_C550/vllm/diff.patch hardware/Metax_C550/vllm/diff.yaml hardware/Tsing_micro/FlagScale/diff.patch.encrypted hardware/Tsing_micro/FlagScale/diff.yaml hardware/Tsing_micro/Megatron-LM/diff.patch.encrypted hardware/Tsing_micro/Megatron-LM/diff.yaml hardware/Tsing_micro/vllm/diff.patch.encrypted hardware/Tsing_micro/vllm/diff.yaml install/README.md install/install-requirements-metax.sh install/install-requirements.sh requirements/requirements-base.txt requirements/requirements-common.txt requirements/compress/requirements-dev.txt requirements/inference/requirements.txt requirements/inference/vllm/README.md requirements/serving/requirements.txt requirements/train/megatron/requirements-cuda.txt tests/README.md tests/functional_tests/test_cases/hetero_train/aquila/conf/dp2dp4_shared_embedding.yaml tests/functional_tests/test_cases/hetero_train/aquila/conf/tp2dp1pp1_tp2dp2pp1_tp1dp2pp1.yaml tests/functional_tests/test_cases/hetero_train/aquila/conf/tp2pp1_tp4pp1_tp2pp1.yaml tests/functional_tests/test_cases/hetero_train/aquila/conf/train/dp2dp4_shared_embedding.yaml tests/functional_tests/test_cases/hetero_train/aquila/conf/train/tp2dp1pp1_tp2dp2pp1_tp1dp2pp1.yaml tests/functional_tests/test_cases/hetero_train/aquila/conf/train/tp2pp1_tp4pp1_tp2pp1.yaml tests/functional_tests/test_cases/hetero_train/aquila/results_gold/dp2dp4_shared_embedding.json tests/functional_tests/test_cases/hetero_train/aquila/results_gold/tp2dp1pp1_tp2dp2pp1_tp1dp2pp1.json tests/functional_tests/test_cases/hetero_train/aquila/results_gold/tp2pp1_tp4pp1_tp2pp1.json tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-flaggems-metax/conf/7b-tp2.yaml tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-flaggems-metax/conf/inference/7b-tp2.yaml tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-flaggems-metax/results_gold/7b-tp2 tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-flaggems/conf/7b-tp2.yaml tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-flaggems/conf/inference/7b-tp2.yaml tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-flaggems/results_gold/7b-tp2 tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-metax/conf/7b-tp2.yaml tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-metax/conf/inference/7b-tp2.yaml tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-metax/results_gold/7b-tp2 tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen/conf/7b-tp2.yaml tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen/conf/inference/7b-tp2.yaml tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen/results_gold/7b-tp2 tests/functional_tests/test_cases/inference/opi_llama3_1_instruct-flaggems-metax/conf/8b-tp2.yaml tests/functional_tests/test_cases/inference/opi_llama3_1_instruct-flaggems-metax/conf/inference/8b-tp2.yaml tests/functional_tests/test_cases/inference/opi_llama3_1_instruct-flaggems-metax/results_gold/8b-tp2 tests/functional_tests/test_cases/inference/opi_llama3_1_instruct-metax/conf/8b-tp2.yaml tests/functional_tests/test_cases/inference/opi_llama3_1_instruct-metax/conf/inference/8b-tp2.yaml tests/functional_tests/test_cases/inference/opi_llama3_1_instruct-metax/results_gold/8b-tp2 tests/functional_tests/test_cases/inference/qwen3-flaggems-metax/conf/4b-tp2.yaml tests/functional_tests/test_cases/inference/qwen3-flaggems-metax/conf/inference/4b-tp2.yaml tests/functional_tests/test_cases/inference/qwen3-flaggems-metax/results_gold/4b-tp2 tests/functional_tests/test_cases/inference/qwen3-flaggems/conf/4b-tp2.yaml tests/functional_tests/test_cases/inference/qwen3-flaggems/conf/inference/4b-tp2.yaml tests/functional_tests/test_cases/inference/qwen3-flaggems/results_gold/4b-tp2 tests/functional_tests/test_cases/inference/qwen3-metax/conf/4b-tp2.yaml tests/functional_tests/test_cases/inference/qwen3-metax/conf/inference/4b-tp2.yaml tests/functional_tests/test_cases/inference/qwen3-metax/results_gold/4b-tp2 tests/functional_tests/test_cases/inference/qwen3/conf/4b-tp2.yaml tests/functional_tests/test_cases/inference/qwen3/conf/inference/4b-tp2.yaml tests/functional_tests/test_cases/inference/qwen3/results_gold/4b-tp2 tests/functional_tests/test_cases/rl/qwen2_5/conf/0_5b.yaml tests/functional_tests/test_cases/rl/qwen2_5/conf/rl/0_5b.yaml tests/functional_tests/test_cases/rl/qwen2_5/results_gold/0_5b.json tests/functional_tests/test_cases/serve/base/conf/multiple_model.yaml tests/functional_tests/test_cases/serve/base/conf/serve/multiple_model.yaml tests/functional_tests/test_cases/serve/qwen2_5/conf/0.5b.yaml tests/functional_tests/test_cases/serve/qwen2_5/conf/0.5b_multiple_instance.yaml tests/functional_tests/test_cases/serve/qwen2_5/conf/serve/0.5b.yaml tests/functional_tests/test_cases/serve/qwen2_5/conf/serve/0.5b_multiple_instance.yaml tests/functional_tests/test_cases/serve/utils/models/base_model.py tests/functional_tests/test_cases/serve/utils/models/util_models/util_model.py tests/functional_tests/test_cases/train/aquila/conf/tp2_pp2.yaml tests/functional_tests/test_cases/train/aquila/conf/tp4_pp2.yaml tests/functional_tests/test_cases/train/aquila/conf/train/tp2_pp2.yaml tests/functional_tests/test_cases/train/aquila/conf/train/tp4_pp2.yaml tests/functional_tests/test_cases/train/aquila/results_gold/tp2_pp2.json tests/functional_tests/test_cases/train/aquila/results_gold/tp4_pp2.json tests/functional_tests/test_cases/train/deepseek/conf/tp2_pp2_ep2.yaml tests/functional_tests/test_cases/train/deepseek/conf/train/tp2_pp2_ep2.yaml tests/functional_tests/test_cases/train/deepseek/results_gold/tp2_pp2_ep2.json tests/functional_tests/test_cases/train/llava_onevision/pretrain_dataset.yaml tests/functional_tests/test_cases/train/llava_onevision/conf/tp2.yaml tests/functional_tests/test_cases/train/llava_onevision/conf/tp4.yaml tests/functional_tests/test_cases/train/llava_onevision/conf/train/tp2.yaml tests/functional_tests/test_cases/train/llava_onevision/conf/train/tp4.yaml tests/functional_tests/test_cases/train/llava_onevision/results_gold/tp2.json tests/functional_tests/test_cases/train/llava_onevision/results_gold/tp4.json tests/functional_tests/test_cases/train/mixtral/conf/tp2_pp1_ep2.yaml tests/functional_tests/test_cases/train/mixtral/conf/tp4_pp1_ep2.yaml tests/functional_tests/test_cases/train/mixtral/conf/train/tp2_pp1_ep2.yaml tests/functional_tests/test_cases/train/mixtral/conf/train/tp4_pp1_ep2.yaml tests/functional_tests/test_cases/train/mixtral/results_gold/tp2_pp1_ep2.json tests/functional_tests/test_cases/train/mixtral/results_gold/tp4_pp1_ep2.json tests/functional_tests/test_utils/conftest.py tests/functional_tests/test_utils/test_call.py tests/functional_tests/test_utils/test_result.py tests/scripts/_gpu_check.sh tests/scripts/_tests_log.sh tests/scripts/_tests_stop.sh tests/scripts/utils.sh tests/scripts/functional_tests/config.yml tests/scripts/functional_tests/parse_config.py tests/scripts/functional_tests/test_all.sh tests/scripts/functional_tests/test_task.sh tests/scripts/unit_tests/config.yml tests/scripts/unit_tests/parse_config.py tests/scripts/unit_tests/test_all.sh tests/scripts/unit_tests/test_coverage.sh tests/scripts/unit_tests/test_subset.sh tests/unit_tests/test_basic.py tests/unit_tests/test_parallel_context.py tests/unit_tests/test_spiky_loss_detector.py tests/unit_tests/utilities.py tests/unit_tests/compressor/test_adapter.py tests/unit_tests/compressor/test_compressor.py tests/unit_tests/compressor/test_config.yaml tests/unit_tests/inference/test_runtime_context.py tests/unit_tests/runner/test_parse_hostfile.py tests/unit_tests/runner/estimator/test_meta_attention.py tests/unit_tests/runner/estimator/test_meta_base.py tests/unit_tests/runner/estimator/test_meta_gpt.py tests/unit_tests/runner/estimator/test_meta_mlp.py tests/unit_tests/runner/estimator/test_meta_modules.py tests/unit_tests/runner/estimator/test_meta_registry.py tests/unit_tests/runner/estimator/test_meta_tensor.py tests/unit_tests/runner/estimator/test_meta_transformer_layer.py tests/unit_tests/transforms/__init__.py tests/unit_tests/transforms/test_hook.py tests/unit_tests/transforms/test_state_scope_transform.py tests/unit_tests/transforms/test_state_store.py tools/checkpoint/convert.py tools/checkpoint/loader_mcore.py tools/checkpoint/loader_transformers.py tools/checkpoint/run.sh tools/checkpoint/saver_mcore.py tools/checkpoint/saver_transformers.py tools/checkpoint/utils.py tools/checkpoint/aquila/args.py tools/checkpoint/aquila/ckpt.py tools/checkpoint/aquila/model.py tools/checkpoint/deepseek_v3/args.py tools/checkpoint/deepseek_v3/ckpt.py tools/checkpoint/deepseek_v3/model.py tools/checkpoint/llama/args.py tools/checkpoint/llama/ckpt.py tools/checkpoint/llama/model.py tools/checkpoint/llava_onevision/combine.py tools/checkpoint/llava_onevision/convert_mlp.py tools/checkpoint/llava_onevision/convert_qwen2.5_1.5b.py tools/checkpoint/llava_onevision/convert_qwen2.5_7b.py tools/checkpoint/llava_onevision/convert_siglip.py tools/checkpoint/llava_onevision/convert_to_fs_qwen2.5_1.5b.py tools/checkpoint/llava_onevision/convert_to_fs_qwen2.5_7b.py tools/checkpoint/llava_onevision/convert_to_hf_qwen2.5_1.5b.py tools/checkpoint/llava_onevision/convert_to_hf_qwen2.5_7b.py tools/checkpoint/mistral/args.py tools/checkpoint/mistral/ckpt.py tools/checkpoint/mistral/model.py tools/checkpoint/mixtral/args.py tools/checkpoint/mixtral/ckpt.py tools/checkpoint/mixtral/model.py tools/checkpoint/moonlight_deepseek/configuration_deepseek.py tools/checkpoint/moonlight_deepseek/modeling_deepseek.py tools/checkpoint/qwen2_5_vl/convert.md tools/checkpoint/qwen2_5_vl/hf2mcore_qwen2.5_vl.py tools/checkpoint/qwen2_5_vl/hf2mcore_qwen2.5_vl_convertor.sh tools/checkpoint/qwen2_5_vl/utils.py tools/checkpoint/qwen3/args.py tools/checkpoint/qwen3/ckpt.py tools/checkpoint/qwen3/model.py tools/checkpoint/qwen3/modeling_hf/configuration_qwen3.py tools/checkpoint/qwen3/modeling_hf/modeling_qwen3.py tools/checkpoint/sfpt_ckpt/README.md tools/checkpoint/sfpt_ckpt/dcp_to_sfpt.py tools/checkpoint/sfpt_ckpt/sfpt_to_dcp.py tools/codestyle/check_header.py tools/codestyle/pre-commit.sh tools/datasets/llava_onevision/README.md tools/datasets/llava_onevision/llava_ov_wds.py tools/datasets/llava_onevision/make_llava_ov_wds.sh tools/datasets/qwenvl/build_llava_frame_dataset.py tools/datasets/qwenvl/convert_custom_dataset_to_wds_chatml_str.py tools/datasets/qwenvl/convert_llava_pretrain_to_wds.py tools/datasets/qwenvl/dataset_preparation.md tools/datasets/qwenvl/replace_llava_image_key.py tools/datasets/qwenvl/data/__init__.py tools/datasets/qwenvl/data/dataset_helpers.py tools/datasets/qwenvl/data/image_processing.py tools/datasets/qwenvl/data/utils.py tools/datasets/qwenvl/data/energon/chatml.py tools/estimator/estimate_gpt.py tools/patch/__init__.py tools/patch/encryption_utils.py tools/patch/file_utils.py tools/patch/git_utils.py tools/patch/logger_utils.py tools/patch/merge.py tools/patch/patch.py tools/patch/patch_v2.py tools/patch/unpatch.py tools/patch/unpatch_v2.py tools/patch/__pycache__/encryption_utils.cpython-312.pyc tools/patch/__pycache__/git_utils.cpython-312.pyc tools/patch/__pycache__/logger_utils.cpython-312.pyc tools/patch/__pycache__/patch.cpython-312.pyc