From b98ad6c1cd9ff6ef9f20c772c8484425a6fcb075 Mon Sep 17 00:00:00 2001 From: michelle-yooh Date: Thu, 16 May 2024 00:14:24 +0000 Subject: [PATCH] Remove async XLA_FLAGS from A3 configs --- end_to_end/gpu/a3/test_llama2_7b.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/end_to_end/gpu/a3/test_llama2_7b.sh b/end_to_end/gpu/a3/test_llama2_7b.sh index 213f03c82..14c5c13a1 100644 --- a/end_to_end/gpu/a3/test_llama2_7b.sh +++ b/end_to_end/gpu/a3/test_llama2_7b.sh @@ -53,9 +53,8 @@ export NVTE_FUSED_ATTN=1 export NCCL_DEBUG=VERSION export XLA_FLAGS="--xla_dump_to=$BASE_OUTPUT_PATH/$RUN_NAME/HLO_dumps/ ---xla_gpu_enable_latency_hiding_scheduler=true --xla_gpu_enable_async_all_gather=true ---xla_gpu_enable_async_reduce_scatter=true --xla_gpu_enable_triton_gemm=false - --xla_gpu_graph_level=0 --xla_gpu_enable_async_all_reduce=true --xla_gpu_enable_highest_priority_async_stream=true +--xla_gpu_enable_latency_hiding_scheduler=true --xla_gpu_enable_triton_gemm=false + --xla_gpu_graph_level=0 --xla_gpu_enable_highest_priority_async_stream=true --xla_gpu_all_reduce_combine_threshold_bytes=134217728 --xla_gpu_all_gather_combine_threshold_bytes=134217728 --xla_gpu_reduce_scatter_combine_threshold_bytes=67108864 --xla_gpu_enable_pipelined_all_gather=true --xla_gpu_enable_pipelined_reduce_scatter=true --xla_gpu_enable_pipelined_all_reduce=true