diff --git a/MaxText/configs/a3/llama_2_7b/16vm.sh b/MaxText/configs/a3/llama_2_7b/16vm.sh
index fa07a470e..819407578 100644
--- a/MaxText/configs/a3/llama_2_7b/16vm.sh
+++ b/MaxText/configs/a3/llama_2_7b/16vm.sh
@@ -26,7 +26,7 @@ export NVTE_FUSED_ATTN=1
 export NCCL_DEBUG=VERSION
 export XLA_FLAGS="--xla_dump_to=$OUTPUT_PATH/$RUN_NAME/HLO_dumps/
 --xla_gpu_enable_latency_hiding_scheduler=true --xla_gpu_enable_async_all_gather=true --xla_gpu_enable_async_reduce_scatter=true
---xla_gpu_enable_triton_gemm=false --xla_gpu_simplify_all_fp_conversions --xla_gpu_graph_level=0
+--xla_gpu_enable_triton_gemm=false --xla_gpu_graph_level=0
 --xla_gpu_enable_async_all_reduce=true --xla_gpu_enable_highest_priority_async_stream=true
 --xla_gpu_all_reduce_combine_threshold_bytes=1073741824 --xla_gpu_all_gather_combine_threshold_bytes=134217728
 --xla_gpu_reduce_scatter_combine_threshold_bytes=134217728 --xla_gpu_enable_pipelined_all_gather=true
diff --git a/MaxText/configs/a3/llama_2_7b/1vm.sh b/MaxText/configs/a3/llama_2_7b/1vm.sh
index 9120b9311..fb6fac5fe 100644
--- a/MaxText/configs/a3/llama_2_7b/1vm.sh
+++ b/MaxText/configs/a3/llama_2_7b/1vm.sh
@@ -26,7 +26,7 @@ export NVTE_FUSED_ATTN=1
 export NCCL_DEBUG=VERSION
 export XLA_FLAGS="--xla_dump_to=$OUTPUT_PATH/$RUN_NAME/HLO_dumps/
 --xla_gpu_enable_latency_hiding_scheduler=true --xla_gpu_enable_async_all_gather=true
---xla_gpu_enable_async_reduce_scatter=true --xla_gpu_enable_triton_gemm=false --xla_gpu_simplify_all_fp_conversions
+--xla_gpu_enable_async_reduce_scatter=true --xla_gpu_enable_triton_gemm=false
  --xla_gpu_graph_level=0 --xla_gpu_enable_async_all_reduce=true --xla_gpu_enable_highest_priority_async_stream=true
  --xla_gpu_all_reduce_combine_threshold_bytes=134217728 --xla_gpu_all_gather_combine_threshold_bytes=134217728
  --xla_gpu_reduce_scatter_combine_threshold_bytes=67108864 --xla_gpu_enable_pipelined_all_gather=true
diff --git a/MaxText/configs/a3/llama_2_7b/2vm.sh b/MaxText/configs/a3/llama_2_7b/2vm.sh
index d1ee728ac..3723eb522 100644
--- a/MaxText/configs/a3/llama_2_7b/2vm.sh
+++ b/MaxText/configs/a3/llama_2_7b/2vm.sh
@@ -26,7 +26,7 @@ export NVTE_FUSED_ATTN=1
 export NCCL_DEBUG=VERSION
 export XLA_FLAGS="--xla_dump_to=$OUTPUT_PATH/$RUN_NAME/HLO_dumps/
 --xla_gpu_enable_latency_hiding_scheduler=true --xla_gpu_enable_async_all_gather=true --xla_gpu_enable_async_reduce_scatter=true
---xla_gpu_enable_triton_gemm=false --xla_gpu_simplify_all_fp_conversions --xla_gpu_graph_level=0
+--xla_gpu_enable_triton_gemm=false --xla_gpu_graph_level=0
 --xla_gpu_enable_async_all_reduce=true --xla_gpu_enable_highest_priority_async_stream=true
 --xla_gpu_all_reduce_combine_threshold_bytes=67108864 --xla_gpu_all_gather_combine_threshold_bytes=134217728
 --xla_gpu_reduce_scatter_combine_threshold_bytes=67108864 --xla_gpu_enable_pipelined_all_gather=true
diff --git a/MaxText/configs/a3/llama_2_7b/4vm.sh b/MaxText/configs/a3/llama_2_7b/4vm.sh
index 7712d036a..931c3a9bf 100644
--- a/MaxText/configs/a3/llama_2_7b/4vm.sh
+++ b/MaxText/configs/a3/llama_2_7b/4vm.sh
@@ -26,7 +26,7 @@ export NVTE_FUSED_ATTN=1
 export NCCL_DEBUG=VERSION
 export XLA_FLAGS="--xla_dump_to=$OUTPUT_PATH/$RUN_NAME/HLO_dumps/
 --xla_gpu_enable_latency_hiding_scheduler=true --xla_gpu_enable_async_all_gather=true
---xla_gpu_enable_async_reduce_scatter=true --xla_gpu_enable_triton_gemm=false --xla_gpu_simplify_all_fp_conversions
+--xla_gpu_enable_async_reduce_scatter=true --xla_gpu_enable_triton_gemm=false
 --xla_gpu_graph_level=0 --xla_gpu_enable_async_all_reduce=true --xla_gpu_enable_highest_priority_async_stream=true
 --xla_gpu_all_reduce_combine_threshold_bytes=536870912 --xla_gpu_all_gather_combine_threshold_bytes=134217728
 --xla_gpu_reduce_scatter_combine_threshold_bytes=67108864 --xla_gpu_enable_pipelined_all_gather=true
diff --git a/MaxText/configs/a3/llama_2_7b/8vm.sh b/MaxText/configs/a3/llama_2_7b/8vm.sh
index bb003f2f7..057c5f0e9 100644
--- a/MaxText/configs/a3/llama_2_7b/8vm.sh
+++ b/MaxText/configs/a3/llama_2_7b/8vm.sh
@@ -26,7 +26,7 @@ export NVTE_FUSED_ATTN=1
 export NCCL_DEBUG=VERSION
 export XLA_FLAGS="--xla_dump_to=$OUTPUT_PATH/$RUN_NAME/HLO_dumps/
 --xla_gpu_enable_latency_hiding_scheduler=true --xla_gpu_enable_async_all_gather=true --xla_gpu_enable_async_reduce_scatter=true
---xla_gpu_enable_triton_gemm=false --xla_gpu_simplify_all_fp_conversions --xla_gpu_graph_level=0
+--xla_gpu_enable_triton_gemm=false --xla_gpu_graph_level=0
 --xla_gpu_enable_async_all_reduce=true --xla_gpu_enable_highest_priority_async_stream=true
 --xla_gpu_all_reduce_combine_threshold_bytes=1073741824 --xla_gpu_all_gather_combine_threshold_bytes=134217728
 --xla_gpu_reduce_scatter_combine_threshold_bytes=67108864 --xla_gpu_enable_pipelined_all_gather=true
diff --git a/end_to_end/gpu/a3/test_llama2_7b.sh b/end_to_end/gpu/a3/test_llama2_7b.sh
index 7a6a9e0c3..213f03c82 100644
--- a/end_to_end/gpu/a3/test_llama2_7b.sh
+++ b/end_to_end/gpu/a3/test_llama2_7b.sh
@@ -54,7 +54,7 @@ export NCCL_DEBUG=VERSION
 
 export XLA_FLAGS="--xla_dump_to=$BASE_OUTPUT_PATH/$RUN_NAME/HLO_dumps/
 --xla_gpu_enable_latency_hiding_scheduler=true --xla_gpu_enable_async_all_gather=true
---xla_gpu_enable_async_reduce_scatter=true --xla_gpu_enable_triton_gemm=false --xla_gpu_simplify_all_fp_conversions
+--xla_gpu_enable_async_reduce_scatter=true --xla_gpu_enable_triton_gemm=false
  --xla_gpu_graph_level=0 --xla_gpu_enable_async_all_reduce=true --xla_gpu_enable_highest_priority_async_stream=true
  --xla_gpu_all_reduce_combine_threshold_bytes=134217728 --xla_gpu_all_gather_combine_threshold_bytes=134217728
  --xla_gpu_reduce_scatter_combine_threshold_bytes=67108864 --xla_gpu_enable_pipelined_all_gather=true