LICENSE
MANIFEST.in
README.md
setup.py
/data01/home/zhengningxin/opensource/flux/src/all_gather/ths_op/all_gather_gemm_kernel.cc
/data01/home/zhengningxin/opensource/flux/src/comm_none/ths_op/gemm_only.cc
/data01/home/zhengningxin/opensource/flux/src/reduce_scatter/ths_op/gemm_reduce_scatter.cc
/data01/home/zhengningxin/opensource/flux/src/reduce_scatter/ths_op/helper_ops.cc
/data01/home/zhengningxin/opensource/flux/src/ths_op/flux_shm.cc
/data01/home/zhengningxin/opensource/flux/src/ths_op/helper_ops.cc
/data01/home/zhengningxin/opensource/flux/src/ths_op/ths_op.cc
/data01/home/zhengningxin/opensource/flux/src/ths_op/topo_utils.cc
include/flux/flux.h
include/flux/gemm_hparams.h
include/flux/gemm_meta.h
include/flux/gemm_operator_base.h
include/flux/op_registry.h
include/flux/op_registry_proto_utils.h
include/flux/runtime_config.h
include/flux/utils.h
include/flux/args/all_gather.h
include/flux/args/comm_none.h
include/flux/args/reduce_scatter.h
include/flux/cuda/cuda_common.h
include/flux/cuda/cuda_common_device.hpp
include/flux/cuda/cuda_stub.h
include/flux/cuda/cutlass_v3_builder.hpp
include/flux/cuda/helper_kernels.h
include/flux/cuda/memory_utils.hpp
include/flux/cuda/nvml_stub.h
include/flux/cuda/reduce_utils.cuh
include/flux/cuda/system_barrier.hpp
include/flux/cuda/gemm_impls/gemm_grouped_impl.hpp
include/flux/cuda/gemm_impls/gemm_operator_base_default_impl.hpp
include/flux/cuda/gemm_impls/gemm_v2_impl.hpp
include/flux/cuda/gemm_impls/gemm_v3_impl.hpp
include/flux/ths_op/flux_shm.h
include/flux/ths_op/ths_op.h
include/flux/ths_op/topo_utils.h
include/flux/ths_op/util.h
python/byte_flux.egg-info/PKG-INFO
python/byte_flux.egg-info/SOURCES.txt
python/byte_flux.egg-info/dependency_links.txt
python/byte_flux.egg-info/requires.txt
python/byte_flux.egg-info/top_level.txt
python/flux/__init__.py
python/flux/ag_gemm.py
python/flux/ag_kernel_crossnode.py
python/flux/cpp_mod.py
python/flux/cpp_mod.pyi
python/flux/dist_utils.py
python/flux/gemm_rs_sm80.py
python/flux/util.py
src/CMakeLists.txt
src/all_gather/CMakeLists.txt
src/all_gather/all_gather_swizzle.hpp
src/all_gather/gemm_v2_ag_kernel.hpp
src/all_gather/gemm_v3_ag_kernel.hpp
src/all_gather/sm80_all_gather_gemm.hpp
src/all_gather/sm80_all_gather_gemm_threadblock_swizzle.hpp
src/all_gather/sm80_gemm_universal_with_visitor.hpp
src/all_gather/sm90_all_gather_gemm_tile_scheduler.hpp
src/all_gather/sm90_all_gather_gemm_tma_warpspecialized_cooperative.hpp
src/all_gather/ths_op/all_gather_gemm_kernel.cc
src/all_gather/ths_op/all_gather_gemm_kernel_crossnode.cc
src/all_gather/ths_op/all_gather_ring_order.h
src/all_gather/ths_op/all_gather_types.h
src/all_gather/ths_op/transfers.hpp
src/all_gather/tuning_config/config_ag_gemm_kernel_sm80_tp1_nnodes1.cu
src/all_gather/tuning_config/config_ag_gemm_kernel_sm80_tp2_nnodes1.cu
src/all_gather/tuning_config/config_ag_gemm_kernel_sm80_tp4_nnodes1.cu
src/all_gather/tuning_config/config_ag_gemm_kernel_sm80_tp8_nnodes1.cu
src/all_gather/tuning_config/config_ag_gemm_kernel_sm90_tp1_nnodes1.cu
src/all_gather/tuning_config/config_ag_gemm_kernel_sm90_tp2_nnodes1.cu
src/all_gather/tuning_config/config_ag_gemm_kernel_sm90_tp4_nnodes1.cu
src/all_gather/tuning_config/config_ag_gemm_kernel_sm90_tp8_nnodes1.cu
src/comm_none/CMakeLists.txt
src/comm_none/gemm_v2_comm_none.hpp
src/comm_none/gemm_v3_comm_none.hpp
src/comm_none/test/CMakeLists.txt
src/comm_none/test/test_gemm_only.cc
src/comm_none/ths_op/gemm_only.cc
src/cuda/CMakeLists.txt
src/cuda/bitwise_check.cu
src/cuda/cuda_common.cc
src/cuda/cuda_stub.cc
src/cuda/cudaipc_barrier_all.cu
src/cuda/gemm_op_registry.cu.in
src/cuda/nvml_stub.cc
src/cuda/op_registry.cu
src/cuda/op_registry_proto_utils.cc
src/cuda/random_initialize.cu
src/cuda/utils.cc
src/cuda/version.ld
src/reduce_scatter/CMakeLists.txt
src/reduce_scatter/bsr_reduce.cu
src/reduce_scatter/bsr_reduce.hpp
src/reduce_scatter/epilogue_evt.hpp
src/reduce_scatter/epilogue_evt_nvshmem.hpp
src/reduce_scatter/epilogue_nvshmem_reduce_scatter.hpp
src/reduce_scatter/epilogue_reduce_scatter.hpp
src/reduce_scatter/epilogue_vectorized_reduce_scatter.hpp
src/reduce_scatter/gemm_v2_reduce_scatter.hpp
src/reduce_scatter/gemm_v3_reduce_scatter.hpp
src/reduce_scatter/gemmk_universal_with_visitor.hpp
src/reduce_scatter/gemmk_universal_with_visitor_streamk.h
src/reduce_scatter/gemmk_visitor_load.hpp
src/reduce_scatter/reduce_scatter_barrier_struct.hpp
src/reduce_scatter/reduce_scatter_kernel.hpp
src/reduce_scatter/sm90_epilogue_evt.hpp
src/reduce_scatter/sm90_gemm_tma_warpspecialized_cooperative_reduce_scatter.hpp
src/reduce_scatter/sm90_reduce_scatter_utils.hpp
src/reduce_scatter/visitor_2x_bsr.hpp
src/reduce_scatter/test/CMakeLists.txt
src/reduce_scatter/test/test_gemm_rs.cc
src/reduce_scatter/ths_op/gemm_reduce_scatter.cc
src/reduce_scatter/ths_op/helper_ops.cc
src/reduce_scatter/ths_op/helper_ops.h
src/reduce_scatter/tile_scheduler/sm90_tile_scheduler_reduce_scatter.hpp
src/reduce_scatter/tile_scheduler/threadblock_swizzle.hpp
src/reduce_scatter/tile_scheduler/threadblock_swizzle_acrossnode.hpp
src/reduce_scatter/tile_scheduler/tile_mappings.hpp
src/reduce_scatter/tuning_config/config_gemm_rs_sm80_tp1_nnodes1.cu
src/reduce_scatter/tuning_config/config_gemm_rs_sm80_tp2_nnodes1.cu
src/reduce_scatter/tuning_config/config_gemm_rs_sm80_tp4_nnodes1.cu
src/reduce_scatter/tuning_config/config_gemm_rs_sm80_tp8_nnodes1.cu
src/reduce_scatter/tuning_config/config_gemm_rs_sm90_tp1_nnodes1.cu
src/reduce_scatter/tuning_config/config_gemm_rs_sm90_tp2_nnodes1.cu
src/reduce_scatter/tuning_config/config_gemm_rs_sm90_tp4_nnodes1.cu
src/reduce_scatter/tuning_config/config_gemm_rs_sm90_tp8_nnodes1.cu
src/ths_op/CMakeLists.txt
src/ths_op/flux_shm.cc
src/ths_op/helper_ops.cc
src/ths_op/ths_op.cc
src/ths_op/topo_utils.cc
test/test_ag_kernel.py
test/test_ag_kernel_crossnode.py
test/test_ag_kernel_functional.py
test/test_ag_kernel_pyshmem.py
test/test_gemm_only.py
test/test_gemm_rs.py