framework,version,device,op_name,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,1,0.026005332668622334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,1,0.02645866572856903
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,3,0.026911998788515728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,7,0.028351999819278717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,15,0.0349440003434817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,63,0.0356480007370313
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,3,0.029311999678611755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,127,0.04452799757321676
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,255,0.07868800063927968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,255,0.07047466437021892
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,7,0.027962667246659596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,511,0.18352532386779785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,511,0.11661866307258606
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,1023,0.35014931360880536
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,1023,0.23079466819763184
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,2047,0.682474692662557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,2047,0.44169068336486816
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,4095,1.3504692713419597
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,31,0.03515200068553289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,15,0.03501333296298981
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,31,0.03509333233038584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,127,0.0506879985332489
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,fp8,4095,0.8607359727223715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,1,0.00997866690158844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,64,128,1,float16,float16,63,0.03758399933576584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,3,0.009648000200589498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,3,0.009455999980370203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,7,0.009322666873534521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,1,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,15,0.009999999776482582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,15,0.009930666536092758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,7,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,31,0.010298666854699453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,31,0.010549332946538925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,63,0.012128000458081564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,63,0.012015999605258306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,127,0.01239466667175293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,255,0.012629333883523941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,255,0.01228800043463707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,511,0.016917333006858826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,127,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,511,0.016704000532627106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,1023,0.029626667499542236
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,1023,0.029461334149042766
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,2047,0.03719466676314672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,4095,0.06426666676998138
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,2047,0.03803733239571253
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,4095,0.05206400156021118
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,8191,0.11583466331164043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,8191,0.08932800094286601
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,float16,16383,0.2063093384106954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,1,0.009632000078757605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,64,128,1,float16,fp8,16383,0.19244267543156943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,1,0.010090666512648264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,3,0.009759999811649323
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,3,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,7,0.010079999764760336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,15,0.010133333504199982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,31,0.010661333799362183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,7,0.009706666693091393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,31,0.01099733387430509
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,15,0.010005333150426546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,63,0.012485332787036896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,63,0.01239466667175293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,127,0.0124746672809124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,127,0.012442667037248611
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,255,0.012778667112191519
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,511,0.028416000306606293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,511,0.028149334092934925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,1023,0.033376000821590424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,1023,0.03266133368015289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,255,0.012517333030700684
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,2047,0.05367999772230784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,2047,0.045082668463389076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,4095,0.16221333543459573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,4095,0.07332266867160797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,8191,0.19246399402618408
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,float16,16383,0.35815465450286865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,8191,0.13272000352541605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,1,0.04785066843032837
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,64,128,1,float16,fp8,16383,0.23962666591008505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,3,0.049866666396458946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,1,0.04894933104515076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,3,0.05091199775536855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,7,0.0524533341328303
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,15,0.06452266871929169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,7,0.05163733164469401
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,31,0.06797866523265839
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,15,0.06478933493296306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,31,0.06461333235104878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,63,0.07258133093516032
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,127,0.08711999654769897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,255,0.1325546701749166
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,63,0.06862399975458781
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,511,0.241482675075531
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,127,0.0906826655069987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,511,0.35522135098775226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,255,0.14562666416168213
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,1023,0.6882186730702718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,1023,0.4469919999440511
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,float16,2047,1.3518400192260742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,64,128,1,float16,fp8,2047,0.8570773601531982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,1,0.00914666677514712
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,3,0.009370666618148485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,7,0.009322666873534521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,3,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,7,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,15,0.01009599988659223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,1,0.009338666374484697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,31,0.011722666521867117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,15,0.010415999839703241
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,31,0.01191466674208641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,63,0.011957333733638128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,127,0.011871999750534693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,127,0.012096000214417776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,255,0.013722666849692663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,511,0.01972266659140587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,63,0.011978667229413986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,511,0.019178666174411774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,255,0.013845333208640417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,1023,0.03030399978160858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,1023,0.050997331738471985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,2047,0.09456533193588257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,2047,0.08180266618728638
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,4095,0.17844265699386597
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,8191,0.34646399815877277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,float16,16383,0.6894613107045492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,16383,0.43346667289733887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,8191,0.22142932812372842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,64,128,1,float16,fp8,4095,0.11691733201344807
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,1,0.09103467067082723
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,1,0.0934986670811971
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,3,0.09451733032862346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,3,0.09676800171534221
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,7,0.09834667046864827
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,7,0.10041600465774536
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,15,0.1288640002409617
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,31,0.1346773306528727
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,31,0.13388267159461975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,15,0.12430933117866516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,63,0.13739200433095297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,63,0.13652799526850382
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,127,0.17189866304397583
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,127,0.16657599806785583
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,255,0.27299733956654865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,255,0.26976533730824787
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,511,0.6896693706512451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,float16,1023,1.3605440457661946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,1023,0.866757313410441
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,64,128,1,float16,fp8,511,0.42761067549387616
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,fp8,1,0.18176533778508505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,float16,1,0.1770026683807373
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,float16,3,0.1888693372408549
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,fp8,3,0.18835733334223428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,fp8,7,0.20379199584325156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,float16,7,0.2076746622721354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,float16,15,0.25297067562739056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,float16,31,0.2601226568222046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,float16,63,0.26466667652130127
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,fp8,31,0.2609279950459798
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,fp8,63,0.27194132407506305
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,fp8,15,0.2561759948730469
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,float16,127,0.3317813277244568
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,fp8,127,0.3260800043741862
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,float16,255,0.5263573328653971
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,64,128,1,float16,fp8,255,0.4992266496022542
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,float16,1,0.38040534655253094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,float16,3,0.40270400047302246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,fp8,1,0.38627731800079346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,float16,7,0.40953067938486737
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,fp8,7,0.4198453426361084
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,fp8,3,0.40229864915211994
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,fp8,15,0.5064160029093424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,float16,15,0.49908268451690674
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,float16,31,0.5126933256785074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,fp8,31,0.5254186789194742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,float16,63,0.5215573310852051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,fp8,63,0.5220799843470255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,1,0.015087999403476715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,float16,127,0.6514240105946859
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,3,0.015295999745527903
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,1,0.015386667102575302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,3,0.01553600033124288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,7,0.015247999380032221
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,31,0.019952000429232914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,7,0.015770666301250458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,64,128,1,float16,fp8,127,0.6443306605021158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,15,0.01658133293191592
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,15,0.016890666137139004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,31,0.02015999952952067
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,63,0.01995733380317688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,127,0.020154666155576706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,127,0.020367999871571858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,63,0.020138667275508244
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,255,0.02492800106604894
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,511,0.044938668608665466
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,511,0.03624533365170161
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,1023,0.06326933205127716
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,1023,0.15178666512171426
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,255,0.023914667467276256
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,2047,0.18138132492701212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,4095,0.22569066286087036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,4095,0.34809064865112305
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,8191,0.6818293730417887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,2047,0.12117866675059001
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,8191,0.4326773484547933
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,float16,16383,2.232704003651937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,64,128,1,float16,fp8,16383,0.854085365931193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,float16,3,0.7793599764506022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,float16,1,0.7564427057902018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,fp8,1,0.7769546508789062
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,fp8,3,0.7980426947275797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,fp8,7,0.832815965016683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,float16,15,1.0033600330352783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,float16,7,0.8116693496704102
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,fp8,15,1.006826639175415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,float16,31,1.0175466537475586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,fp8,31,1.0232906341552734
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,float16,63,1.0342506567637126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,64,128,1,float16,fp8,63,1.035797357559204
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,float16,1,1.5037439664204915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,fp8,1,1.5808533032735188
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,float16,3,1.553210735321045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,float16,7,1.614975929260254
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,1,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,fp8,7,1.6612799962361653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,fp8,15,2.006490707397461
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,fp8,3,1.5901333491007488
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,float16,15,1.974181334177653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,1,0.015216000378131866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,3,0.01525866612792015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,3,0.01573866605758667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,7,0.01605333387851715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,float16,31,2.02838404973348
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,15,0.019679999599854153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,64,128,1,float16,fp8,31,2.0784266789754233
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,7,0.016165333489576977
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,31,0.01950399950146675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,15,0.019674666225910187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,31,0.01974933346112569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,63,0.01982933282852173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,63,0.019845332950353622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,127,0.02455466737349828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,127,0.023397333920001984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,255,0.035616000493367515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,511,0.09685867031415303
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,511,0.0621919979651769
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,1023,0.18105600277582803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,1023,0.12158399820327759
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,255,0.043791999419530235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,2047,0.3466666539510091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,2047,0.3023413419723511
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,4095,0.681493361790975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,4095,0.43828264872233075
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,1,0.02060266708334287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,float16,8191,1.3812959988911946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,64,128,1,float16,fp8,8191,0.8785173098246256
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,3,0.021365332106749218
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,3,0.02149333308140437
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,7,0.021925332645575207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,7,0.0223786657055219
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,1,0.021013334393501282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,15,0.027280000348885853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,15,0.027632000545660656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,31,0.027690666417280834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,31,0.0276053324341774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,63,0.028160000840822857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,127,0.039477333426475525
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,255,0.06187200049559275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,63,0.02777066578467687
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,511,0.14095999797185263
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,255,0.054378668467203774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,127,0.033546666304270424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,511,0.08961066603660583
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,1023,0.42975465456644696
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,2047,0.5145013332366943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,1023,0.1760586698849996
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,2047,0.3369599978129069
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,float16,4095,1.0155253410339355
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,1,0.009930666536092758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,3,0.0099093330403169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,48,48,128,1,float16,fp8,4095,0.6519146760304769
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,3,0.009589333087205887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,7,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,1,0.009610666582981745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,7,0.009962666779756546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,31,0.010533332824707031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,15,0.009935999910036722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,15,0.009818666925032934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,63,0.012080000092585882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,63,0.012069333344697952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,31,0.010858666151762009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,255,0.012341332932313284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,127,0.01221866657336553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,255,0.012448000411192576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,511,0.016965333372354507
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,511,0.016634666671355564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,1023,0.029418667157491047
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,2047,0.03708266715208689
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,1023,0.029333333174387615
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,127,0.012383999923865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,2047,0.03700799991687139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,4095,0.0639573335647583
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,8191,0.14614400267601013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,4095,0.051498666405677795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,8191,0.0881226658821106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,float16,16383,0.19433599710464478
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,48,48,128,1,float16,fp8,16383,0.15679466724395752
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,3,0.009759999811649323
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,3,0.0100426667680343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,15,0.009962666779756546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,1,0.009952000031868616
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,7,0.009957333405812582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,7,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,1,0.009712000067035357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,63,0.012240000069141388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,31,0.010559999694426855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,31,0.010591999938090643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,127,0.012330666184425354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,15,0.010330666477481524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,63,0.012181332955757776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,127,0.012266666938861212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,255,0.012304000556468964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,511,0.027893332143624622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,511,0.027647999425729115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,1023,0.032314665615558624
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,1023,0.03254933406909307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,2047,0.04867733518282572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,255,0.012826666235923767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,2047,0.043765331308046974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,4095,0.08956266442934673
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,4095,0.07074133555094402
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,8191,0.23263466358184814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,16383,0.2269279956817627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,fp8,8191,0.12409067153930664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,48,48,128,1,float16,float16,16383,0.2934346596399943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,1,0.038133333126703896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,3,0.03877866764863332
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,3,0.03968533376852671
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,7,0.040074666341145836
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,7,0.04071466624736786
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,1,0.037205333511034645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,31,0.05085866649945577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,15,0.050154666105906166
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,15,0.05041599770387014
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,127,0.07156800230344136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,31,0.05032533407211304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,63,0.05099200208981832
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,63,0.05735466877619425
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,255,0.11300800244013469
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,511,0.2686240077018738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,511,0.1699946721394857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,255,0.10124267141024272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,127,0.06750933329264323
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,1023,0.4522826671600342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,1023,0.5196533203125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,fp8,2047,0.6516693433125814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,1,0.009125333279371262
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,48,48,128,1,float16,float16,2047,1.018122673034668
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,1,0.009194666519761086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,3,0.009306666751702627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,7,0.009130666653315226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,7,0.00943999985853831
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,15,0.00996800015370051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,15,0.01027199998497963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,31,0.011717333147923151
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,31,0.011792000383138657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,3,0.009530666594703993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,63,0.011717333147923151
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,63,0.011786667009194693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,127,0.01198400060335795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,127,0.011920000116030375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,255,0.013738666971524557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,511,0.0191040001809597
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,255,0.013845333208640417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,1023,0.03384000062942505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,1023,0.030181333422660828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,2047,0.07481599847475688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,2047,0.0580213318268458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,4095,0.11083199580510457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,4095,0.1402293344338735
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,511,0.019130667050679524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,8191,0.2696479956309001
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,8191,0.20960533618927002
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,float16,16383,0.8528479735056559
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,48,48,128,1,float16,fp8,16383,0.40834665298461914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,1,0.07137066622575124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,1,0.06955733398596446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,3,0.07241066793600719
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,7,0.07460266848405202
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,3,0.07423466444015503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,7,0.07646933197975159
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,15,0.09583999713261922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,31,0.10249599814414978
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,15,0.094458669424057
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,63,0.1056106686592102
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,63,0.10427733262379964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,31,0.09703466296195984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,127,0.12759466965993246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,255,0.19366933902104697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,255,0.27854400873184204
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,511,0.520853320757548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,511,0.3237066666285197
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,1023,1.0260533491770427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,fp8,1023,0.6547040144602457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,48,48,128,1,float16,float16,127,0.13109333316485086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,float16,1,0.13433600465456644
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,fp8,1,0.1383626659711202
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,float16,7,0.1540426711241404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,float16,3,0.13953066865603128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,fp8,7,0.1497813363869985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,fp8,3,0.14257599910100302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,float16,15,0.1922826568285624
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,float16,31,0.19779199361801147
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,fp8,31,0.19785600900650024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,fp8,15,0.19091200828552246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,float16,63,0.2015733321507772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,fp8,63,0.2063466707865397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,float16,127,0.25196266174316406
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,fp8,127,0.2465546727180481
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,float16,255,0.4004426797231038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,48,48,128,1,float16,fp8,255,0.3779040177663167
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,float16,1,0.27801066637039185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,float16,3,0.297818660736084
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,float16,7,0.3096426725387573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,fp8,3,0.3003573417663574
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,fp8,7,0.31727466980616253
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,float16,15,0.3770506779352824
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,fp8,1,0.27480532725652057
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,fp8,15,0.3827413320541382
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,float16,31,0.38757868607838947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,fp8,31,0.38894931475321454
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,fp8,63,0.40351466337839764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,float16,63,0.3940053383509318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,1,0.01524266724785169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,float16,127,0.49161068598429364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,48,48,128,1,float16,fp8,127,0.48583467801411945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,3,0.015034666905800501
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,1,0.014789332946141561
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,3,0.015423999478419622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,7,0.014991999914248785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,7,0.01545599972208341
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,15,0.015098666151364645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,15,0.015450666348139444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,31,0.01695466662446658
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,31,0.016986666868130367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,63,0.017397332936525345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,127,0.01740266631046931
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,255,0.020597333709398907
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,127,0.017386666188637417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,255,0.020234666764736176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,511,0.03659733384847641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,511,0.028880000114440918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,63,0.01691199963291486
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,1023,0.08160000046094258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,1023,0.05708266794681549
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,2047,0.15131733814875284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,2047,0.09789333740870158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,4095,0.2860906720161438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,4095,0.19409600893656412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,8191,0.5556586583455404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,float16,16383,1.1075572967529297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,16383,1.0146559874216716
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,48,48,128,1,float16,fp8,8191,0.3702400128046672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,float16,1,0.5716106494267782
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,fp8,1,0.5869706471761068
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,float16,3,0.5888906717300415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,fp8,3,0.6026240189870199
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,float16,7,0.6125760078430176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,fp8,7,0.6297973394393921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,float16,15,0.7565333048502604
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,fp8,15,0.7578933238983154
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,fp8,31,0.7707040309906006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,fp8,63,0.7798293431599935
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,float16,63,0.778538703918457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,48,48,128,1,float16,float16,31,0.7680319945017496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,fp8,1,1.167962630589803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,float16,1,1.1355520089467366
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,float16,3,1.1990826924641926
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,float16,7,1.2179839611053467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,1,0.014266667266686758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,float16,15,1.4860480626424153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,fp8,7,1.252773364384969
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,fp8,15,1.510682741800944
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,fp8,3,1.1976373195648193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,1,0.014645333091417948
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,3,0.01422400027513504
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,3,0.015178666760524115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,7,0.014490666488806406
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,7,0.014394666999578476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,float16,31,1.5286134084065754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,48,48,128,1,float16,fp8,31,1.5348480542500813
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,15,0.0163680004576842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,31,0.01616000011563301
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,15,0.01621333385507266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,31,0.016341333587964375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,63,0.016442666451136272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,63,0.01637866720557213
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,127,0.01942933350801468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,127,0.019199999670187633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,255,0.03586133321126302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,255,0.027818667391935985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,511,0.05083199838797251
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,1023,0.15030933419863382
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,1023,0.0974773367245992
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,511,0.07879999776681264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,2047,0.19341866175333658
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,2047,0.28646934032440186
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,4095,0.5570240020751953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,4095,0.497104008992513
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,float16,8191,1.1034453709920247
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,48,48,128,1,float16,fp8,8191,0.7438186804453532
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,1,0.019600000232458115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,7,0.020154666155576706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,3,0.019920000185569126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,3,0.020266667008399963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,7,0.020341333001852036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,15,0.0235359991590182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,15,0.0236160010099411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,31,0.0236160010099411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,1,0.01989866668979327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,31,0.02372266600529353
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,63,0.02367466688156128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,127,0.02796799937884013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,127,0.04008533308903376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,63,0.02404266595840454
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,255,0.04711999992529551
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,255,0.0552106648683548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,1023,0.15060800313949585
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,511,0.07794133325417836
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,2047,0.712224006652832
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,1023,0.23528534173965454
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,511,0.12283733487129211
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,2047,0.2967306574185689
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,float16,4095,0.8944799900054932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,40,40,128,1,float16,fp8,4095,0.5736266771952311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,1,0.009488000224033991
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,1,0.009984000275532404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,3,0.009770666559537252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,7,0.010048000141978264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,3,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,15,0.009770666559537252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,31,0.010458666831254959
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,31,0.010842667271693548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,7,0.009648000200589498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,63,0.012223999947309494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,15,0.010186666622757912
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,63,0.012261333564917246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,127,0.012256000190973282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,127,0.012186666329701742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,255,0.012448000411192576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,255,0.012448000411192576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,511,0.016821333517630894
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,1023,0.0179626668492953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,1023,0.017797333498795826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,2047,0.03213333338499069
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,2047,0.03221333275238673
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,511,0.01661866654952367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,8191,0.0883840024471283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,4095,0.04353600243727366
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,4095,0.04557333389918009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,8191,0.06890133519967397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,fp8,16383,0.1237386663754781
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,40,40,128,1,float16,float16,16383,0.15260266264279684
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,1,0.009626666704813639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,3,0.009722666814923286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,3,0.0100426667680343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,7,0.009701333319147428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,15,0.009952000031868616
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,7,0.009866666669646898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,1,0.009877333417534828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,31,0.010687999427318573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,15,0.010368000095089277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,31,0.010543999572594961
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,63,0.012448000411192576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,63,0.01227733368674914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,127,0.012437333663304647
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,255,0.012378666549921036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,127,0.01240533341964086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,511,0.027813332776228588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,511,0.027701333165168762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,1023,0.0322026660044988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,255,0.012554666648308435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,2047,0.046207999189694725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,2047,0.04334400097529093
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,1023,0.03230933348337809
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,4095,0.08821866909662883
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,4095,0.06866666674613953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,8191,0.12360533078511556
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,8191,0.20726933081944784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,1,0.03166933357715607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,fp8,16383,0.2230559984842936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,1,0.03236266722281774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,40,40,128,1,float16,float16,16383,0.2817866603533427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,3,0.03282133241494497
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,3,0.03378133227427801
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,7,0.03405333310365677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,7,0.03461866577466329
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,15,0.04238933324813843
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,15,0.04274133344491323
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,31,0.04303466777006785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,63,0.048485333720842995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,31,0.0430026650428772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,63,0.04322666426499685
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,127,0.06073066592216492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,127,0.06043733159701029
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,255,0.0960746705532074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,255,0.08614400029182434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,511,0.22535467147827148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,511,0.14361066619555155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,1023,0.4350080092748006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,1023,0.2869066596031189
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,float16,2047,0.8519360224405924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,40,40,128,1,float16,fp8,2047,0.7374666531880697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,1,0.016021333634853363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,3,0.01588800052801768
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,1,0.016528000434239704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,3,0.0164533331990242
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,31,0.017722666263580322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,31,0.018053332964579265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,7,0.01584533353646596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,7,0.01646399994691213
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,15,0.01647466669480006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,15,0.016890666137139004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,63,0.02128000060717265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,63,0.021189334491888683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,127,0.021226666867733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,127,0.021365332106749218
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,511,0.02475733309984207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,255,0.021642667551835377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,255,0.021482666333516438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,1023,0.03942933430274328
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,511,0.02457600086927414
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,1023,0.03473066786924998
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,2047,0.08211199939250946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,2047,0.05950400233268738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,4095,0.14705066879590353
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,4095,0.11308800180753072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,8191,0.2746079961458842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,8191,0.20891199509302774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,float16,16383,0.77237868309021
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,40,40,128,1,float16,fp8,16383,0.4018826484680176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,1,0.05878399809201559
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,1,0.06020266811052958
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,3,0.061008001367251076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,3,0.062463998794555664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,7,0.06299200157324474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,7,0.06443200012048085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,15,0.07952000200748444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,31,0.08637332916259766
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,15,0.07959466675917308
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,31,0.0801333338022232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,63,0.08947733044624329
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,63,0.08756267031033833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,127,0.11158399780591328
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,127,0.10749333103497823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,255,0.23638399442036948
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,255,0.16179733475049338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,511,0.43615468343098956
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,511,0.27113600571950275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,float16,1023,0.8572266896565756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,float16,1,0.11282133062680562
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,float16,3,0.11711999773979187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,fp8,1,0.1160586675008138
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,40,40,128,1,float16,fp8,1023,0.5467199881871542
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,fp8,3,0.11979732910792033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,float16,7,0.1246506671110789
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,fp8,7,0.12483732899030049
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,float16,15,0.1613653302192688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,fp8,15,0.15755200386047363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,float16,31,0.17163199186325073
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,fp8,63,0.16823999087015787
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,fp8,31,0.165994664033254
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,float16,63,0.16907199223836264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,float16,127,0.21167999505996704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,fp8,127,0.20719999074935913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,float16,255,0.3378239870071411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,40,40,128,1,float16,fp8,255,0.3165066639582316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,fp8,1,0.2270453373591105
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,float16,1,0.22605866193771362
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,float16,3,0.24591465791066489
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,fp8,3,0.239247997601827
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,float16,7,0.25990933179855347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,fp8,7,0.26366400718688965
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,fp8,15,0.3208799958229065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,float16,31,0.32444800933202106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,float16,15,0.3158079981803894
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,fp8,31,0.33241067330042523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,float16,63,0.3303146759668986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,fp8,63,0.3290346662203471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,float16,127,0.41203733285268146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,40,40,128,1,float16,fp8,127,0.40670935312906903
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,1,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,1,0.014607999473810196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,3,0.014682666709025701
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,3,0.014293332894643148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,7,0.014229333649079004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,7,0.014720000326633453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,15,0.014933332800865173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,15,0.015109332899252573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,31,0.016773333152135212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,31,0.016906666258970898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,63,0.016858667135238647
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,127,0.017055999487638474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,127,0.017210666090250015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,63,0.017008000363906223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,255,0.01985599969824155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,511,0.0322026660044988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,255,0.020074666788180668
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,511,0.028181334336598713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,1023,0.10823999842007954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,1023,0.05141866703828176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,2047,0.144405335187912
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,2047,0.09705600142478943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,4095,0.17985600233078003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,8191,0.5341920057932535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,4095,0.27346134185791016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,8191,0.3638293345769246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,float16,16383,1.5310079256693523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,40,40,128,1,float16,fp8,16383,0.715178648630778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,float16,1,0.4769599835077922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,fp8,1,0.4909173250198364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,float16,3,0.4923786719640096
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,fp8,3,0.5040693283081055
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,float16,7,0.5126239856084188
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,fp8,7,0.527077317237854
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,float16,15,0.6328746477762858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,fp8,15,0.6349973281224569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,float16,31,0.6413066784540812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,fp8,31,0.6445120175679525
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,float16,63,0.6506719986597697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,40,40,128,1,float16,fp8,63,0.6515573263168335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,float16,1,0.9488586584726969
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,float16,3,1.0014453728993733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,fp8,1,0.9751413663228353
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,fp8,3,1.0011573632558186
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,float16,7,1.016858657201131
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,fp8,7,1.0470613638559978
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,float16,15,1.240938663482666
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,fp8,15,1.2629226843516033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,1,0.013936000565687815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,1,0.014010666559139887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,3,0.014015999933083853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,3,0.014042666802803675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,7,0.013978666315476099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,float16,31,1.2769546508789062
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,7,0.014122666170199713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,15,0.015194666882356008
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,40,40,128,1,float16,fp8,31,1.306874672571818
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,15,0.015344000111023584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,31,0.01533866673707962
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,63,0.015487999965747198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,31,0.015413332730531693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,63,0.015487999965747198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,127,0.017765333255132038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,127,0.017871999492247898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,511,0.045114666223526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,255,0.03467733412981033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,255,0.02492266645034154
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,1023,0.14295466740926108
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,1023,0.09553066889444987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,511,0.0728053351243337
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,2047,0.2765493392944336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,2047,0.17733333508173624
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,4095,0.5398826599121094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,4095,0.4668639898300171
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,1,0.014959999670584997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,fp8,8191,0.7211253643035889
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,40,40,128,1,float16,float16,8191,0.9806719621022543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,1,0.01523200049996376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,3,0.015418666104475657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,3,0.01569066693385442
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,7,0.01605333387851715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,7,0.01621866722901662
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,15,0.019482667247454327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,15,0.019717333217461903
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,31,0.019509332875410717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,31,0.01958400011062622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,63,0.019760000209013622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,127,0.024405332903067272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,63,0.019786667078733444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,127,0.023333333432674408
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,255,0.0359946663180987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,255,0.04393066465854645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,1023,0.180842657883962
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,511,0.09760000308354695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,511,0.06211733321348826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,1023,0.12143466869990031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,2047,0.34721601009368896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,2047,0.30315732955932617
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,4095,0.6817973454793295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,1,0.009472000102202097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,float16,8191,1.3782560030619304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,4095,0.44037334124247235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,1,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,32,128,1,float16,fp8,8191,0.8792373339335123
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,3,0.009743999689817429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,3,0.009914666414260864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,31,0.010277333358923594
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,7,0.009808000177145004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,15,0.010122666756312052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,7,0.009775999933481216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,15,0.009658666948477427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,31,0.010832000523805618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,63,0.012261333564917246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,127,0.012191999703645706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,63,0.012128000458081564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,255,0.012608000387748083
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,511,0.016437333077192307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,127,0.012256000190973282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,255,0.01210133358836174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,511,0.016613333175579708
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,1023,0.018021332720915478
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,1023,0.017711999515692394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,2047,0.02958400050799052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,2047,0.02959999938805898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,4095,0.04065066576004028
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,4095,0.039962666730086006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,8191,0.06014933188756307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,8191,0.07770133515199025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,16383,0.1283146639664968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,16383,0.10307733217875163
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,1,0.009632000078757605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,float16,32767,0.23220799366633096
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,32,128,1,float16,fp8,32767,0.2176533341407776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,1,0.009962666779756546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,3,0.00956266683836778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,3,0.009941333283980688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,7,0.009919999788204828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,7,0.009818666925032934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,15,0.010197333370645842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,15,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,31,0.010453333457310995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,31,0.010847999403874079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,63,0.012335999558369318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,127,0.012367999802033106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,63,0.012319999436537424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,127,0.01227733368674914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,255,0.01259200026591619
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,255,0.012298667182525
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,511,0.016927999754746754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,511,0.017024000485738117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,1023,0.029722665747006733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,1023,0.02962133288383484
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,4095,0.06534400085608165
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,2047,0.037871999045213066
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,4095,0.05271466573079427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,2047,0.037290667494138084
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,8191,0.16236799955368042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,8191,0.08950400352478027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,16383,0.2057173252105713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,16383,0.15974400440851846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,1,0.026122666895389557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,float16,32767,0.38603198528289795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,1,0.026752000053723652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,32,128,1,float16,fp8,32767,0.2940000096956889
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,3,0.027045334378878277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,3,0.02773333340883255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,7,0.02810666710138321
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,7,0.02845866729815801
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,15,0.03497066597143809
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,15,0.03518400092919668
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,31,0.03545066714286804
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,31,0.03537066777547201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,63,0.03794133414824804
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,63,0.035605333745479584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,127,0.05082666873931885
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,127,0.044138665000597634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,255,0.07074666519959767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,255,0.0794239987929662
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,511,0.1838080088297526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,511,0.11659199992815654
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,1023,0.5683786471684774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,1023,0.23246399561564127
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,2047,0.6822293599446615
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,2047,0.44225064913431805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,float16,4095,1.3499147097269695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,32,128,1,float16,fp8,4095,0.8615360260009766
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,1,0.00996800015370051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,1,0.00973866693675518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,3,0.009754666437705358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,3,0.010181333248813948
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,15,0.009888000165422758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,7,0.00973866693675518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,15,0.010346666599313417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,7,0.01007466639081637
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,31,0.010559999694426855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,63,0.01228800043463707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,63,0.012373333175977072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,31,0.010928000013033548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,127,0.012416000167528788
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,255,0.013072000195582708
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,127,0.012383999923865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,255,0.012442667037248611
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,511,0.028378665447235107
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,511,0.027999999622503918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,1023,0.0332640012105306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,1023,0.03310933212439219
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,2047,0.05394133428732554
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,2047,0.04452266792456309
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,4095,0.10851732889811198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,4095,0.07314133147398631
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,8191,0.19200533628463745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,8191,0.17137600978215536
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,16383,0.3591573238372803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,16383,0.23964265982309976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,float16,32767,0.6955306529998779
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,32,128,1,float16,fp8,32767,0.4545706510543823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,1,0.04939733445644379
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,1,0.04822933177153269
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,3,0.050016000866889954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,3,0.05128000179926554
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,7,0.051925331354141235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,7,0.05287466446558634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,15,0.06491733094056447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,15,0.06486933430035909
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,31,0.06865066786607106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,31,0.06519466638565063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,63,0.07264000177383423
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,63,0.07107200225194295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,127,0.09167466560999553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,255,0.1455946664015452
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,127,0.08773333827654521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,255,0.13245866696039835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,511,0.3517920176188151
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,511,0.2220319906870524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,1023,0.689680020014445
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,1023,0.599781314531962
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,float16,2047,1.3520053227742512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,fp8,1,0.09379733602205913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,float16,1,0.09155199925104777
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,32,128,1,float16,fp8,2047,0.8592000007629395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,float16,3,0.09465066591898601
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,fp8,3,0.0972160001595815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,float16,7,0.0990559955437978
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,fp8,7,0.10080533226331075
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,fp8,15,0.12520533800125122
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,float16,15,0.12981866796811423
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,float16,63,0.1372266709804535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,float16,31,0.13496533036231995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,fp8,31,0.13241599996884665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,fp8,63,0.13678399721781412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,float16,127,0.17124267419179282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,fp8,127,0.17513600985209146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,float16,255,0.27378666400909424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,fp8,255,0.2558133403460185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,float16,511,0.6897386709849039
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,32,128,1,float16,fp8,511,0.4283786614735921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,float16,1,0.17778666814168295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,fp8,1,0.18238399426142374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,float16,3,0.18978132804234824
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,float16,7,0.20896534125010172
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,fp8,3,0.18889600038528442
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,fp8,7,0.20355733235677084
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,float16,15,0.25429866711298627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,float16,31,0.261952002843221
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,fp8,15,0.25818665822347003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,fp8,31,0.2624640067418416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,float16,63,0.26554133494695026
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,fp8,63,0.2656480073928833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,float16,127,0.3758186499277751
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,1,0.009290666629870733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,float16,255,0.5241279999415079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,fp8,127,0.3271413246790568
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,32,128,1,float16,fp8,255,0.5013120174407959
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,1,0.009312000125646591
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,3,0.009317333499590555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,3,0.009423999736706415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,7,0.00921066664159298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,7,0.00984533317387104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,15,0.010122666756312052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,31,0.011887999872366587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,15,0.010330666477481524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,31,0.011930666863918304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,63,0.011893333246310553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,63,0.01192533348997434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,255,0.013845333208640417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,255,0.013962666193644205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,127,0.011909333368142446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,127,0.012080000092585882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,511,0.01970133309563001
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,511,0.019365333020687103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,1023,0.05169600248336792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,2047,0.09451199571291606
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,1023,0.030394665896892548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,4095,0.28904000918070477
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,4095,0.1169653336207072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,2047,0.06384533147017162
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,8191,0.3474293152491252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,8191,0.22209066152572632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,16383,0.43560532728830975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,16383,0.6901386578877767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,float16,32767,1.5162560145060222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,32,128,1,float16,fp8,32767,1.5317920049031575
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,fp8,1,0.38811198870340985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,float16,3,0.3959466616312663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,fp8,3,0.40460264682769775
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,fp8,7,0.4230666557947795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,float16,15,0.5017600059509277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,fp8,15,0.5165973504384359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,float16,7,0.4123306671778361
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,float16,1,0.3835359811782837
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,float16,31,0.5156906843185425
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,fp8,31,0.5177973508834839
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,float16,63,0.5232906738917033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,fp8,63,0.5226240158081055
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,float16,127,0.6531039873758951
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,32,128,1,float16,fp8,127,0.668837308883667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,float16,1,0.7613226572672526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,float16,3,0.7840533256530762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,fp8,3,0.8030827045440674
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,float16,7,0.8174986839294434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,fp8,7,0.8402132987976074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,fp8,1,0.7827733357747396
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,float16,15,0.99618132909139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,fp8,15,1.0260159969329834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,1,0.015013333410024643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,float16,31,1.025312026341756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,1,0.01552533358335495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,3,0.015439999600251516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,3,0.015722667177518208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,fp8,31,1.0281493663787842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,float16,63,1.0373706817626953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,7,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,32,128,1,float16,fp8,63,1.0387039979298909
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,7,0.015626666446526844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,15,0.016810666769742966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,15,0.017024000485738117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,31,0.01987733319401741
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,31,0.02029866725206375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,63,0.020186666399240494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,63,0.02022933339079221
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,127,0.020448000480731327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,127,0.02038399999340375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,255,0.025018667181332905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,255,0.02382933348417282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,511,0.0584799995024999
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,511,0.036373332142829895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,1023,0.06332799792289734
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,1023,0.09774933258692424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,2047,0.18097599347432455
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,2047,0.12172266840934753
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,8191,1.1205600102742512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,4095,0.22574400901794434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,8191,0.4328746795654297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,4095,0.34786665439605713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,1,0.014256000518798828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,float16,16383,1.3571359316507976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,1,0.014346666634082794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,32,128,1,float16,fp8,16383,0.8571253617604574
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,3,0.014469332993030548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,3,0.014618666221698126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,7,0.014805333067973455
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,7,0.014730667074521383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,15,0.01609066625436147
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,15,0.016341333587964375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,31,0.01639466608564059
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,31,0.016607999801635742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,63,0.016506666938463848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,63,0.016549333930015564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,255,0.035760000348091125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,127,0.01926933353145917
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,255,0.027514666318893433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,511,0.07845333218574524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,127,0.026538667579491932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,511,0.0510453333457311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,1023,0.2227893273035685
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,1023,0.09935466448465984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,2047,0.2866986592610677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,2047,0.19428799549738565
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,4095,0.5565706491470337
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,4095,0.3729333480199178
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,1,0.009349333122372627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,fp8,8191,0.7419679959615072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,1,0.009573333586255709
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,24,24,128,1,float16,float16,8191,1.1019946734110515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,3,0.009583999713261923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,7,0.009359999870260557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,15,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,7,0.009935999910036722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,31,0.010186666622757912
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,15,0.010191999996701876
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,3,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,31,0.010341333225369453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,63,0.012245333443085352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,127,0.012128000458081564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,63,0.01192533348997434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,127,0.0122079998254776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,255,0.012373333175977072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,255,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,1023,0.01754666616519292
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,511,0.016607999801635742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,511,0.01653333380818367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,1023,0.01754133279124896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,2047,0.01794133335351944
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,2047,0.017792000124851864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,4095,0.03737066686153412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,4095,0.03713600089152654
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,8191,0.06247999767462412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,8191,0.05161066850026449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,16383,0.10914666453997295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,1,0.00961599995692571
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,16383,0.0963200032711029
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,float16,32767,0.1962133248647054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,24,24,128,1,float16,fp8,32767,0.1564479966958364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,1,0.01002133327225844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,3,0.009621333330869675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,3,0.010079999764760336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,7,0.009743999689817429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,7,0.00983466642598311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,15,0.009621333330869675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,15,0.010191999996701876
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,63,0.012346666306257248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,31,0.010640000303586325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,31,0.010431999961535135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,63,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,127,0.012223999947309494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,511,0.016789333273967106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,255,0.01239466667175293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,255,0.01228800043463707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,511,0.016704000532627106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,127,0.01211200033624967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,1023,0.02942399928967158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,2047,0.037274666130542755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,2047,0.03702933341264725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,4095,0.05126399795214335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,8191,0.10989333192507426
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,4095,0.07519466678301494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,1023,0.0296426663796107
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,16383,0.19537599881490073
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,8191,0.08777067065238953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,16383,0.15687466661135355
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,1,0.02070933332045873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,float16,32767,0.36950401465098065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,24,24,128,1,float16,fp8,32767,0.28811200459798175
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,1,0.020853333175182343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,7,0.02197333425283432
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,3,0.02149333308140437
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,3,0.021664001047611237
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,7,0.022309333086013794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,15,0.02741866558790207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,15,0.027717334528764088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,63,0.028170667588710785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,31,0.027600000301996868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,31,0.02773333340883255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,63,0.027935999135176342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,127,0.03996799886226654
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,127,0.03365866591533025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,255,0.0621919979651769
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,255,0.05463466544946035
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,1023,0.1768959959348043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,511,0.09968533118565877
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,511,0.14044266939163208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,1023,0.26608532667160034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,2047,0.5155786673227946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,2047,0.337007999420166
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,float16,4095,1.016426642735799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,24,24,128,1,float16,fp8,4095,0.6526933511098226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,1,0.0100426667680343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,1,0.009712000067035357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,3,0.00966933307548364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,7,0.009712000067035357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,3,0.009941333283980688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,15,0.009973333527644476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,15,0.01020800011853377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,31,0.010762666662534079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,63,0.012319999436537424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,7,0.010128000130256018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,31,0.010426666587591171
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,63,0.01239466667175293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,127,0.012282667060693106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,127,0.012341332932313284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,255,0.012896000097195307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,511,0.02792533238728841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,511,0.02794133375088374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,255,0.01259200026591619
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,1023,0.03218133250872294
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,1023,0.03236799935499827
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,2047,0.048170665899912514
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,2047,0.043706665436426796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,4095,0.09001066287358601
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,4095,0.07062933345635732
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,8191,0.1588266690572103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,8191,0.12496532996495564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,16383,0.4397653341293335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,16383,0.2260319987932841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,float16,32767,0.5608746608098348
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,24,24,128,1,float16,fp8,32767,0.42696531613667804
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,1,0.03729599962631861
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,3,0.03867733230193456
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,1,0.038245332737763725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,3,0.03973866750796636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,7,0.04009599983692169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,7,0.04080000023047129
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,15,0.050160000721613564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,15,0.05067733426888784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,31,0.05089599887530009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,31,0.05049066742261251
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,63,0.051216001311937966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,63,0.056746666630109154
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,127,0.08657067020734151
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,127,0.06758933266003926
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,255,0.11302933096885681
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,255,0.10195199648539226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,511,0.26796799898147583
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,511,0.1699626644452413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,1023,0.5209759871164957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,1023,0.3394346634546916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,float16,2047,1.6723413467407227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,24,24,128,1,float16,fp8,2047,0.6529279947280884
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,float16,1,0.06967466572920482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,fp8,1,0.07145066559314728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,float16,3,0.07246399919191997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,fp8,3,0.07431999842325847
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,float16,7,0.07499733567237854
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,fp8,7,0.07667199770609538
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,float16,15,0.09622933467229207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,fp8,15,0.09497599800427754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,fp8,31,0.09802132844924927
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,float16,31,0.10274133086204529
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,float16,63,0.10551466544469197
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,fp8,63,0.10404800375302632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,float16,127,0.13190933068593344
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,fp8,127,0.12742400169372559
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,float16,255,0.27853866418202716
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,fp8,255,0.19377599159876505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,float16,511,0.5176000197728475
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,24,24,128,1,float16,fp8,511,0.32345600922902423
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,float16,1,0.13447999954223633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,fp8,1,0.13834133744239807
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,float16,3,0.1397706667582194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,fp8,3,0.14294933279355368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,float16,7,0.15637333194414774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,fp8,7,0.15016532937685648
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,float16,15,0.1928373376528422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,float16,31,0.19833600521087646
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,fp8,15,0.19079999128977457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,fp8,31,0.19875200589497885
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,float16,63,0.20188266038894653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,fp8,63,0.20071999231974283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,float16,127,0.2878986597061157
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,fp8,127,0.24715733528137207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,float16,255,0.40058668454488117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,1,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,1,0.009178666397929192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,24,24,128,1,float16,fp8,255,0.3780053456624349
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,3,0.009279999881982803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,3,0.009472000102202097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,7,0.009029333169261614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,7,0.009381333366036415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,15,0.010048000141978264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,15,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,31,0.011855999628702799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,63,0.01181866725285848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,31,0.011584000041087469
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,63,0.01190399999419848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,127,0.01180800050497055
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,127,0.011861333002646765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,255,0.013845333208640417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,255,0.013663999736309052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,511,0.019120000302791595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,511,0.019098666807015736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,1023,0.03336533407370249
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,1023,0.03010133405526479
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,2047,0.11547733346621196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,2047,0.058373332023620605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,4095,0.13896000385284424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,4095,0.11086933811505635
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,8191,0.26946133375167847
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,8191,0.20921067396799722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,16383,0.5306186676025391
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,16383,0.40637866655985516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,fp8,32767,1.2446506818135579
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,24,24,128,1,float16,float16,32767,1.8727839787801106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,fp8,1,0.27584532896677655
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,float16,1,0.2788319985071818
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,float16,3,0.29873067140579224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,fp8,3,0.2963893413543701
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,float16,7,0.3112106720606486
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,fp8,7,0.3192320068677266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,float16,15,0.3784426848093669
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,float16,31,0.38896532853444415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,fp8,15,0.389354666074117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,fp8,63,0.39418665568033856
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,fp8,31,0.3896586497624715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,float16,63,0.3949600060780843
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,float16,127,0.4925493399302165
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,24,24,128,1,float16,fp8,127,0.4856586853663127
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,float16,1,0.572869340578715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,fp8,1,0.5895359913508097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,float16,3,0.6043359835942587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,float16,7,0.6156959931055704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,fp8,3,0.6044319868087769
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,fp8,7,0.6342613299687704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,float16,15,0.7494666576385498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,fp8,15,0.7621653079986572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,1,0.014826666563749313
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,1,0.015274666249752045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,3,0.015061333775520325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,float16,31,0.7704106966654459
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,3,0.015274666249752045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,fp8,31,0.7734986941019694
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,float16,63,0.8142773310343424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,24,24,128,1,float16,fp8,63,0.7798399925231934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,7,0.015040000279744467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,7,0.015354666858911514
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,15,0.015247999380032221
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,15,0.015471999843915304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,31,0.016864000509182613
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,31,0.01735466718673706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,63,0.01714666684468587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,127,0.01754666616519292
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,63,0.017301333447297413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,255,0.02045866722861926
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,127,0.017445333302021027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,255,0.020389333367347717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,511,0.028698667883872986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,511,0.0359946663180987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,1023,0.11196266611417134
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,1023,0.05301333467165629
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,2047,0.09827199578285217
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,4095,0.2863733371098836
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,4095,0.19406400124231973
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,2047,0.15099733074506125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,8191,0.5568586587905884
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,8191,0.37112534046173096
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,float16,16383,1.7065919240315754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,3,0.01524266724785169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,24,24,128,1,float16,fp8,16383,0.7322666645050049
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,1,0.015546667079130808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,1,0.015066667149464289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,7,0.015423999478419622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,3,0.01570133368174235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,7,0.015765332927306492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,15,0.016554666062196095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,15,0.016927999754746754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,31,0.019962667177120846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,31,0.020293333878119785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,63,0.020330666253964107
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,63,0.020128000527620316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,127,0.020410666863123577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,127,0.02042666698495547
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,255,0.02533866713444392
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,255,0.023919999599456787
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,511,0.045040001471837364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,1023,0.09758933385213216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,511,0.03625066578388214
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,1023,0.06343466540177663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,2047,0.18095999956130981
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,2047,0.12141333023707072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,4095,0.2260106603304545
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,4095,0.5651359955469767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,8191,0.43378134568532306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,8191,0.6811626752217611
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,float16,16383,1.3600266774495442
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,16,128,1,float16,fp8,16383,0.8589973449707031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,1,0.009328000247478485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,3,0.009269333134094873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,3,0.009573333586255709
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,1,0.009632000078757605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,7,0.009712000067035357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,7,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,15,0.009722666814923286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,15,0.009808000177145004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,31,0.010122666756312052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,31,0.010533332824707031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,63,0.01202133297920227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,63,0.01198400060335795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,127,0.01221866657336553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,127,0.0120319997270902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,255,0.012346666306257248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,255,0.01211200033624967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,511,0.016336000214020412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,1023,0.01741333305835724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,511,0.01659199967980385
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,2047,0.017802666872739792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,1023,0.01759999990463257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,2047,0.01775466650724411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,4095,0.029535998900731403
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,4095,0.029685333371162415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,8191,0.040735999743143715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,8191,0.04046933352947235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,16383,0.06145066519578298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,16383,0.08055999875068665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,32767,0.14074132839838663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,32767,0.11492799719174702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,float16,65535,0.25572800636291504
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,1,0.009482666850090027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,16,128,1,float16,fp8,65535,0.20212799310684204
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,1,0.009749333063761393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,3,0.009568000212311745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,3,0.00983466642598311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,7,0.009989333028594652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,7,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,15,0.009999999776482582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,31,0.010362666721145311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,15,0.0100426667680343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,63,0.012282667060693106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,31,0.01044800008336703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,63,0.01219733307758967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,127,0.012240000069141388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,127,0.012069333344697952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,255,0.012245333443085352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,511,0.016751999656359356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,255,0.012250666817029318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,1023,0.01754133279124896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,511,0.016607999801635742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,1023,0.01759999990463257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,2047,0.02958400050799052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,4095,0.04008533308903376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,4095,0.040448000033696495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,2047,0.02957333376010259
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,8191,0.06009600063165029
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,16383,0.10462400317192078
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,16383,0.1285973290602366
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,8191,0.07525866727034251
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,32767,0.23186665773391724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,32767,0.18291199207305908
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,1,0.014959999670584997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,1,0.015216000378131866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,3,0.015626666446526844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,float16,65535,0.44124801953633624
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,3,0.01562133307258288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,16,128,1,float16,fp8,65535,0.3440320094426473
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,7,0.01609066625436147
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,7,0.016421332955360413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,15,0.019424000134070713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,15,0.01947733387351036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,31,0.019776000330845516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,63,0.01979200045267741
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,31,0.019541333119074505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,127,0.0245919997493426
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,63,0.019839999576409657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,127,0.023578666150569916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,255,0.05726400017738342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,255,0.036042665441830955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,511,0.09717866778373718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,511,0.062021334966023765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,1023,0.12180266777674358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,1023,0.18146665891011557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,2047,0.3476159969965617
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,2047,0.22918933629989624
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,4095,1.1176586945851643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,1,0.009679999823371569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,4095,0.4402666489283244
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,1,0.009989333028594652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,float16,8191,1.3813385963439941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,3,0.009632000078757605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,16,128,1,float16,fp8,8191,0.8811946709950765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,3,0.009957333405812582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,7,0.009733333562811216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,15,0.00996800015370051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,7,0.009925333162148794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,15,0.010277333358923594
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,31,0.010570666442314783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,63,0.012309333930412928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,63,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,127,0.01239466667175293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,127,0.012282667060693106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,255,0.0124746672809124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,255,0.012319999436537424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,511,0.016832000265518825
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,511,0.017093333105246227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,1023,0.029658667743206024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,1023,0.029674666623274486
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,2047,0.03809600075085958
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,2047,0.03728000074625015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,4095,0.08195200065771739
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,8191,0.11482666929562886
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,4095,0.0525546669960022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,8191,0.09121066331863403
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,16383,0.21266667048136392
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,16383,0.15983999768892923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,32767,0.3850880066553752
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,32767,0.29505600531895954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,float16,65535,1.1357866923014324
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,1,0.026250667870044708
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,1,0.02665599932273229
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,16,128,1,float16,fp8,65535,0.5611413319905599
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,3,0.027082666754722595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,3,0.027845333019892376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,7,0.027962667246659596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,7,0.028629332780838013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,15,0.03514666606982549
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,15,0.03522133330504099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,31,0.035445332527160645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,31,0.03551999976237615
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,63,0.03721066564321518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,63,0.03547733277082443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,127,0.05091199775536855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,255,0.10452799995740254
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,127,0.04397333165009817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,255,0.07041599849859874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,511,0.11693867047627766
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,511,0.18289067347844443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,1023,0.3509226640065511
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,1023,0.2325920065244039
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,2047,0.6830879847208658
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,2047,0.5954346656799316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,1,0.04837333162625631
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,float16,4095,1.3491519292195637
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,1,0.04942933221658071
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,16,128,1,float16,fp8,4095,0.8621973196665446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,3,0.050245334704717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,3,0.05146133402983347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,7,0.051776001850763954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,7,0.05291733145713806
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,15,0.06503466765085857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,31,0.06860800087451935
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,15,0.06545599798361461
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,31,0.06538133323192596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,63,0.06954666475454967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,63,0.07292800148328145
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,127,0.09153599540392558
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,255,0.14577066898345947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,255,0.1317813297112783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,127,0.0925333301226298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,511,0.22178133328755698
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,1023,0.6904959678649902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,float16,511,0.35204799969991046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,16,128,1,float16,fp8,1023,0.44819732507069904
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,float16,1,0.09158933162689209
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,fp8,1,0.09397332866986592
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,fp8,3,0.09723200400670369
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,float16,3,0.09490133325258891
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,float16,7,0.09890133142471313
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,float16,15,0.12991467118263245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,fp8,7,0.10095999638239543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,fp8,15,0.12621866663297018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,float16,31,0.134853333234787
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,fp8,31,0.1328053375085195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,fp8,63,0.136245330174764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,float16,63,0.14589866995811462
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,float16,127,0.1717066764831543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,fp8,127,0.16739734013875326
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,float16,255,0.27477866411209106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,fp8,255,0.255514661471049
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,float16,511,0.6869973341623942
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,1,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,3,0.009786666681369146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,1,0.009973333527644476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,3,0.009999999776482582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,7,0.010048000141978264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,7,0.00984533317387104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,16,128,1,float16,fp8,511,0.42803200085957843
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,15,0.009919999788204828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,15,0.010015999898314476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,63,0.012389333297808966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,31,0.010480000327030817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,63,0.012383999923865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,127,0.012576000144084295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,255,0.012874666601419449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,127,0.012453333785136541
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,255,0.012437333663304647
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,511,0.028399998943010967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,511,0.028197333216667175
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,1023,0.033258666594823204
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,1023,0.03291733314593633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,2047,0.04509866734345754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,2047,0.05481066803137461
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,4095,0.10891200105349223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,4095,0.07320533196131389
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,8191,0.29888532559076947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,8191,0.13260266184806824
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,16383,0.35488001505533856
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,16383,0.2392746607462565
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,32767,0.6925280094146729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,32767,0.4540693362553914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,float16,65535,1.3767414093017578
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,16,128,1,float16,fp8,65535,0.9049386978149414
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,float16,1,0.17805866400400797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,fp8,1,0.18242667118708292
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,float16,3,0.18992000818252563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,fp8,3,0.18915732701619467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,float16,7,0.2092693249384562
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,fp8,7,0.2044373353322347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,float16,15,0.254586656888326
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,fp8,15,0.2574560046195984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,float16,31,0.2696693340937297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,fp8,31,0.26233599583307904
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,float16,63,0.2656053304672241
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,fp8,63,0.26519999901453656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,fp8,127,0.3270133336385091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,float16,127,0.33265066146850586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,float16,255,0.526095986366272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,16,128,1,float16,fp8,255,0.49982933203379315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,float16,1,0.3930879831314087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,fp8,1,0.3903839985529582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,float16,3,0.39500268300374347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,float16,7,0.4129066864649455
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,fp8,3,0.40462398529052734
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,fp8,7,0.42494932810465497
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,float16,15,0.5020266771316528
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,fp8,15,0.5102666616439819
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,float16,31,0.5297919909159342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,fp8,31,0.5178560018539429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,float16,63,0.5220853487650553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,fp8,63,0.5226453145345052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,1,0.009322666873534521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,fp8,127,0.645962675412496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,1,0.009957333405812582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,3,0.009205333267649015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,3,0.009519999846816063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,16,128,1,float16,float16,127,0.6533919970194498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,7,0.009296000003814697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,7,0.009450666606426239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,15,0.009818666925032934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,31,0.011706666400035223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,31,0.011770666887362799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,63,0.011776000261306763
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,127,0.012138667205969492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,15,0.01027199998497963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,127,0.01211200033624967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,63,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,255,0.013738666971524557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,255,0.013760000467300415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,511,0.019530666371186573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,1023,0.05205333232879639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,511,0.019296000401178997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,1023,0.03046933313210805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,2047,0.09443199634552002
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,4095,0.17824532588322958
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,2047,0.0639573335647583
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,4095,0.11668266852696736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,8191,0.5655253330866495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,8191,0.22122132778167725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,16383,0.6902613639831543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,16383,0.43698668479919434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,1,0.014858666807413101
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,float16,32767,1.5209760665893555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,16,128,1,float16,fp8,32767,1.2909226417541504
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,1,0.015317333241303762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,3,0.015034666905800501
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,3,0.015466666469971338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,7,0.015072000523408255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,7,0.015605332950750986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,15,0.015103999525308609
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,15,0.015610666324694952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,31,0.016970666746298473
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,31,0.017184000462293625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,63,0.016869333883126576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,63,0.017258666455745697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,127,0.01725333308180173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,127,0.017456000049908955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,255,0.020282667130231857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,255,0.020330666253964107
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,511,0.03537066777547201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,511,0.02845333268245061
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,1023,0.11206400394439697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,1023,0.05249066650867462
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,2047,0.15154666701952615
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,2047,0.09858666857083638
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,4095,0.28624532620112103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,4095,0.19513066609700522
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,8191,0.5584479967753092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,8191,0.3715626796086629
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,1,0.009477333476146063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,float16,16383,1.7049706776936848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,12,12,128,1,float16,fp8,16383,0.730026642481486
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,3,0.009322666873534521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,1,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,3,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,7,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,7,0.00972800018886725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,15,0.009626666704813639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,15,0.009888000165422758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,31,0.010538666198650995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,31,0.010197333370645842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,63,0.012106666962305704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,63,0.012149333953857422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,127,0.0122079998254776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,127,0.011946666985750198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,255,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,255,0.01201066623131434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,511,0.01659199967980385
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,511,0.0164533331990242
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,1023,0.017557332913080852
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,2047,0.017637333522240322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,1023,0.01756799966096878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,2047,0.017456000049908955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,4095,0.017765333255132038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,4095,0.017509333789348602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,8191,0.02197333425283432
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,8191,0.02180800090233485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,16383,0.028912000358104706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,16383,0.040394666294256844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,32767,0.053541332483291626
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,65535,0.14176533619562784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,float16,32767,0.07799466451009114
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,1,0.009375999992092451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,1,0.009573333586255709
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,12,12,128,1,float16,fp8,65535,0.12638933459917703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,3,0.009338666374484697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,3,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,7,0.009328000247478485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,7,0.009679999823371569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,15,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,31,0.010549332946538925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,31,0.010330666477481524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,63,0.012223999947309494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,15,0.009818666925032934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,63,0.012186666329701742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,127,0.012122667084137598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,127,0.012053333222866058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,255,0.012378666549921036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,255,0.012154666086037954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,511,0.016415999581416447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,511,0.016623999923467636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,1023,0.01766933376590411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,1023,0.017605333278576534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,2047,0.01793066660563151
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,2047,0.017498667041460674
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,4095,0.03728000074625015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,4095,0.036874666810035706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,8191,0.05159999926884969
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,8191,0.06320000191529591
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,16383,0.11054933071136475
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,32767,0.19584532578786215
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,32767,0.15743999679883322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,16383,0.09045333663622539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,1,0.014218666901191076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,float16,65535,0.3691946665445964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,12,12,128,1,float16,fp8,65535,0.3444266716639201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,1,0.014682666709025701
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,3,0.014501333236694336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,3,0.014959999670584997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,7,0.014458666245142618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,15,0.01613866661985715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,7,0.014618666221698126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,15,0.016341333587964375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,31,0.016362667083740234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,31,0.016271999726692837
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,63,0.01647466669480006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,63,0.01655999943614006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,127,0.019333332777023315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,127,0.019189332922299702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,255,0.02804800122976303
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,255,0.03598399957021078
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,511,0.1108746627966563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,511,0.050613333781560264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,1023,0.150629331668218
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,1023,0.09920533498128255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,2047,0.28643733263015747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,2047,0.1950613260269165
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,4095,0.5579573313395182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,4095,0.3742239872614543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,1,0.009653333574533463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,fp8,8191,0.7409066359202067
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,12,12,128,1,float16,float16,8191,1.7202293078104656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,1,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,3,0.00956266683836778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,3,0.009877333417534828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,7,0.009583999713261923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,15,0.009946666657924652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,7,0.009994666402538618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,31,0.010421333213647207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,15,0.01007466639081637
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,31,0.010559999694426855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,63,0.01211200033624967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,63,0.012191999703645706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,127,0.012213333199421564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,127,0.01240533341964086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,255,0.012565333396196365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,255,0.012085333466529846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,511,0.016623999923467636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,511,0.016650666793187458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,1023,0.029418667157491047
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,1023,0.02943466603755951
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,2047,0.036943999429543815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,2047,0.03711999952793121
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,4095,0.06438399851322174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,4095,0.05150400102138519
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,8191,0.10940266648928325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,8191,0.09012800455093384
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,16383,0.1955146590868632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,16383,0.15729600191116333
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,32767,0.2877119978268941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,32767,0.3690933386484782
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,float16,65535,0.7126186688741049
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,1,0.02067733307679494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,12,12,128,1,float16,fp8,65535,0.6734186808268229
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,1,0.02110933264096578
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,3,0.021386665602525074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,3,0.021546666820844013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,7,0.021914665897687275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,7,0.022597332795461018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,15,0.02759466568628947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,15,0.02752533306678136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,31,0.027658666173617046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,31,0.027658666173617046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,63,0.028234665592511494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,63,0.0276853342851003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,127,0.03990400085846583
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,127,0.03342933456103007
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,255,0.054373333851496376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,255,0.06241066753864288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,511,0.08949333429336548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,511,0.22129066785176596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,1023,0.26632533470789593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,1023,0.17774399121602377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,2047,0.5168853203455607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,2047,0.33749866485595703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,1,0.037317333122094475
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,fp8,4095,0.6536533435185751
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,12,12,128,1,float16,float16,4095,1.016426642735799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,1,0.03835200021664301
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,3,0.03977599988381068
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,7,0.04010133445262909
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,7,0.040805332362651825
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,3,0.03885333240032196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,15,0.050154666105906166
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,15,0.05073066552480062
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,63,0.0568800022204717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,31,0.050570666790008545
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,31,0.051701332132021584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,63,0.05146133402983347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,127,0.07149866720040639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,127,0.06776533524195354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,255,0.11302399635314941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,511,0.26845866441726685
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,255,0.10195733110109965
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,511,0.18686934312184653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,float16,1023,0.5205653508504232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,12,12,128,1,float16,fp8,1023,0.3407413164774577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,float16,1,0.06970666845639546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,fp8,1,0.07138133545716603
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,float16,3,0.07259733478228252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,fp8,3,0.07447466750939687
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,float16,7,0.07478400071461995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,fp8,7,0.07658133407433827
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,float16,15,0.09553066889444987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,float16,31,0.10309867064158122
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,fp8,15,0.09513599673906963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,fp8,31,0.09777067104975383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,float16,63,0.10511466860771179
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,fp8,63,0.10433066884676616
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,float16,127,0.1318666636943817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,fp8,127,0.1339413324991862
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,float16,255,0.20975999037424722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,fp8,255,0.19367466370264688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,float16,511,0.5181493361790975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,1,0.009770666559537252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,12,12,128,1,float16,fp8,511,0.3240000009536743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,1,0.009973333527644476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,3,0.009957333405812582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,3,0.009898666913310686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,7,0.010106666634480158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,7,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,15,0.010170666500926018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,15,0.009946666657924652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,31,0.010703999549150467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,63,0.012346666306257248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,127,0.012448000411192576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,63,0.012453333785136541
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,127,0.012309333930412928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,255,0.012730666746695837
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,255,0.012378666549921036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,511,0.027989332874615986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,511,0.027776000400384266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,1023,0.032314665615558624
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,1023,0.03257599969704946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,2047,0.04826133449872335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,4095,0.08855467041333516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,2047,0.04364266494909922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,4095,0.07088533540566762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,8191,0.1585813363393148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,8191,0.12447466452916463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,16383,0.2267733414967855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,16383,0.43671464920043945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,32767,0.5598560174306234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,32767,0.4273386796315511
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,float16,65535,1.0997333526611328
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,float16,1,0.13447999954223633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,12,12,128,1,float16,fp8,65535,0.8409653504689535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,fp8,1,0.13854400316874185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,float16,3,0.13983999689420065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,fp8,3,0.14297067125638327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,float16,7,0.1542080044746399
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,fp8,7,0.14973866939544678
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,fp8,15,0.19166400035222372
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,float16,15,0.192303995291392
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,float16,31,0.19830399751663208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,fp8,31,0.19883733987808228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,float16,63,0.2017973264058431
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,fp8,63,0.20703466733296713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,float16,127,0.2521119912465413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,fp8,127,0.24680533011754355
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,float16,255,0.4007146755854289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,12,12,128,1,float16,fp8,255,0.3773386478424072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,float16,1,0.28040534257888794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,fp8,1,0.2763146758079529
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,float16,3,0.2980266610781352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,fp8,3,0.30137066046396893
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,float16,7,0.31173866987228394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,fp8,7,0.3195733428001404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,float16,15,0.3781333367029826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,fp8,15,0.3838506539662679
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,float16,31,0.38864533106486004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,fp8,31,0.3898613452911377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,float16,63,0.3944213390350342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,fp8,63,0.4037919839223226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,1,0.008992000172535578
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,1,0.009056000038981438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,float16,127,0.4927999973297119
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,3,0.009152000149091085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,12,12,128,1,float16,fp8,127,0.48552532990773517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,3,0.009301333377758661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,7,0.009178666397929192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,7,0.009328000247478485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,15,0.009770666559537252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,15,0.010026666646202406
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,31,0.011711999773979187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,31,0.011765333513418833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,63,0.011706666400035223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,63,0.011850666254758835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,127,0.01209066684047381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,127,0.011834666132926941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,255,0.013616000612576803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,255,0.013674666484196981
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,511,0.019296000401178997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,511,0.01918399954835574
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,1023,0.03370666752258936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,1023,0.030245333909988403
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,2047,0.05821333328882853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,2047,0.07518933216730754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,4095,0.13936533530553183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,4095,0.13210666179656982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,8191,0.27030400435129803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,8191,0.20932799577713013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,16383,0.40773868560791016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,16383,0.536085327466329
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,1,0.00914666677514712
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,1,0.009253333633144697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,3,0.010197333370645842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,float16,32767,1.3422346115112305
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,12,12,128,1,float16,fp8,32767,1.2463146845499675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,3,0.00933333362142245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,7,0.009162666896979014
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,7,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,15,0.009989333028594652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,15,0.010138666878143946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,31,0.011877333124478659
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,63,0.011962667107582092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,31,0.01210133358836174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,63,0.011823999385039011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,127,0.011957333733638128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,127,0.012005332857370377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,255,0.013898666948080063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,255,0.013770667215188345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,511,0.019808000574509304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,511,0.019178666174411774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,1023,0.052426666021347046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,2047,0.09478400150934856
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,1023,0.030426666140556335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,2047,0.06446399788061778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,4095,0.17840532461802164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,4095,0.15635200341542563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,8191,0.34627199172973633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,8191,0.22291733821233115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,16383,0.6912000179290771
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,16383,0.4325759808222453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,1,0.009258666386206945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,1,0.009589333087205887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,float16,32767,1.523311932881673
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,3,0.009530666594703993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,8,128,1,float16,fp8,32767,1.2883893648783367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,3,0.009925333162148794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,7,0.009455999980370203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,7,0.010053333515922228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,15,0.009322666873534521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,31,0.010464000205198923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,15,0.009674666449427605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,31,0.010421333213647207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,63,0.012069333344697952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,63,0.01221866657336553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,127,0.012149333953857422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,127,0.011711999773979187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,255,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,255,0.012128000458081564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,511,0.0161920003592968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,511,0.01634666696190834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,1023,0.017322666943073273
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,1023,0.01743999992807706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,2047,0.01743999992807706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,2047,0.017680000513792038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,4095,0.017781333376963932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,4095,0.017914666483799618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,8191,0.029626667499542236
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,8191,0.02957333376010259
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,16383,0.04080000023047129
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,16383,0.04037333279848099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,32767,0.08220799763997395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,32767,0.06292266647020976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,65535,0.14474667112032572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,65535,0.11748266220092773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,1,0.009450666606426239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,1,0.00983466642598311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,float16,131071,0.3709706862767537
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,3,0.009397333487868309
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,8,128,1,float16,fp8,131071,0.21585599581400552
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,3,0.009514666472872099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,7,0.009301333377758661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,7,0.009701333319147428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,15,0.01009599988659223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,15,0.009546666716535887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,31,0.010250666489203772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,31,0.010543999572594961
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,63,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,127,0.011978667229413986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,63,0.01184533288081487
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,127,0.01219733307758967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,255,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,255,0.012191999703645706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,511,0.01642666632930438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,511,0.016629333297411602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,1023,0.017477333545684814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,1023,0.017610666652520496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,2047,0.017935999979575474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,2047,0.0176959993938605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,4095,0.02951466788848241
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,4095,0.029711998999118805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,8191,0.04045866678158442
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,8191,0.04084266722202301
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,16383,0.08045333127180736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,16383,0.06178133189678192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,32767,0.14124799768129984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,32767,0.114656001329422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,65535,0.25569067398707074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,65535,0.2023786703745524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,1,0.015119999647140503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,float16,131071,0.48792000611623126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,8,128,1,float16,fp8,131071,0.45902931690216064
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,1,0.015386667102575302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,3,0.015386667102575302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,3,0.015685333559910457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,7,0.015504000087579092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,7,0.015754666179418564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,15,0.016810666769742966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,15,0.0169813334941864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,31,0.01989866668979327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,31,0.020058666666348774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,63,0.020080000162124634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,63,0.02033599962790807
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,127,0.020421333611011505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,127,0.02035733312368393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,255,0.024826665719350178
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,255,0.02401600033044815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,511,0.058037335673967995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,511,0.03612266729275385
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,1023,0.09738666812578838
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,1023,0.06310933331648509
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,2047,0.1824480096499125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,2047,0.12180266777674358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,4095,0.3479413191477458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,4095,0.22633600234985352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,8191,0.43277867635091144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,8191,1.1173866589864094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,1,0.009770666559537252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,1,0.009935999910036722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,float16,16383,1.358517328898112
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,3,0.009573333586255709
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,8,128,1,float16,fp8,16383,0.8503999710083008
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,3,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,7,0.009610666582981745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,7,0.009973333527644476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,15,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,15,0.010064000263810158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,31,0.010682666053374609
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,31,0.010629333555698395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,63,0.012223999947309494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,63,0.012245333443085352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,127,0.012175999581813812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,127,0.012175999581813812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,255,0.012666666259368261
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,255,0.012240000069141388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,511,0.016501333564519882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,1023,0.01793066660563151
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,511,0.016783999900023144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,1023,0.01756799966096878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,2047,0.029616000751654308
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,4095,0.04053866614898046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,2047,0.029520000020662945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,4095,0.040021332601706185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,8191,0.07683733105659485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,8191,0.06002666552861532
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,16383,0.1288533310095469
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,16383,0.10351999600728352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,32767,0.18268799781799316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,32767,0.2314186692237854
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,65535,0.6267626682917277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,65535,0.3454293409983317
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,1,0.01515199989080429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,float16,131071,0.8643679618835449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,1,0.015168000012636185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,8,128,1,float16,fp8,131071,0.6734560330708822
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,3,0.01545599972208341
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,7,0.016010666886965435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,3,0.01562133307258288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,7,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,15,0.019578666736682255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,15,0.01974933346112569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,31,0.019567999988794327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,31,0.019738666713237762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,63,0.01979200045267741
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,63,0.01971199984351794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,127,0.024362665911515553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,127,0.023418667415777843
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,255,0.044026667873064675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,255,0.03589866558710734
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,511,0.09716266393661499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,511,0.06253866851329803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,1023,0.1813066601753235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,1023,0.16005333264668783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,2047,0.3479413191477458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,2047,0.2295680046081543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,4095,0.6817920207977295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,4095,0.4410666624704997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,1,0.026378666361172993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,1,0.026730666557947796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,float16,8191,1.379792054494222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,3,0.02712533374627431
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,8,128,1,float16,fp8,8191,0.8804799715677897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,3,0.02769600103298823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,7,0.028064000109831493
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,7,0.028565332293510437
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,15,0.035071998834609985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,15,0.03508266558249792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,31,0.035536001125971474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,31,0.03531199942032496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,63,0.03728533287843069
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,63,0.03555200000603994
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,127,0.05086400111516317
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,127,0.044218664367993675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,255,0.07904000083605449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,255,0.07713066538174947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,511,0.18359466393788657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,511,0.11669333775838216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,1023,0.35281066099802655
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,1023,0.23244800170262656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,fp8,2047,0.4445066849390666
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,1,0.04828799764315287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,1,0.04951466619968414
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,8,128,1,float16,float16,2047,0.6841119925181071
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,3,0.050000001986821495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,3,0.05144000053405762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,7,0.05197866757710775
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,7,0.05287466446558634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,15,0.06506666541099548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,15,0.06558399895826976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,31,0.06949333349863689
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,63,0.07255466779073079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,63,0.06866666674613953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,31,0.06564266482988994
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,127,0.091839998960495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,127,0.08780800302823384
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,255,0.14574933052062988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,255,0.1327359974384308
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,511,0.5684800148010254
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,511,0.22155199448267618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,float16,1023,0.6892906824747721
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,1,0.009941333283980688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,1,0.010474666953086853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,3,0.009546666716535887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,7,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,8,128,1,float16,fp8,1023,0.4482239882151286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,3,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,7,0.009850666547815004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,15,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,15,0.010175999874869982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,31,0.010714666297038397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,31,0.011125333607196808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,63,0.012272000312805176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,127,0.012074666718641916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,127,0.01221866657336553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,63,0.012426666915416718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,255,0.012330666184425354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,255,0.012517333030700684
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,511,0.017162666966517765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,1023,0.02961066613594691
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,1023,0.02942933390537898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,511,0.017008000363906223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,4095,0.06684266527493794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,2047,0.03849600007136663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,2047,0.03739733248949051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,4095,0.05251200000445048
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,8191,0.11471999684969585
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,8191,0.10450667142868042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,16383,0.20524799823760986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,16383,0.16024000446001688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,32767,0.3837120135625203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,32767,0.29418667157491046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,65535,0.740943988164266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,65535,0.5610453287760416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,float16,131071,1.3733332951863606
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,float16,1,0.09155733386675517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,8,128,1,float16,fp8,131071,1.210256020228068
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,float16,3,0.09486933549245198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,fp8,3,0.09742933511734009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,fp8,1,0.09388800462086995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,fp8,7,0.10105599959691365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,float16,7,0.09876799583435059
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,float16,15,0.12987732887268066
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,fp8,15,0.12567999958992004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,float16,31,0.13964800039927164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,fp8,31,0.13297067085901895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,float16,63,0.13739200433095297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,fp8,63,0.13699733217557272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,float16,127,0.1711840033531189
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,fp8,127,0.1678826610247294
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,float16,255,0.27402132749557495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,fp8,255,0.2555999954541524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,float16,511,1.1201653480529785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,8,128,1,float16,fp8,511,0.4280960162480672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,float16,1,0.1777706742286682
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,fp8,1,0.18256000677744547
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,fp8,3,0.1893813411394755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,float16,7,0.209114670753479
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,float16,3,0.19003732999165854
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,fp8,7,0.20401066541671753
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,float16,15,0.25857067108154297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,fp8,15,0.25838400920232135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,float16,31,0.26180799802144367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,fp8,31,0.26254934072494507
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,float16,63,0.26628265778223675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,fp8,63,0.26500266790390015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,1,0.00996800015370051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,float16,127,0.33204267422358197
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,fp8,127,0.3270026644070943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,1,0.010202666744589806
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,3,0.00980266680320104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,float16,255,0.6979680061340332
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,8,128,1,float16,fp8,255,0.5013759930928549
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,3,0.0100426667680343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,7,0.009653333574533463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,7,0.009989333028594652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,15,0.00983466642598311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,15,0.010186666622757912
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,31,0.010773333410422007
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,63,0.012527999778588613
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,31,0.010832000523805618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,63,0.012266666938861212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,127,0.01239466667175293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,127,0.01231466606259346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,255,0.013013333082199097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,255,0.012495999534924826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,511,0.028117333849271137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,511,0.028624000648657482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,1023,0.032560000816980995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,1023,0.03347733368476232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,2047,0.05486399928728739
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,2047,0.045007998744646706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,4095,0.10845866799354553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,4095,0.07411199808120728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,8191,0.19167466958363852
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,8191,0.13353066643079123
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,16383,0.35926934083302814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,16383,0.3174239993095398
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,32767,0.6924853324890137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,32767,0.4524480104446411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,float16,65535,1.3717600504557292
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,8,128,1,float16,fp8,65535,0.894437313079834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,1,0.00961599995692571
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,3,0.009861333295702934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,1,0.010058666889866194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,3,0.009930666536092758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,7,0.009754666437705358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,7,0.010133333504199982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,15,0.010186666622757912
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,15,0.00997866690158844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,31,0.010693332801262537
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,31,0.010847999403874079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,63,0.012362666428089142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,63,0.012213333199421564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,127,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,127,0.012256000190973282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,255,0.013141332815090815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,255,0.01250133290886879
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,511,0.02855466554562251
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,511,0.028160000840822857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,1023,0.03332799921433131
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,1023,0.0328053335348765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,2047,0.04483200112978617
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,2047,0.05542399982611338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,4095,0.108106662829717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,4095,0.07477333148320515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,8191,0.13316800196965536
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,8191,0.19161067406336466
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,16383,0.3577546675999959
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,16383,0.3174720009167989
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,32767,0.6867199738820394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,32767,0.4538613160451253
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,1,0.009706666693091393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,1,0.009301333377758661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,3,0.009535999968647957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,3,0.009583999713261923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,float16,65535,1.3738986651102703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,7,0.009349333122372627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,4,128,1,float16,fp8,65535,0.8956000010172526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,7,0.00979200005531311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,31,0.010346666599313417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,15,0.009583999713261923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,15,0.009717333440979322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,31,0.01028266673286756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,63,0.011871999750534693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,127,0.011973333855470022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,63,0.012085333466529846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,127,0.01191466674208641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,255,0.012058666596810022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,255,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,511,0.01602666700879733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,511,0.016415999581416447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,1023,0.017349333812793095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,1023,0.017290666699409485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,2047,0.017423999806245167
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,2047,0.017573333034912746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,4095,0.017583999782800674
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,4095,0.017605333278576534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,8191,0.017535999417304993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,8191,0.017765333255132038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,16383,0.017893332988023758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,16383,0.017632000148296356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,32767,0.02441066751877467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,32767,0.023232000569502514
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,65535,0.08240533371766408
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,65535,0.03417599946260452
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,1,0.009258666386206945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,1,0.009674666449427605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,float16,131071,0.10120532910029094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,3,0.009408000235756239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,3,0.009935999910036722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,4,128,1,float16,fp8,131071,0.07032533486684163
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,7,0.009285333255926767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,7,0.009626666704813639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,15,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,15,0.009797333429257074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,31,0.010330666477481524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,31,0.010469333579142889
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,63,0.012213333199421564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,63,0.012170666207869848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,127,0.011920000116030375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,127,0.012154666086037954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,255,0.011994666109482447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,255,0.011850666254758835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,511,0.016197333733240765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,511,0.016442666451136272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,1023,0.017477333545684814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,1023,0.017477333545684814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,2047,0.017370666066805523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,2047,0.017749333133300144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,4095,0.017583999782800674
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,8191,0.02945599953333537
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,4095,0.017808000246683758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,8191,0.02922666569550832
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,16383,0.040752001106739044
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,32767,0.106495996316274
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,16383,0.04030400017897288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,32767,0.062277331948280334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,65535,0.14292800426483154
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,65535,0.11743467052777608
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,1,0.009178666397929192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,float16,131071,0.2699039975802104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,1,0.009162666896979014
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,3,0.009392000113924345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,4,128,1,float16,fp8,131071,0.21502933899561563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,3,0.009434666484594345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,7,0.009322666873534521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,7,0.009461333354314169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,15,0.010090666512648264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,31,0.011685332904259363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,15,0.010351999973257383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,31,0.012047999848922094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,63,0.011733333269755045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,63,0.011994666109482447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,127,0.011855999628702799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,255,0.013845333208640417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,127,0.011994666109482447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,511,0.02000533292690913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,255,0.01370666672786077
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,511,0.01926400015751521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,1023,0.0524533341328303
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,1023,0.030447999636332195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,2047,0.09477866689364116
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,2047,0.06385600070158641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,4095,0.17848533391952515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,4095,0.11711999773979187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,8191,0.5656319856643677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,8191,0.22202134132385254
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,16383,0.6923946539560953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,16383,0.4333599805831909
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,1,0.009269333134094873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,1,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,3,0.009941333283980688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,float16,32767,1.522549311319987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,3,0.009381333366036415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,4,128,1,float16,fp8,32767,1.2876053651173909
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,7,0.009717333440979322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,7,0.009482666850090027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,15,0.00949866697192192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,15,0.009893333539366722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,31,0.010485333700974783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,31,0.010288000106811523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,63,0.012053333222866058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,127,0.012117333710193634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,127,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,63,0.012346666306257248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,255,0.012309333930412928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,511,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,511,0.01655999943614006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,255,0.012042666474978128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,1023,0.017685333887736004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,1023,0.017642666896184284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,2047,0.017664000391960144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,2047,0.017909333109855652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,4095,0.029648000995318096
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,8191,0.04095999896526337
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,4095,0.02956266701221466
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,8191,0.04030400017897288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,16383,0.07932266592979431
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,16383,0.061610668897628784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,32767,0.1409119963645935
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,32767,0.11482666929562886
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,65535,0.25589332977930707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,65535,0.20181334018707275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,1,0.015109332899252573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,1,0.015413332730531693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,3,0.015301333119471868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,float16,131071,0.6807306607564291
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,3,0.01573333392540614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,4,128,1,float16,fp8,131071,0.37935467561086017
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,7,0.015413332730531693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,7,0.01586666703224182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,15,0.016650666793187458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,31,0.020015999674797058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,31,0.02014933278163274
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,15,0.017029333859682083
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,63,0.020256000260512035
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,63,0.02045866722861926
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,127,0.02033599962790807
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,127,0.020554666717847187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,255,0.02521066615978877
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,255,0.02378133436044057
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,511,0.057989334066708885
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,1023,0.09844799836476643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,511,0.036506667733192444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,1023,0.06363200147946675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,2047,0.18270933628082275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,2047,0.12157332897186279
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,4095,0.3485493262608846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,4095,0.22613867123921713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,8191,1.1202826499938965
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,8191,0.4341866572697957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,1,0.014864000181357065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,1,0.015461333096027374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,fp8,16383,0.8516426881154379
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,3,0.01534933348496755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,3,0.015578666081031164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,4,128,1,float16,float16,16383,1.359338601430257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,7,0.016021333634853363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,7,0.016224000602960587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,15,0.019354666272799175
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,15,0.01979200045267741
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,31,0.019546666493018467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,31,0.01974933346112569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,63,0.019760000209013622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,63,0.019765333582957584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,127,0.024405332903067272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,127,0.023525332411130268
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,255,0.04379733403523763
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,255,0.036271999279658
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,511,0.09701333443323772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,511,0.0625546673933665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,1023,0.18199467658996582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,1023,0.12235732873280843
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,2047,0.5657653411229452
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,2047,0.2285333275794983
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,1,0.026250667870044708
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,1,0.026821332673231762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,3,0.02718399961789449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,3,0.027690666417280834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,7,0.02788266787926356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,fp8,4095,0.44172267119089764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,7,0.028736000259717304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,4,128,1,float16,float16,4095,0.682965358098348
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,15,0.03503466645876566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,31,0.035429333647092186
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,63,0.03844266633192698
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,15,0.03517866631348928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,31,0.03535466641187668
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,63,0.03551999976237615
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,127,0.050901333491007485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,127,0.04427733520666758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,255,0.07062933345635732
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,511,0.2906613349914551
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,511,0.1169599990049998
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,1023,0.35230398178100586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,255,0.07962666451931
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,float16,2047,0.6852853298187256
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,1,0.009472000102202097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,1,0.00980266680320104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,1023,0.23269865910212198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,3,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,4,128,1,float16,fp8,2047,0.4437706470489502
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,3,0.00983466642598311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,7,0.009770666559537252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,15,0.009919999788204828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,15,0.009984000275532404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,31,0.010485333700974783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,63,0.012202666451533636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,7,0.009626666704813639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,31,0.010949333508809408
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,63,0.012256000190973282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,127,0.012096000214417776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,127,0.01231466606259346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,255,0.012437333663304647
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,255,0.012261333564917246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,511,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,1023,0.0179626668492953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,1023,0.017738666385412216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,2047,0.029557332396507263
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,2047,0.029685333371162415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,4095,0.041082667807737984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,4095,0.04008533308903376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,8191,0.07623466849327087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,511,0.016672000288963318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,16383,0.1046506663163503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,16383,0.1280639966328939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,8191,0.059989333152770996
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,32767,0.23180800676345825
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,32767,0.217631995677948
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,65535,0.4405493338902791
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,65535,0.3444853226343791
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,1,0.048250665267308555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,3,0.05007466673851013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,3,0.05125333368778229
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,fp8,131071,0.6727413336435953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,1,0.04971733192602793
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,7,0.05190399785836538
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,4,128,1,float16,float16,131071,0.8627680142720541
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,7,0.05295999844868978
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,15,0.06517333288987477
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,15,0.0654666672150294
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,31,0.06955733398596446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,31,0.06571733454863231
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,63,0.06916266679763794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,63,0.07343466579914093
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,127,0.09129599730173747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,255,0.14587733149528503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,255,0.13239999612172446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,127,0.08735466996828715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,511,0.5692373514175415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,511,0.2223093310991923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,float16,1023,0.6910133361816406
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,float16,1,0.09154666463534038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,fp8,1,0.09399466713269551
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,float16,3,0.09486933549245198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,4,128,1,float16,fp8,1023,0.4488159815470378
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,float16,7,0.09892266988754272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,fp8,7,0.10098666946093242
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,float16,15,0.12979732950528464
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,fp8,3,0.09731733798980713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,fp8,15,0.1258240044116974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,float16,31,0.1349493364493052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,fp8,31,0.13357866803805032
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,fp8,63,0.1411893367767334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,float16,63,0.13782399892807007
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,float16,127,0.17275200287501016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,fp8,127,0.16795200109481812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,1,0.009541333342591921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,1,0.010048000141978264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,fp8,255,0.2552746733029683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,float16,255,0.27321600914001465
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,3,0.010015999898314476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,3,0.010490667074918747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,7,0.009962666779756546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,fp8,511,0.42825599511464435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,7,0.010437333335479101
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,15,0.009882666791478792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,31,0.010565333068370819
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,4,128,1,float16,float16,511,0.6869386831919352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,15,0.010144000252087912
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,63,0.012346666306257248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,31,0.010565333068370819
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,127,0.012469333906968435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,255,0.01250133290886879
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,127,0.012181332955757776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,63,0.012175999581813812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,255,0.012458667159080505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,511,0.01695999999841054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,511,0.01700266698996226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,1023,0.029680001238981884
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,1023,0.02958400050799052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,2047,0.038378665844599404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,2047,0.0371573343873024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,8191,0.1141813298066457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,4095,0.05253333350022634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,4095,0.0849120020866394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,8191,0.0906986693541209
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,16383,0.20523200432459512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,32767,0.38387731711069745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,16383,0.16019200285275778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,32767,0.29306666056315106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,65535,0.7410826683044434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,1,0.04693866769472758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,65535,0.5631626844406128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,1,0.04757866760094961
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,3,0.04867733518282572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,3,0.04894933104515076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,7,0.05068266888459524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,7,0.05072000126043955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,fp8,131071,0.9007840156555176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,15,0.06257600088914235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,15,0.06306666632493337
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,4,128,1,float16,float16,131071,2.244703928629557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,31,0.06351466476917267
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,31,0.06270933151245117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,63,0.0631520003080368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,63,0.06387199958165486
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,127,0.07618666688601176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,127,0.07576000193754832
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,255,0.11699733138084412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,255,0.11564800143241882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,511,0.1999680002530416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,511,0.1989333430926005
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,1023,0.3646399974822998
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,2047,0.6949546337127686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,1023,0.3622453212738037
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,2047,0.6896906693776449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,float16,4095,1.3594080607096355
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,1,0.04780800143877665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,1,128,1,float16,fp8,4095,1.3428799311319988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,1,0.047151997685432434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,3,0.0490880012512207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,3,0.0496319979429245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,7,0.05110399921735128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,7,0.05124799907207489
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,15,0.06329600016276042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,15,0.06372799972693126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,63,0.06438399851322174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,63,0.06392000118891399
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,31,0.06339733302593231
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,31,0.06402133405208588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,127,0.07689600189526875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,127,0.07691733539104462
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,255,0.11772800485293071
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,255,0.11838400363922119
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,511,0.20078933238983154
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,511,0.20060267051060995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,1023,0.36531198024749756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,2047,0.7010933558146158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,1023,0.3629386822382609
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,2047,0.6902879873911539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,float16,4095,1.3637173970540364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,2,128,1,float16,fp8,4095,1.3533652623494465
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,1,0.047882666190465294
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,3,0.049738665421803795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,7,0.05138133466243744
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,31,0.0634986658891042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,15,0.063509335120519
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,63,0.0639626681804657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,127,0.010293333480755487
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,31,0.06398400167624156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,127,0.012874666601419449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,15,0.06366933385531108
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,1,0.047184000412623085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,255,0.02621866762638092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,63,0.06434666613737743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,255,0.021397332350413006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,3,0.04903466502825419
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,511,0.03523733218510946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,2047,0.11123200257619222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,1023,0.08135466774304707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,1023,0.05866666634877523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,2047,0.08361066381136577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,4095,0.10802132884661357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,4095,0.13730133573214212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,float16,7,0.05101333558559418
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,4,128,1,float16,fp8,511,0.0399893323580424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,1,0.047914668917655945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,3,0.0495306650797526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,15,0.06369066735108693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,7,0.0513866643110911
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,31,0.06377066671848297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,63,0.06386133531729381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,127,0.008943999807039896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,1,0.047040000557899475
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,3,0.049125333627065025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,127,0.0162773331006368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,511,0.04884799818197886
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,7,0.051141331593195595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,15,0.06365333497524261
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,31,0.06401599943637848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,511,0.03219199925661087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,1023,0.06321066617965698
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,255,0.03134933362404505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,63,0.06435200075308482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,255,0.01803733284274737
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,2047,0.060080001751581825
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,2047,0.10496532917022705
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,float16,4095,0.12808533509572348
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,1,0.009749333063761393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,3,0.00972800018886725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,1,0.009775999933481216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,1023,0.045328001181284584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,3,0.009984000275532404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,7,0.009557333464423815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,7,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,15,0.009952000031868616
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,128,8,128,1,float16,fp8,4095,0.08769599596659343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,15,0.010133333504199982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,31,0.01055466632048289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,31,0.010842667271693548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,63,0.01219733307758967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,63,0.012549333274364471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,127,0.012319999436537424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,127,0.012085333466529846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,255,0.012549333274364471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,255,0.012341332932313284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,511,0.02794666588306427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,1023,0.032501332461833954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,511,0.027973333994547527
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,1023,0.032826667030652366
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,2047,0.04365866879622141
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,2047,0.04381333291530609
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,4095,0.06582933167616527
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,4095,0.06532800197601318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,8191,0.1090613305568695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,8191,0.10873599847157796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,float16,16383,0.1958400011062622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,1,0.00956266683836778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,1,128,1,float16,fp8,16383,0.19554666678110758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,1,0.009824000298976898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,3,0.009706666693091393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,3,0.009850666547815004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,7,0.009717333440979322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,15,0.009888000165422758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,7,0.009759999811649323
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,15,0.010079999764760336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,31,0.010714666297038397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,31,0.01062400018175443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,63,0.012432000289360682
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,127,0.012335999558369318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,63,0.012341332932313284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,127,0.012149333953857422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,255,0.012527999778588613
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,255,0.012335999558369318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,511,0.028010666370391846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,511,0.028138667345046997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,1023,0.03274133304754893
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,1023,0.03258133431275686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,2047,0.043866669138272606
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,4095,0.06554133196671803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,2047,0.043791999419530235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,4095,0.06563200056552887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,8191,0.1092693308989207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,16383,0.19604800144831339
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,float16,8191,0.1090133289496104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,2,128,1,float16,fp8,16383,0.19557867447535196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,1,0.009621333330869675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,1,0.009813333551088968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,3,0.009749333063761393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,3,0.00955200009047985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,7,0.009701333319147428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,15,0.009893333539366722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,31,0.010597333312034607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,31,0.010645333677530289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,63,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,63,0.01231466606259346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,127,0.012527999778588613
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,255,0.01251199965675672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,127,0.012042666474978128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,511,0.028021333118279774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,7,0.00973866693675518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,511,0.02815466622511546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,15,0.010053333515922228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,1023,0.03281066566705704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,2047,0.015637333194414776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,2047,0.017509333789348602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,1023,0.032698666055997215
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,4095,0.018191999445358913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,4095,0.018842666099468868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,255,0.012351999680201212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,8191,0.022154666483402252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,8191,0.023418667415777843
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,float16,16383,0.02573866645495097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,1,0.009914666414260864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,3,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,1,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,4,128,1,float16,fp8,16383,0.026549334327379864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,3,0.009850666547815004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,7,0.009813333551088968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,15,0.009973333527644476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,15,0.009919999788204828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,31,0.010496000448862711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,31,0.01044800008336703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,63,0.01221866657336553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,127,0.012448000411192576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,63,0.012357333054145178
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,127,0.012175999581813812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,255,0.012538666526476542
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,255,0.01228800043463707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,7,0.009722666814923286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,511,0.02789866675933202
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,1023,0.014021333307027817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,511,0.028010666370391846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,1023,0.01032533310353756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,2047,0.011744000017642975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,4095,0.016629333297411602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,4095,0.013408000270525614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,2047,0.015461333096027374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,8191,0.01833600054184596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,16383,0.03156266609827677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,float16,8191,0.02569066733121872
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,128,8,128,1,float16,fp8,16383,0.02239466706911723
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,1,0.00927466650803884
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,1,0.008965333302815756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,3,0.009119999905427298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,3,0.00926399976015091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,7,0.00938666673998038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,7,0.009285333255926767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,15,0.01003200002014637
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,15,0.009989333028594652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,31,0.011850666254758835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,31,0.01180800050497055
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,63,0.011733333269755045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,63,0.011589333415031433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,127,0.011834666132926941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,127,0.011839999506870905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,255,0.013557333499193192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,511,0.019066666563351948
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,511,0.019082666685183842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,1023,0.029781334102153778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,255,0.013594667116800943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,1023,0.030213333666324615
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,2047,0.05147733290990194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,2047,0.05190399785836538
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,4095,0.09527466694513957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,4095,0.09416533509890239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,8191,0.17967466513315836
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,8191,0.1813066601753235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,float16,16383,0.3502346674601237
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,1,128,1,float16,fp8,16383,0.3521600166956584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,1,0.009130666653315226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,1,0.009237333511312803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,3,0.008986666798591614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,7,0.009248000259200731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,3,0.009301333377758661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,7,0.009472000102202097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,31,0.011866666376590729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,31,0.011898666620254517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,15,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,15,0.009914666414260864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,63,0.011690666278203329
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,63,0.011893333246310553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,127,0.011813333878914515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,127,0.011765333513418833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,255,0.013658666362365087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,255,0.013573333621025085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,511,0.019071999937295914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,511,0.019007999449968338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,1023,0.029839999973773956
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,1023,0.030271999537944794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,2047,0.051413332422574363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,2047,0.05189333359400431
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,4095,0.09436266620953877
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,8191,0.18092266718546549
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,4095,0.09518399834632874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,8191,0.18014933665593466
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,float16,16383,0.3568640152613322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,2,128,1,float16,fp8,16383,0.3524373372395833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,1,0.008992000172535578
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,1,0.009248000259200731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,3,0.009141333401203156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,3,0.00926399976015091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,7,0.009359999870260557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,7,0.00933333362142245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,15,0.009962666779756546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,15,0.010112000008424124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,31,0.011727999895811081
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,63,0.011711999773979187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,31,0.011722666521867117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,63,0.011706666400035223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,511,0.018922666708628338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,255,0.013552000125249227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,127,0.011754666765530905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,255,0.013503999759753546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,127,0.011973333855470022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,511,0.018986667195955913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,1023,0.01611199975013733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,1023,0.013839999834696451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,2047,0.018746666610240936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,4095,0.018266666680574417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,4095,0.020517333100239437
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,2047,0.015919999529918034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,8191,0.0320266659061114
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,8191,0.03251733382542928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,float16,16383,0.03913066784540812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,1,0.009066666786869368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,4,128,1,float16,fp8,16383,0.03985599925120672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,1,0.009253333633144697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,3,0.009375999992092451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,7,0.008997333546479544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,3,0.009178666397929192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,7,0.009173333023985228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,15,0.010037333394090334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,15,0.0100426667680343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,31,0.011658667276302973
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,31,0.011866666376590729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,63,0.011786667009194693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,63,0.011887999872366587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,127,0.011898666620254517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,127,0.01181866725285848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,255,0.013552000125249227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,255,0.013525333255529404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,511,0.013130666067202887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,1023,0.013983999689420065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,511,0.009658666948477427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,2047,0.02120000123977661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,1023,0.010602666685978571
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,2047,0.013317332913478216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,4095,0.024154665569464367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,4095,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,8191,0.027056001126766205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,8191,0.035642666121323906
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,float16,16383,0.055200000603993736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,128,8,128,1,float16,fp8,16383,0.03605333218971888
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,1,0.08891200025876363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,1,0.09060266613960266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,3,0.09221333265304565
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,3,0.0930613378683726
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,7,0.09602666894594829
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,7,0.09665600458780925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,15,0.12071999907493591
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,15,0.11986666917800903
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,31,0.12132267157236735
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,31,0.1199679970741272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,63,0.1218346655368805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,127,0.14665599664052328
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,63,0.12064533432324727
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,127,0.14549866318702698
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,255,0.22698666652043661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,255,0.22473067045211792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,511,0.3869653145472209
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,1023,0.7107626597086588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,511,0.3887093464533488
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,1023,0.7058560053507487
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,float16,2047,1.3626079559326172
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,1,0.09050666292508443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,1,0.08906666437784831
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,3,0.09240000446637471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,3,0.09325333436330159
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,1,128,1,float16,fp8,2047,1.3450239499409993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,7,0.09658132990201314
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,7,0.09733866651852925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,15,0.12117866675059001
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,31,0.12184000015258789
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,15,0.12077866991360982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,31,0.12087999780972798
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,63,0.1225226620833079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,127,0.14708800117174783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,63,0.12146133184432983
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,127,0.146997332572937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,255,0.22839999198913574
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,255,0.2276159922281901
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,511,0.3891893227895101
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,1023,0.7058826287587484
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,1023,0.717573324839274
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,511,0.3884426752726237
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,fp8,2047,1.3530453046162922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,2,128,1,float16,float16,2047,1.3684852917989094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,1,0.09058133761088054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,1,0.08895466725031535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,3,0.09353599945704143
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,3,0.09231999516487122
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,15,0.12153599659601848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,7,0.09661333759625752
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,31,0.12192533413569133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,15,0.12091733018557231
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,7,0.09733866651852925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,31,0.12112533052762349
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,63,0.12268267075220744
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,63,0.12169599533081055
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,127,0.02041600023706754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,255,0.03900266687075297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,255,0.03889599939187368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,511,0.0867199997107188
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,127,0.01786133274435997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,1023,0.07934933404127757
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,511,0.06107733150323232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,1023,0.11291733384132385
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,float16,2047,0.11146666606267293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,4,128,1,float16,fp8,2047,0.13893866539001465
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,1,0.08875200152397156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,1,0.09061333537101746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,3,0.09361066420873006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,15,0.12143466869990031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,7,0.09764267007509868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,7,0.09672533472379048
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,3,0.09238400061925252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,15,0.12105600039164226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,31,0.12204800049463908
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,31,0.12114666899045308
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,63,0.12274133165677388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,63,0.12190933028856914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,127,0.027061333258946735
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,255,0.0528053343296051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,255,0.03202133377393087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,127,0.01532799998919169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,511,0.07332799832026164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,511,0.04621333380540212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,1023,0.08596266309420268
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,1023,0.061621333161989846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,float16,2047,0.13296000162760416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,1,0.01470400020480156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,128,8,128,1,float16,fp8,2047,0.08809066812197368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,1,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,3,0.015173333386580149
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,7,0.015381333728631338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,7,0.015146666516860327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,3,0.015050667027632395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,15,0.016389333953460056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,15,0.016362667083740234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,31,0.019861333072185516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,31,0.019946667055288952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,63,0.019797333826621372
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,63,0.02000533292690913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,127,0.0199946661790212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,255,0.023557332654794056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,127,0.0198186660806338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,255,0.02351466566324234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,511,0.03425599883000056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,511,0.03454933315515518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,1023,0.05514666438102722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,1023,0.055829331278800964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,2047,0.09818666179974873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,2047,0.09706667065620422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,4095,0.1823199987411499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,4095,0.18105065822601318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,8191,0.34800533453623456
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,8191,0.3500106732050578
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,fp8,16383,0.6852906545003256
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,1,128,1,float16,float16,16383,0.6841279665629069
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,1,0.014709333578745524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,1,0.014922666052977243
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,3,0.015109332899252573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,7,0.015119999647140503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,3,0.015279999623696009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,7,0.015509333461523056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,15,0.01632000009218852
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,15,0.016501333564519882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,63,0.01985599969824155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,31,0.01977066695690155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,31,0.01979200045267741
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,63,0.019930666933457058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,127,0.020026666422684986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,127,0.019999999552965164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,255,0.023631999890009563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,511,0.03442666679620743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,255,0.02346666653951009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,511,0.0344106654326121
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,1023,0.055919999877611794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,1023,0.05532266696294149
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,2047,0.09757866462071736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,2047,0.09818666179974873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,4095,0.18249066670735678
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,4095,0.18198400735855103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,8191,0.3495306571324666
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,8191,0.3498400052388509
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,1,0.014954666296641031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,float16,16383,0.7025492986043295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,2,128,1,float16,fp8,16383,0.6857866446177164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,3,0.014991999914248785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,3,0.015487999965747198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,1,0.015141333142916361
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,7,0.015087999403476715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,7,0.015306666493415833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,15,0.01643199970324834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,15,0.016672000288963318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,31,0.01988799994190534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,63,0.01998399943113327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,255,0.02363733450571696
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,127,0.019978666057189304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,127,0.02004266654451688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,63,0.01989866668979327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,31,0.01961600035429001
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,255,0.02350933353106181
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,511,0.015311999867359797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,511,0.012576000144084295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,1023,0.01624533285697301
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,2047,0.02569599946339925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,1023,0.013904000322024027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,4095,0.02824000020821889
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,2047,0.024170666933059692
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,4095,0.031093334158261616
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,16383,0.06965333223342896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,8191,0.04399999976158142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,fp8,16383,0.07554666697978973
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,1,0.015119999647140503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,3,0.015034666905800501
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,1,0.014746667196353277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,4,128,1,float16,float16,8191,0.047040000557899475
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,7,0.015274666249752045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,3,0.015247999380032221
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,7,0.01540800059835116
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,15,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,15,0.01657066618402799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,31,0.01960533360640208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,31,0.019839999576409657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,63,0.019893333315849304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,63,0.020053333292404812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,511,0.01403733342885971
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,255,0.013007999708255133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,255,0.009370666618148485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,127,0.020101333657900494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,127,0.020031999796628952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,511,0.010869332899649939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,1023,0.011999999483426413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,2047,0.029189333319664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,1023,0.019760000209013622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,2047,0.019760000209013622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,4095,0.042778665820757546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,4095,0.024133334557215374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,8191,0.05987200140953064
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,float16,16383,0.09115200241406758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,8191,0.0403466671705246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,128,8,128,1,float16,fp8,16383,0.06411733229955037
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,1,0.1722453236579895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,1,0.17548267046610513
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,3,0.17904533942540488
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,3,0.1809013287226359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,7,0.18651733795801798
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,7,0.18794133265813193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,15,0.2344640096028646
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,15,0.236026664574941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,31,0.23683200279871622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,31,0.2342080076535543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,63,0.23835732539494833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,63,0.2361546754837036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,127,0.2852426568667094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,255,0.44731732209523517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,127,0.28677332401275635
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,255,0.4427200158437093
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,1,0.1726133426030477
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,1,0.1760693391164144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,3,0.1791093349456787
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,511,0.7661920388539633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,511,0.7634080251057943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,float16,1023,1.410373369852702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,1,128,1,float16,fp8,1023,1.3928267161051433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,3,0.18145600954691568
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,7,0.18770132462183634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,7,0.18969599405924478
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,15,0.2371306618054708
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,31,0.23643734057744345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,15,0.23669334252675375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,63,0.23944532871246338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,31,0.2381440003712972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,127,0.28757866223653156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,63,0.23778132597605386
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,127,0.28782399495442706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,255,0.44866665204366046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,255,0.4471946557362874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,511,0.7746773560841879
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,511,0.765343983968099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,1,0.1762453317642212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,fp8,1023,1.402016003926595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,2,128,1,float16,float16,1023,1.4173547426859539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,3,0.17901867628097534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,1,0.17260799805323282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,3,0.18147732814153036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,7,0.18754132588704428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,15,0.23726399739583334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,7,0.18981333573659262
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,15,0.23678400119145712
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,31,0.23650666077931723
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,63,0.2379146615664164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,31,0.2382133404413859
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,63,0.23945599794387817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,127,0.0323786661028862
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,127,0.03522666543722153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,255,0.09242666761080424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,1023,0.14191466569900513
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,fp8,511,0.11649066209793091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,255,0.06677866478761037
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,1,0.17293334007263184
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,1,0.17626132567723593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,3,0.1790613333384196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,511,0.08517332871754964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,4,128,1,float16,float16,1023,0.12331733107566833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,3,0.18159466981887817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,7,0.18773333231608072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,15,0.23729066054026285
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,15,0.23682665824890137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,31,0.23842666546503702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,7,0.19011199474334717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,63,0.2380746603012085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,31,0.236735999584198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,63,0.24393065770467123
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,127,0.0273333340883255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,255,0.05809600154558817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,255,0.04155733436346054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,511,0.05585066477457682
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,127,0.049509331583976746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,511,0.0782773345708847
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,float16,1023,0.12299199899037679
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,128,8,128,1,float16,fp8,1023,0.09895466764767964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,float16,1,0.339626669883728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,fp8,1,0.3455359935760498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,fp8,3,0.3570773204167684
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,float16,3,0.35333867867787677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,float16,15,0.4667733510335286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,fp8,7,0.3695146640141805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,float16,7,0.36682132879892987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,float16,31,0.46833598613739014
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,float16,63,0.47113601366678876
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,fp8,15,0.46345067024230957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,fp8,31,0.46346668402353924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,fp8,63,0.4671786626180013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,float16,127,0.5677866538365682
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,fp8,127,0.5644160111745199
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,float16,1,0.339957316716512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,fp8,255,0.8792266845703125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,1,128,1,float16,float16,255,0.8920213381449381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,float16,3,0.3533866802851359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,fp8,1,0.3460373481114705
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,fp8,3,0.35771199067433673
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,float16,7,0.3694933255513509
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,float16,15,0.46865065892537433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,fp8,7,0.3737013339996338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,fp8,15,0.46808000405629474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,fp8,31,0.46747732162475586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,float16,31,0.47074135144551593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,float16,63,0.47413333257039386
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,fp8,63,0.47074135144551593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,float16,127,0.5798879861831665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,fp8,127,0.569599986076355
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,float16,1,0.3398880163828532
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,float16,255,0.9003307024637858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,2,128,1,float16,fp8,255,0.8933173020680746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,fp8,1,0.3463360071182251
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,float16,3,0.35255467891693115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,fp8,3,0.35729066530863446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,float16,7,0.36961066722869873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,fp8,7,0.3741973241170247
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,float16,15,0.4689546823501587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,float16,31,0.4721226692199707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,fp8,15,0.4681813319524129
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,fp8,31,0.46781333287556964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,float16,63,0.48602132002512616
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,float16,127,0.06645866731802623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,fp8,63,0.47089068094889325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,fp8,127,0.07630399862925212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,fp8,255,0.1072106659412384
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,4,128,1,float16,float16,255,0.07534400125344594
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,float16,1,0.33954668045043945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,fp8,1,0.3463786840438843
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,fp8,3,0.3569600184758504
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,fp8,7,0.3746933142344157
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,float16,3,0.3522239923477173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,float16,15,0.47049065430959064
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,fp8,15,0.46887465318044025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,float16,7,0.36994131406148273
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,float16,31,0.48206933339436847
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,float16,63,0.4872426589330037
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,fp8,63,0.4829973379770915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,fp8,31,0.4696693420410156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,fp8,127,0.06763733426729839
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,float16,127,0.09575999776522319
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,float16,255,0.10421866178512573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,128,8,128,1,float16,fp8,255,0.08040533463160197
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,float16,3,0.707365353902181
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,fp8,1,0.6923627058664957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,float16,1,0.6791520118713379
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,float16,15,0.9332853158315023
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,float16,7,0.7376586596171061
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,fp8,3,0.7151412963867188
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,fp8,7,0.745130697886149
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,fp8,15,0.9251840114593506
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,float16,31,0.9394773642222086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,fp8,31,0.9275039831797282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,fp8,63,0.9375946521759033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,float16,63,0.9490026632944742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,float16,1,0.6830346584320068
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,float16,127,1.1458133061726887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,1,128,1,float16,fp8,127,1.1373546918233235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,float16,3,0.7098613580067953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,fp8,3,0.7173653443654379
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,fp8,1,0.6951413154602051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,float16,7,0.7464373111724854
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,float16,15,0.9424373308817545
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,fp8,7,0.7517386277516683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,fp8,15,0.9356479644775391
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,fp8,31,0.9412960211435953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,float16,31,0.9497653643290201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,float16,63,0.9581759770711263
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,float16,127,1.157040039698283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,fp8,63,0.9552746613820394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,2,128,1,float16,fp8,127,1.1635680198669434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,float16,3,0.714458703994751
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,float16,1,0.6900266806284586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,fp8,3,0.7220106919606527
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,float16,7,0.7556213537851969
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,fp8,1,0.7009440263112386
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,fp8,7,0.759930690129598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,fp8,15,0.9440159797668457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,float16,15,0.9454027016957601
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,float16,31,0.9530239899953207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,fp8,63,0.9608426888783773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,fp8,31,0.9478826522827148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,float16,127,0.12255466977755229
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,float16,63,0.9612906773885092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,float16,1,0.7094720204671224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,float16,3,0.7274719874064127
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,4,128,1,float16,fp8,127,0.18395199378331503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,fp8,1,0.7116106351216634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,fp8,3,0.7338933149973551
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,fp8,15,0.9547200202941895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,fp8,7,0.7711733182271322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,float16,31,0.9577226638793945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,float16,7,0.7650986512502035
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,fp8,31,0.9573226769765218
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,float16,15,0.9492692947387695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,float16,63,0.9669813315073649
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,fp8,63,0.9685280323028564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,1,0.014720000326633453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,3,0.015173333386580149
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,1,0.014970666418472925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,float16,127,0.18101332585016885
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,3,0.01524266724785169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,128,8,128,1,float16,fp8,127,0.12873066465059915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,15,0.01939733326435089
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,7,0.015743999431530636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,15,0.019189332922299702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,7,0.01570133368174235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,31,0.019199999670187633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,31,0.01942933350801468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,127,0.02329600105683009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,63,0.019530666371186573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,63,0.019466667125622433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,255,0.03390933324893316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,127,0.02298133323589961
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,511,0.055573334296544395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,255,0.03427733232577642
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,1023,0.09851200381914775
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,511,0.05586666862169901
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,2047,0.18665067354838052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,1023,0.10039466619491577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,2047,0.18372799952824911
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,4095,0.35466134548187256
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,4095,0.3598080078760783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,8191,0.7091999848683676
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,8191,0.711189349492391
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,fp8,16383,1.6624800364176433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,1,0.014767999450365702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,1,128,1,float16,float16,16383,1.6976906458536785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,1,0.014991999914248785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,3,0.01525866612792015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,3,0.015226667126019796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,7,0.01569066693385442
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,15,0.019461333751678467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,7,0.01578666642308235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,63,0.01934933289885521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,15,0.019296000401178997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,31,0.019567999988794327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,127,0.023178666830062866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,31,0.019296000401178997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,63,0.01950399950146675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,127,0.023045333723227184
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,255,0.034058667719364166
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,255,0.03399466723203659
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,511,0.05558399856090546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,511,0.056048000852266945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,1023,0.09839466214179993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,2047,0.18492267529169717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,1023,0.10003200173377991
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,4095,0.35761598745981854
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,2047,0.18618667125701904
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,4095,0.3596373399098714
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,8191,0.7221653461456299
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,8191,0.7076746622721354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,float16,16383,1.8239199320475261
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,2,128,1,float16,fp8,16383,1.9459039370218914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,1,0.014848000059525171
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,1,0.01482133318980535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,3,0.015216000378131866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,3,0.015429332852363586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,7,0.015840000162522
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,31,0.019440000255902607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,7,0.01573333392540614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,15,0.019461333751678467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,15,0.019343999524911244
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,63,0.019530666371186573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,31,0.01930133377512296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,63,0.01934933289885521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,127,0.022965334355831146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,127,0.02327466756105423
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,255,0.015098666151364645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,511,0.012618667135636011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,255,0.011898666620254517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,511,0.01568000018596649
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,1023,0.0235359991590182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,2047,0.03498666733503342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,1023,0.025146665672461193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,2047,0.03364266703526179
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,4095,0.053039997816085815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,4095,0.053616002202034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,8191,0.07365866502126057
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,8191,0.09342933694521587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,float16,16383,0.11307733257611592
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,4,128,1,float16,fp8,16383,0.14250133434931436
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,1,0.014709333578745524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,1,0.014917333920796713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,3,0.015247999380032221
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,7,0.01581866666674614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,3,0.015295999745527903
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,7,0.015674666812022526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,15,0.019285333653291065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,31,0.019445333629846573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,31,0.019567999988794327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,15,0.019434666881958645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,63,0.019445333629846573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,63,0.01940800001223882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,255,0.013690666606028875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,255,0.00955200009047985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,127,0.010885333021481832
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,511,0.019978666057189304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,127,0.00850133349498113
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,511,0.011792000383138657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,1023,0.02871999889612198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,1023,0.019952000429232914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,2047,0.044490665197372437
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,2047,0.02775466690460841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,4095,0.04630400240421295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,4095,0.06811733543872833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,16383,0.12865066528320312
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,8191,0.061306665341059365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,float16,8191,0.08703999718030293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,128,8,128,1,float16,fp8,16383,0.08886933326721191
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,float16,1,1.3996906280517578
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,fp8,1,1.4168586730957031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,float16,3,1.4447147051493328
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,float16,7,1.4935520490010579
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,fp8,3,1.4629173278808594
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,fp8,15,1.8585386276245117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,fp8,7,1.507413387298584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,float16,15,1.8678293228149414
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,float16,31,1.8880480130513508
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,fp8,31,1.8627893129984539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,float16,63,1.8924320538838704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,float16,1,1.3923254013061523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,1,128,1,float16,fp8,63,1.8758026758829753
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,fp8,1,1.423957347869873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,float16,7,1.5202453931172688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,fp8,3,1.4656000137329102
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,fp8,7,1.523311932881673
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,float16,3,1.445466677347819
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,float16,15,1.8792266845703125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,fp8,15,1.8787306149800618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,fp8,31,1.8858453432718914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,float16,31,1.893642743428548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,float16,63,1.9211093584696453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,2,128,1,float16,fp8,63,1.9042827288309734
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,float16,3,1.4479306538899739
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,fp8,1,1.4362719853719075
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,fp8,3,1.4712640444437664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,float16,1,1.405168056488037
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,float16,7,1.5137813886006672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,float16,15,1.891856034596761
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,fp8,7,1.5325493812561035
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,fp8,15,1.8839573860168457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,float16,31,1.8996159235636394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,fp8,31,1.893701394399007
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,float16,1,1.4249280293782551
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,fp8,63,1.914778709411621
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,4,128,1,float16,float16,63,1.914346694946289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,fp8,1,1.478384017944336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,fp8,3,1.4805493354797363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,float16,3,1.4609546661376953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,float16,7,1.5257387161254883
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,fp8,7,1.544325351715088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,float16,15,1.891584078470866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,fp8,31,1.925103982289632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,float16,63,1.9247360229492188
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,float16,31,1.908506711324056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,fp8,15,1.893957297007243
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,128,8,128,1,float16,fp8,63,1.9305013020833333
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,float16,1,2.766757329305013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,fp8,1,2.827616055806478
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,float16,3,2.88371213277181
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,fp8,3,2.9475412368774414
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,float16,7,2.9788106282552085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,float16,15,3.7288106282552085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,float16,31,3.7509867350260415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,fp8,7,3.0075839360555015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,fp8,31,3.718378702799479
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,float16,1,2.778832117716471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,1,128,1,float16,fp8,15,3.710789362589518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,fp8,1,2.8771146138509116
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,float16,3,2.88424015045166
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,float16,7,3.0040000279744468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,float16,15,3.751002629597982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,fp8,15,3.75057061513265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,fp8,31,3.7873385747273765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,float16,1,2.8001600901285806
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,fp8,3,2.9257227579752603
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,fp8,7,3.039093335469564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,2,128,1,float16,float16,31,3.779989242553711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,fp8,1,2.867818514506022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,float16,3,2.8874292373657227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,fp8,3,2.935551961263021
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,float16,7,3.052469253540039
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,fp8,7,3.0561545689900718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,float16,15,3.7603305180867515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,float16,31,3.7913331985473633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,fp8,31,3.780933380126953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,4,128,1,float16,fp8,15,3.761237462361654
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,fp8,1,2.8935572306315103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,float16,1,2.8433332443237305
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,float16,3,2.975935935974121
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,fp8,3,2.9525012969970703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,fp8,7,3.0834827423095703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,float16,7,3.043386777242025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,1,0.025493333737055462
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,1,0.02606400102376938
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,3,0.026789332429567974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,3,0.026464000344276428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,float16,15,3.77675724029541
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,float16,31,3.8104960123697915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,7,0.027514666318893433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,7,0.027674667537212372
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,15,0.03449599941571554
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,15,0.034373333056767784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,31,0.03432533393303553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,63,0.03473600000143051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,63,0.03470933437347412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,31,0.03477333237727483
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,127,0.041306667029857635
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,fp8,15,3.780831972757975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,127,0.04141333450873693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,128,8,128,1,float16,fp8,31,3.807562510172526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,255,0.062037333846092224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,511,0.104010671377182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,1023,0.1873226761817932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,255,0.06206933160622915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,511,0.10440533359845479
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,2047,0.35408000151316327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,1023,0.18809600671132407
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,2047,0.35507198174794513
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,4095,0.687269369761149
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,4095,0.6880106925964355
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,1,0.02603733291228612
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,float16,8191,1.382970650990804
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,1,0.025722667574882507
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,3,0.026560001075267792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,1,128,1,float16,fp8,8191,1.3598559697469075
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,7,0.027647999425729115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,7,0.027664000789324444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,3,0.026848000784715016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,15,0.03455466777086258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,15,0.034602666894594826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,63,0.03490666548411051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,31,0.0347626656293869
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,127,0.04136000076929728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,31,0.03468266626199087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,63,0.034847999612490334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,127,0.04136000076929728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,255,0.06225599845250448
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,511,0.10412266850471497
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,255,0.06238399942715963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,1023,0.1874506672223409
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,2047,0.35468268394470215
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,511,0.1043839951356252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,1023,0.1876586675643921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,4095,0.7038719654083252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,2047,0.35461334387461346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,4095,0.687280019124349
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,fp8,8191,1.3853546778361003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,1,0.026005332668622334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,2,128,1,float16,float16,8191,1.3941013018290203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,3,0.02697066714366277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,3,0.026677332818508148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,1,0.025610665480295818
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,7,0.02769600103298823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,7,0.027664000789324444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,31,0.03472000112136205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,31,0.03503466645876566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,15,0.03454400102297465
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,63,0.03489600121974945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,127,0.012421333541472753
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,15,0.034517332911491394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,63,0.03473600000143051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,127,0.011034666250149408
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,255,0.016362667083740234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,255,0.012266666938861212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,1023,0.03417599946260452
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,511,0.02250666668017705
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,511,0.02369066576162974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,1023,0.041536000867684685
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,2047,0.05832533538341522
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,4095,0.08654399712880452
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,2047,0.05890666445096334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,float16,8191,0.1107413371404012
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,4095,0.11282133062680562
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,4,128,1,float16,fp8,8191,0.13734933733940125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,1,0.025701334079106648
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,3,0.02701333413521449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,1,0.026026666164398193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,3,0.02664000044266383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,7,0.027701333165168762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,7,0.027535999814669292
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,15,0.03457066665093104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,15,0.034629332522551216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,31,0.03463999927043915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,127,0.010981333752473196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,63,0.035029334326585136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,31,0.0348693331082662
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,63,0.034741332133611046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,255,0.021488000949223835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,255,0.01090666651725769
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,127,0.007957333077987036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,511,0.028938665986061096
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,511,0.0189280000825723
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,4095,0.0867039958635966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,1023,0.047637333472569786
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,2047,0.04557333389918009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,1023,0.03326933334271113
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,2047,0.07356266677379608
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,4095,0.060559997955958046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,float16,8191,0.12755200266838074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,128,8,128,1,float16,fp8,8191,0.08828799923261006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,1,0.02588266630967458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,3,0.026869334280490875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,7,0.02769600103298823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,15,0.034703999757766724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,1,0.0262719988822937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,3,0.0269813338915507
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,7,0.027482666075229645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,15,0.03429866582155228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,31,0.03480533262093862
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,31,0.03463999927043915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,63,0.03482666611671448
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,63,0.03516799956560135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,127,0.041306667029857635
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,255,0.06224533418814341
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,511,0.1042133371035258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,127,0.041434665520985924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,511,0.10487467050552368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,1023,0.18728532393773398
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,1023,0.18808533747990927
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,255,0.06227200229962667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,2047,0.35425599416097003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,2047,0.3550613323847453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,4095,0.7025866508483887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,1,0.025775998830795288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,8191,1.3872532844543457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,float16,8191,1.3933760325113933
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,1,128,1,float16,fp8,4095,0.6879146893819174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,3,0.026928000152111053
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,3,0.026741333305835724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,7,0.027642667293548584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,1,0.026074667771657307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,7,0.027562665442625683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,15,0.03473600000143051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,15,0.034629332522551216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,31,0.03453333427508672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,63,0.035216001172860466
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,31,0.03480000048875809
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,63,0.03482133398453394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,127,0.009866666669646898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,255,0.01616000011563301
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,255,0.01219733307758967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,511,0.022384000321229298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,511,0.023786666492621105
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,1023,0.04156800111134847
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,1023,0.03419200082619985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,127,0.012479999413092932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,2047,0.05561066667238871
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,2047,0.05914133290449778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,4095,0.11212266484896342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,4095,0.08494933446248372
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,fp8,8191,0.1373546620210012
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,2,128,1,float16,float16,8191,0.11127466956774394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,1,0.025706666211287182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,3,0.026714667677879333
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,3,0.026928000152111053
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,7,0.0276853342851003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,7,0.02792000025510788
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,15,0.03459733227888743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,1,0.02605333427588145
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,15,0.034645333886146545
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,31,0.034602666894594826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,63,0.03482133398453394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,63,0.03513599932193756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,31,0.03489600121974945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,127,0.011029332876205444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,127,0.007941333577036858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,255,0.0107893335322539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,511,0.018874666343132656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,511,0.029215998947620392
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,1023,0.047456001242001854
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,2047,0.06735466420650482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,255,0.0215786670645078
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,2047,0.04430399835109711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,1023,0.03297599901755651
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,4095,0.08668800195058186
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,4095,0.06044800082842509
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,float16,8191,0.12796266873677573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,4,128,1,float16,fp8,8191,0.08777067065238953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,15,0.03461333364248276
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,3,0.027087998886903126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,7,0.027813332776228588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,1,0.026005332668622334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,31,0.034688000877698265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,63,0.03489600121974945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,1,0.025754667818546295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,3,0.026895999908447266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,127,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,127,0.008586666857202848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,7,0.02757866680622101
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,15,0.03469866762558619
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,31,0.03500800083080927
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,63,0.03522133330504099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,255,0.0305173322558403
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,511,0.047151997685432434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,1023,0.06151466568311056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,511,0.022015998760859173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,2047,0.08563733100891113
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,2047,0.04320000112056732
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,4095,0.1272533337275187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,4095,0.06703466673692067
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,float16,8191,0.21286400159200033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,255,0.015210667004187902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,1,0.009653333574533463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,1,0.009775999933481216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,1023,0.029578665892283123
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,3,0.009642666826645533
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,3,0.009621333330869675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,7,0.009482666850090027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,64,8,128,1,float16,fp8,8191,0.157231996456782
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,15,0.009775999933481216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,15,0.009808000177145004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,7,0.00972800018886725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,31,0.0103946669648091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,31,0.010384000216921171
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,63,0.012293333808581034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,63,0.01211200033624967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,127,0.012144000579913458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,255,0.012389333297808966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,511,0.016773333152135212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,511,0.01685333376129468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,1023,0.029781334102153778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,255,0.012266666938861212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,2047,0.03738666574160258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,2047,0.03719466676314672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,1023,0.02945599953333537
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,127,0.01227733368674914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,4095,0.05165866514046987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,4095,0.05146666864554087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,8191,0.08082133531570435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,8191,0.08066666622956593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,16383,0.13850667079289755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,16383,0.13795733451843262
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,float16,32767,0.25496532519658405
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,1,0.00955200009047985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,1,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,3,0.00966933307548364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,7,0.009413333609700203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,3,0.009877333417534828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,7,0.00973866693675518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,1,128,1,float16,fp8,32767,0.2529546618461609
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,15,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,15,0.010090666512648264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,31,0.010549332946538925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,31,0.010319999729593595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,127,0.012351999680201212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,63,0.012240000069141388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,63,0.012106666962305704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,255,0.012245333443085352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,127,0.012026666353146235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,255,0.01228800043463707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,1023,0.029866665601730347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,511,0.016821333517630894
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,511,0.016677333662907284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,2047,0.03754133234421412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,1023,0.029685333371162415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,2047,0.03713600089152654
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,8191,0.02306666721900304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,4095,0.018197332819302876
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,8191,0.02183466653029124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,16383,0.02480533222357432
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,16383,0.02638400097688039
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,32767,0.031925333042939506
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,fp8,32767,0.033904001116752625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,2,128,1,float16,float16,4095,0.01826133330663045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,1,0.009626666704813639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,1,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,3,0.009637333452701569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,3,0.009706666693091393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,7,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,7,0.009626666704813639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,15,0.009722666814923286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,31,0.010474666953086853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,31,0.0106133334338665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,63,0.012053333222866058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,63,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,127,0.012213333199421564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,127,0.012149333953857422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,15,0.009770666559537252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,255,0.012362666428089142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,255,0.012202666451533636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,511,0.01674666628241539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,511,0.01670933390657107
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,2047,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,1023,0.029887999097506206
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,1023,0.02937600016593933
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,4095,0.01594666639963786
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,4095,0.013295999417702356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,8191,0.019765333582957584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,2047,0.011541333049535751
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,8191,0.01598400001724561
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,16383,0.019354666272799175
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,16383,0.02221333235502243
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,float16,32767,0.035589332381884255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,4,128,1,float16,fp8,32767,0.027999999622503918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,1,0.009733333562811216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,1,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,3,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,7,0.009621333330869675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,7,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,31,0.010464000205198923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,15,0.009914666414260864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,31,0.010666667173306147
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,63,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,127,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,127,0.012165332833925882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,255,0.012469333906968435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,255,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,15,0.010064000263810158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,511,0.016682667036851246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,3,0.009578666960199675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,511,0.01682666689157486
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,1023,0.013359999905029932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,1023,0.008602666358153025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,2047,0.015360000232855478
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,63,0.012138667205969492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,2047,0.00979200005531311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,4095,0.016282666474580765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,8191,0.02587733417749405
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,8191,0.014970666418472925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,16383,0.03086400032043457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,4095,0.010805333654085795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,16383,0.01854933301607768
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,float16,32767,0.05013333261013031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,64,8,128,1,float16,fp8,32767,0.026399999856948853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,1,0.009557333464423815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,1,0.010005333150426546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,3,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,3,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,7,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,15,0.009973333527644476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,7,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,15,0.009989333028594652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,31,0.010629333555698395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,31,0.010714666297038397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,63,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,63,0.012293333808581034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,127,0.012330666184425354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,127,0.012367999802033106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,255,0.012373333175977072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,511,0.028010666370391846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,255,0.012576000144084295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,511,0.028138667345046997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,1023,0.032485333581765495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,2047,0.04371733466784159
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,1023,0.03251733382542928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,2047,0.043840001026789345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,4095,0.06530666848023732
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,4095,0.06564799944559734
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,8191,0.10870400071144104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,8191,0.10876799623171489
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,16383,0.1957813302675883
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,16383,0.19527999560038248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,1,0.009658666948477427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,float16,32767,0.37007466952006024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,1,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,1,128,1,float16,fp8,32767,0.368010679880778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,3,0.009818666925032934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,3,0.009893333539366722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,7,0.009632000078757605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,7,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,15,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,31,0.010501333822806677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,15,0.010010666524370512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,31,0.010661333799362183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,63,0.012223999947309494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,63,0.0120319997270902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,127,0.012373333175977072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,127,0.012175999581813812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,255,0.012618667135636011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,511,0.02807466685771942
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,511,0.02810666710138321
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,255,0.0122079998254776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,1023,0.03267733256022135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,1023,0.03268266717592875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,2047,0.01820266619324684
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,2047,0.015770666301250458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,4095,0.01947733387351036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,4095,0.01823466643691063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,8191,0.021989333132902782
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,8191,0.02369066576162974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,16383,0.026730666557947796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,16383,0.026661333938439686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,float16,32767,0.045381332437197365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,1,0.009770666559537252
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,2,128,1,float16,fp8,32767,0.04939733445644379
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,1,0.00984533317387104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,3,0.00961599995692571
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,3,0.009935999910036722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,7,0.00972800018886725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,15,0.009861333295702934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,15,0.009877333417534828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,31,0.0106133334338665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,7,0.00985599992175897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,31,0.010757333288590113
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,63,0.01228800043463707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,63,0.012272000312805176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,127,0.012389333297808966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,127,0.012213333199421564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,255,0.012554666648308435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,255,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,511,0.027994667490323383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,511,0.028101332485675812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,1023,0.013781332721312841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,1023,0.010437333335479101
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,2047,0.01607999950647354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,2047,0.01173866664369901
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,4095,0.013487999637921652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,4095,0.017727999637524288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,8191,0.02649066597223282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,8191,0.018378666291634243
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,16383,0.032074667513370514
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,16383,0.022106667359670002
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,float16,32767,0.05285866558551788
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,1,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,4,128,1,float16,fp8,32767,0.042405332128206887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,1,0.00980266680320104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,3,0.009658666948477427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,3,0.00997866690158844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,7,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,7,0.009717333440979322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,15,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,15,0.010010666524370512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,31,0.010549332946538925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,31,0.010512000570694605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,63,0.012357333054145178
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,63,0.012256000190973282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,127,0.012330666184425354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,127,0.012223999947309494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,255,0.012330666184425354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,511,0.013194666554530462
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,511,0.008053333188096682
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,255,0.012661332885424295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,1023,0.013807999591032663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,1023,0.00884799969693025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,2047,0.020421333611011505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,4095,0.024336000283559162
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,4095,0.012906666845083237
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,2047,0.011007999380429586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,8191,0.03537066777547201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,8191,0.01752000053723653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,16383,0.05459733307361603
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,16383,0.02906133234500885
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,float16,32767,0.08141333361466725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,1,0.047168001532554626
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,64,8,128,1,float16,fp8,32767,0.043866669138272606
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,3,0.04886400202910105
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,1,0.048026666045188904
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,3,0.04948266843954722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,7,0.05093866586685181
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,7,0.05117333432038625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,15,0.0633653352657954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,15,0.06317866841952006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,31,0.06398933132489522
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,31,0.063509335120519
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,63,0.06404800216356914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,63,0.06442133088906606
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,127,0.07720000048478444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,127,0.07696000238259633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,255,0.11858666936556499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,255,0.11820800105730693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,511,0.2007733384768168
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,511,0.2009920080502828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,1023,0.3652053276697795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,1023,0.36400000254313153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,2047,0.7007733186086019
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,2047,0.6908960342407227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,float16,4095,1.3613386154174805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,1,0.04714133342107137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,3,0.048954665660858154
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,1,128,1,float16,fp8,4095,1.3551573753356934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,1,0.04796266555786133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,3,0.04970133304595947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,7,0.05100800096988678
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,7,0.05101866523424784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,15,0.06358399987220764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,31,0.06404800216356914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,15,0.06351466476917267
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,63,0.06419200201829274
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,63,0.0644053320089976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,31,0.0634986658891042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,127,0.013034666577974955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,127,0.010224000240365664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,255,0.026149332523345947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,255,0.021359999974568684
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,511,0.040005333721637726
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,511,0.0347626656293869
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,1023,0.05831466615200043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,1023,0.07378666599591573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,2047,0.09034666419029236
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,2047,0.11106666922569275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,float16,4095,0.11050666371981303
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,1,0.048063998421033226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,1,0.047168001532554626
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,2,128,1,float16,fp8,4095,0.13766933480898538
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,3,0.04906133313973745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,15,0.06358399987220764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,63,0.06469866633415222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,15,0.06348800162474315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,7,0.05133866767088572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,3,0.0496373325586319
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,7,0.051072001457214355
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,31,0.06363200147946675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,31,0.06423466900984447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,63,0.0642133355140686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,127,0.01657066618402799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,127,0.009946666657924652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,255,0.03148799886306127
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,511,0.0499893327554067
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,255,0.01786133274435997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,511,0.03218133250872294
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,1023,0.06358933448791504
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,1023,0.04531733194986979
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,2047,0.08684266606966655
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,1,0.0473280002673467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,2047,0.060229331254959106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,3,0.04925866425037384
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,float16,4095,0.12928533554077148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,4,128,1,float16,fp8,4095,0.10317867000897725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,1,0.04801600178082784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,3,0.04975466430187225
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,7,0.05090666810671488
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,15,0.06362133224805196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,7,0.05137600004673004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,15,0.06388799846172333
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,31,0.06433066725730896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,127,0.026234666506449383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,31,0.06366399923960368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,63,0.06451199948787689
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,63,0.0642080008983612
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,255,0.05013866722583771
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,127,0.012479999413092932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,511,0.029493334392706554
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,255,0.02207999924818675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,511,0.06524799764156342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,1023,0.08380800485610962
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,2047,0.1293653349081675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,2047,0.06760533154010773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,float16,4095,0.2169813315073649
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,4095,0.11451199650764465
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,64,8,128,1,float16,fp8,1023,0.054234668612480164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,1,0.009893333539366722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,1,0.01032533310353756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,3,0.010122666756312052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,7,0.00916800027092298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,3,0.010378666842977205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,15,0.009818666925032934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,7,0.009381333366036415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,63,0.011834666132926941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,31,0.011893333246310553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,63,0.011690666278203329
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,127,0.011711999773979187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,31,0.011637333780527115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,255,0.013712000101804733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,127,0.011749333391586939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,255,0.013536000003417334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,1023,0.029882666965325672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,2047,0.051327998439470925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,511,0.019061333189407986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,511,0.019002666076024372
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,1023,0.030373332401116688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,4095,0.09521599610646565
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,2047,0.05201066533724467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,8191,0.17961599429448447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,8191,0.18124266465504965
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,16383,0.35068798065185547
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,32767,0.9414292971293131
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,16383,0.35233600934346515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,float16,4095,0.09405333797136943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,1,0.00916800027092298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,1,128,1,float16,fp8,32767,0.8156853516896566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,3,0.009194666519761086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,1,0.010175999874869982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,3,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,7,0.009077333534757296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,7,0.010399999717871347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,15,0.009930666536092758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,31,0.011744000017642975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,31,0.011898666620254517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,63,0.011653333902359009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,63,0.011898666620254517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,127,0.011887999872366587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,15,0.01091733326514562
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,255,0.013525333255529404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,511,0.019120000302791595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,255,0.013760000467300415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,127,0.011887999872366587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,1023,0.01611199975013733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,1023,0.013967999567588171
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,511,0.01893866683046023
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,4095,0.022181332111358643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,4095,0.018266666680574417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,2047,0.01580799991885821
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,8191,0.03326933334271113
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,8191,0.032586666444937386
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,16383,0.04009599983692169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,2047,0.019786667078733444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,16383,0.03966933240493139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,float16,32767,0.0672106643517812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,1,0.00895999992887179
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,2,128,1,float16,fp8,32767,0.06606400012969971
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,1,0.00922133338948091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,3,0.009237333511312803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,3,0.009455999980370203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,15,0.010112000008424124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,15,0.010117333382368088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,7,0.009418666362762451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,31,0.011722666521867117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,31,0.011861333002646765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,63,0.011829332758982977
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,63,0.012026666353146235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,7,0.009189333145817121
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,127,0.011978667229413986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,127,0.011887999872366587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,255,0.013744000345468521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,511,0.013023999830087027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,255,0.013445333888133367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,511,0.00972800018886725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,1023,0.013653332988421122
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,2047,0.02237333357334137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,2047,0.013199999928474426
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,4095,0.025450666745503742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,4095,0.015306666493415833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,1023,0.010565333068370819
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,8191,0.026965332527955372
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,16383,0.05650666852792104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,8191,0.0374293327331543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,16383,0.03498666733503342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,float16,32767,0.0844533344109853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,4,128,1,float16,fp8,32767,0.06122133135795593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,1,0.00897066667675972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,3,0.009248000259200731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,3,0.009232000137368837
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,7,0.009205333267649015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,7,0.009434666484594345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,1,0.009813333551088968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,15,0.009935999910036722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,15,0.009973333527644476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,31,0.011776000261306763
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,31,0.011834666132926941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,63,0.011941333611806234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,63,0.011978667229413986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,255,0.012949333836634954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,511,0.008325333396593729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,127,0.011909333368142446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,255,0.008127999802430471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,511,0.013424000392357508
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,1023,0.019461333751678467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,1023,0.01009599988659223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,127,0.011834666132926941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,2047,0.028602667152881622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,2047,0.012858666479587555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,4095,0.041221333046754204
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,4095,0.019904000063737232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,8191,0.05952533086140951
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,8191,0.026949333647886913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,16383,0.08918399612108867
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,32767,0.06971733272075653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,float16,32767,0.13049599528312683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,1,0.08981333176294963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,1,0.0913813312848409
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,3,0.09276800354321797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,3,0.09390399853388469
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,64,8,128,1,float16,fp8,16383,0.05798399945100149
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,7,0.0962506632010142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,7,0.09731733798980713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,15,0.12102933724721272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,31,0.12190933028856914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,63,0.12307733297348022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,127,0.14747732877731323
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,31,0.12100266416867574
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,127,0.14749866724014282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,255,0.22859734296798706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,15,0.12132799625396729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,255,0.22801067431767783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,63,0.12230400244394939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,511,0.38949334621429443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,511,0.3893386522928874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,1023,0.7073226769765218
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,2047,1.3646613756815593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,float16,1023,0.7178293069203695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,1,0.08974400162696838
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,1,128,1,float16,fp8,2047,1.3546346028645833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,3,0.09288000067075093
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,3,0.0939466655254364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,7,0.09628267089525859
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,7,0.09718400239944458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,15,0.12088533242543538
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,15,0.12144533793131511
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,31,0.12198932965596516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,1,0.0912000040213267
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,31,0.12115200360616048
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,63,0.12319466471672058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,63,0.12226667006810506
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,127,0.020314666132132213
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,127,0.017701332767804463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,255,0.03904533386230469
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,255,0.039018665750821434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,511,0.07454399764537811
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,1023,0.11257066329320271
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,1023,0.0794239987929662
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,2047,0.111653337876002
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,float16,511,0.06346666812896729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,1,0.08984532952308655
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,2,128,1,float16,fp8,2047,0.1388106644153595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,1,0.09126933415730794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,3,0.09407466650009155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,3,0.09301333626111348
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,7,0.09643200039863586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,7,0.09741866588592529
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,15,0.12101333340009053
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,31,0.12210133671760559
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,63,0.12321600317955017
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,63,0.12264000376065572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,127,0.027765333652496338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,31,0.12108799815177917
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,127,0.015168000012636185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,15,0.12142399946848552
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,255,0.052629331747690834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,255,0.03230933348337809
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,511,0.06740800042947133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,511,0.046570668617884316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,1023,0.0862559974193573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,float16,2047,0.13319466511408487
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,1023,0.060922667384147644
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,1,0.08993599812189738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,1,0.09165866176287334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,3,0.09301333626111348
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,3,0.09430399537086487
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,7,0.09634133179982503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,4,128,1,float16,fp8,2047,0.10495466987291972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,15,0.12111467123031616
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,7,0.09770133097966512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,31,0.12134400010108948
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,31,0.12226133545239766
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,15,0.12157332897186279
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,63,0.12318933010101318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,63,0.12269866466522217
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,127,0.01807466646035512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,255,0.05569600065549215
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,255,0.02643733223279317
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,511,0.0747680018345515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,127,0.04876266419887543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,1023,0.11849600076675415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,1023,0.06398933132489522
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,511,0.04050666590531667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,float16,2047,0.20889600118001303
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,float16,1,0.17397334178288779
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,64,8,128,1,float16,fp8,2047,0.11140799522399902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,fp8,1,0.17729065815607706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,fp8,3,0.18230400482813516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,float16,7,0.18777066469192505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,float16,3,0.17997866868972778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,float16,15,0.23721599578857422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,fp8,15,0.23673067490259805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,fp8,7,0.18969066937764487
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,float16,31,0.23851199944814047
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,fp8,31,0.23651200532913208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,float16,63,0.2405653397242228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,fp8,63,0.2395520011583964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,fp8,127,0.2891040047009786
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,float16,255,0.4496266841888428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,float16,511,0.7742559909820557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,float16,1,0.17414933443069458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,fp8,511,0.7660640080769857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,float16,127,0.28800533215204877
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,fp8,1,0.17744000752766928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,1,128,1,float16,fp8,255,0.4481653372446696
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,float16,3,0.17985600233078003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,fp8,3,0.18213866154352823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,float16,7,0.18805867433547974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,fp8,7,0.19001599152882895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,fp8,31,0.23663999636967978
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,float16,31,0.23877867062886557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,float16,15,0.237119992574056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,float16,63,0.24043200413386026
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,fp8,63,0.2395253380139669
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,fp8,15,0.23684799671173096
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,float16,127,0.03533333291610082
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,fp8,127,0.032511999209721885
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,float16,255,0.06718933085600536
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,fp8,255,0.08678932984670003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,float16,511,0.08513599634170532
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,2,128,1,float16,fp8,511,0.11567999919255574
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,float16,3,0.1797973314921061
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,float16,1,0.1741066575050354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,fp8,3,0.1821813384691874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,float16,7,0.18799465894699097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,fp8,7,0.1900213360786438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,fp8,1,0.17747733990351358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,float16,15,0.23735467592875162
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,fp8,15,0.23708266019821167
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,float16,31,0.23892800013224283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,fp8,31,0.23691733678181967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,float16,63,0.24365333716074625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,fp8,63,0.23973333835601807
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,float16,127,0.049327999353408813
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,float16,255,0.05871999760468801
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,fp8,127,0.02789866675933202
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,fp8,255,0.04161066561937332
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,float16,511,0.07842133442560832
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,4,128,1,float16,fp8,511,0.05596800148487091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,fp8,1,0.1773279905319214
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,float16,3,0.18010133504867554
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,fp8,3,0.18233599265416464
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,float16,1,0.17410133282343546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,float16,7,0.18820265928904215
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,fp8,7,0.1904319922129313
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,float16,15,0.2376213272412618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,fp8,15,0.23742934068044028
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,float16,31,0.24009066820144653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,fp8,31,0.23748799165089926
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,float16,63,0.25038933753967285
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,fp8,127,0.03663466622432073
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,fp8,63,0.24266666173934937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,float16,255,0.09852266311645508
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,float16,127,0.09066667159398396
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,fp8,255,0.04948266843954722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,float16,511,0.13427199920018515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,64,8,128,1,float16,fp8,511,0.07005333403746287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,float16,1,0.3430986801783244
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,fp8,1,0.34990934530893963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,fp8,3,0.3601440191268921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,float16,3,0.35608001550038654
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,fp8,7,0.3736799955368042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,float16,15,0.46911998589833576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,float16,31,0.4718133211135864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,float16,7,0.3702346483866374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,fp8,31,0.46801066398620605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,fp8,15,0.4686239957809448
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,float16,63,0.47654934724171955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,float16,127,0.5809333324432373
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,fp8,127,0.5739573240280151
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,fp8,63,0.4738080104192098
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,float16,255,0.9017439683278402
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,fp8,1,0.35022934277852374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,float16,1,0.3433813254038493
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,float16,3,0.354533314704895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,fp8,3,0.3598613341649373
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,1,128,1,float16,fp8,255,0.8986559708913168
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,float16,7,0.37067198753356934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,fp8,7,0.37461864948272705
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,float16,15,0.46914132436116535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,fp8,15,0.4687360127766927
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,float16,31,0.4724533160527547
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,fp8,31,0.46882665157318115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,float16,63,0.4840000073115031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,float16,127,0.06715733309586842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,fp8,63,0.47396798928578693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,float16,255,0.07492266595363617
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,fp8,255,0.10705066720644633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,float16,1,0.3434720039367676
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,2,128,1,float16,fp8,127,0.07308266560236613
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,fp8,1,0.3498400052388509
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,float16,3,0.3543146848678589
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,fp8,3,0.3598133325576782
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,float16,63,0.4893440008163452
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,float16,15,0.4703199863433838
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,fp8,15,0.46932268142700195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,fp8,7,0.3747306664784749
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,fp8,63,0.48580265045166016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,fp8,31,0.4715893268585205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,float16,31,0.48368533452351886
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,float16,7,0.3709919850031535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,float16,127,0.08987200260162354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,fp8,127,0.06739200154940288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,float16,255,0.10505599776903789
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,float16,1,0.34324268500010174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,4,128,1,float16,fp8,255,0.0803413341442744
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,fp8,1,0.3499306837717692
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,float16,3,0.3555786609649658
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,float16,7,0.37561599413553876
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,fp8,7,0.3759946823120117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,float16,15,0.47889065742492676
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,float16,31,0.48757867018381756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,fp8,3,0.35981865723927814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,fp8,15,0.47231467564900714
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,fp8,31,0.4830666780471802
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,float16,63,0.4930186669031779
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,fp8,63,0.4933813412984212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,float16,127,0.16061333815256754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,fp8,127,0.06806399921576183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,float16,255,0.18499199549357095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,1,0.014773332824309668
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,3,0.015285332997639975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,1,0.015061333775520325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,3,0.01504533365368843
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,7,0.01498666654030482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,7,0.015322666615247726
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,15,0.016447999825080235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,64,8,128,1,float16,fp8,255,0.10429333647092183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,31,0.01977066695690155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,15,0.016554666062196095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,63,0.01989866668979327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,31,0.019797333826621372
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,63,0.02013333390156428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,127,0.020021333048741024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,127,0.020047999918460846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,255,0.023498666783173878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,511,0.03455466777086258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,511,0.03454933315515518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,1023,0.055999999245007835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,4095,0.1809119979540507
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,2047,0.09820800026257832
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,2047,0.09733866651852925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,1023,0.05542399982611338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,255,0.02342933416366577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,4095,0.18226667245229086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,16383,0.7028746604919434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,8191,0.34997332096099854
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,8191,0.34876267115275067
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,16383,0.6852479775746664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,float16,32767,1.8021225929260254
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,1,0.015077333897352219
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,1,128,1,float16,fp8,32767,1.875157356262207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,3,0.014991999914248785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,1,0.014688000082969666
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,3,0.015573333948850632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,7,0.015397333850463232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,7,0.015216000378131866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,15,0.01637866720557213
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,15,0.01647466669480006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,31,0.019882666567961376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,31,0.01993600030740102
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,63,0.020037333170572918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,127,0.020117333779732387
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,63,0.019930666933457058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,255,0.023669332265853882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,255,0.023503998915354412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,127,0.020021333048741024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,511,0.012655999511480331
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,1023,0.015989333391189575
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,1023,0.014127999544143677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,511,0.015274666249752045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,2047,0.02443733314673106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,4095,0.03070399910211563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,8191,0.04625066618124644
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,2047,0.02585600068171819
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,4095,0.028175999720891316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,8191,0.04419200122356415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,16383,0.06715199848016103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,16383,0.07487999896208446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,float16,32767,0.10483200351397197
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,1,0.01461333284775416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,2,128,1,float16,fp8,32767,0.1283680001894633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,3,0.015103999525308609
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,3,0.01551466683546702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,1,0.015109332899252573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,7,0.01509333277742068
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,15,0.016575999557971954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,7,0.015520000209410986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,15,0.016421332955360413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,31,0.019695999721686046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,63,0.01995733380317688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,127,0.020037333170572918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,31,0.019952000429232914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,127,0.02015999952952067
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,255,0.01312000056107839
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,63,0.019834666202465694
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,511,0.013669333110253016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,255,0.00926399976015091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,511,0.009882666791478792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,1023,0.019632000476121902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,2047,0.029093332588672638
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,1023,0.012042666474978128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,4095,0.023887999355793
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,2047,0.020128000527620316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,8191,0.059765333930651345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,4095,0.04312000175317129
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,8191,0.03999999910593033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,16383,0.06398400167624156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,16383,0.09089600046475728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,1,0.014767999450365702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,fp8,32767,0.09844799836476643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,4,128,1,float16,float16,32767,0.13242133458455405
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,1,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,3,0.015034666905800501
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,3,0.01552533358335495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,7,0.015194666882356008
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,7,0.01544533297419548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,15,0.016554666062196095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,15,0.01666133354107539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,31,0.019850666324297588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,31,0.01982933282852173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,63,0.022661333282788593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,63,0.02004266654451688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,127,0.010693332801262537
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,255,0.01357866699496905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,127,0.007071999832987785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,255,0.008053333188096682
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,511,0.009514666472872099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,511,0.019487999379634857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,2047,0.043280000487963356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,2047,0.019167999426523846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,4095,0.03107200066248576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,4095,0.06919999917348225
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,1023,0.015605332950750986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,8191,0.0869706670443217
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,8191,0.04445866743723551
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,1023,0.027893332143624622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,16383,0.1276479959487915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,16383,0.06869866450627644
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,float16,32767,0.21333332856496176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,64,8,128,1,float16,fp8,32767,0.11567466457684834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,float16,1,0.6897973219553629
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,fp8,1,0.7025226751963297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,float16,3,0.7182400226593018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,float16,7,0.7474666436513265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,fp8,7,0.7531946500142416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,float16,15,0.9414453506469727
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,fp8,15,0.9370079835255941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,fp8,3,0.7275359630584717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,float16,31,0.9534400304158529
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,fp8,31,0.9424053033192953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,float16,63,0.9630080064137777
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,fp8,63,0.9597012996673584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,float16,127,1.1607733567555745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,1,128,1,float16,fp8,127,1.1593226591746013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,float16,1,0.6932853062947592
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,float16,3,0.7212106386820475
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,fp8,1,0.7120532989501953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,fp8,3,0.7258293628692627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,float16,7,0.7576213677724203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,fp8,7,0.7622719605763754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,float16,15,0.9460426966349283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,fp8,15,0.9429120222727457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,float16,31,0.9560800393422445
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,fp8,31,0.951685349146525
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,fp8,63,0.9652053515116373
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,float16,127,0.12357333302497864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,fp8,127,0.1835306684176127
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,float16,1,0.7029813130696615
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,2,128,1,float16,float16,63,0.9734453360239664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,fp8,1,0.714458703994751
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,float16,3,0.7296106815338135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,fp8,3,0.7373173236846924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,float16,7,0.7664266427357992
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,fp8,7,0.7751146952311198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,float16,15,0.9501760005950928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,fp8,15,0.9513599872589111
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,fp8,31,0.9597386519114176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,float16,63,0.9710079828898112
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,fp8,63,0.973578691482544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,float16,31,0.971226692199707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,fp8,127,0.1300320029258728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,4,128,1,float16,float16,127,0.168938676516215
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,float16,1,0.7186666329701742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,fp8,1,0.7316853205362955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,float16,3,0.739402691523234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,fp8,3,0.7488053639729818
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,float16,7,0.7727519671122233
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,float16,15,0.9559146563212076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,float16,31,0.9676640033721924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,fp8,31,0.9672906398773193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,fp8,15,0.9584320386250814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,fp8,7,0.7947519620259603
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,float16,63,0.9776853720347086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,fp8,63,0.9801066716512045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,float16,127,0.3102666735649109
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,64,8,128,1,float16,fp8,127,0.12772267063458762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,float16,1,1.402938683827718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,fp8,1,1.4319519996643066
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,float16,7,1.5094614028930664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,fp8,7,1.527034600575765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,float16,15,1.882256031036377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,fp8,3,1.4896373748779297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,float16,3,1.4568799336751301
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,fp8,15,1.8829332987467449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,float16,31,1.9000320434570312
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,fp8,31,1.8911360104878743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,float16,63,1.9187307357788086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,1,128,1,float16,fp8,63,1.914789358774821
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,float16,1,1.4106933275858562
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,fp8,1,1.4672212600708008
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,fp8,3,1.4793972969055176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,float16,7,1.5163680712382
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,float16,3,1.4545547167460124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,fp8,7,1.5364586512247722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,float16,15,1.8863412539164226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,fp8,15,1.8884000778198242
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,float16,31,1.9053173065185547
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,float16,63,1.9237173398335774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,fp8,63,1.9239947001139324
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,fp8,1,1.4556105931599934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,2,128,1,float16,fp8,31,1.9126453399658203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,float16,1,1.4291253089904785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,float16,3,1.4637920061747234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,fp8,3,1.4877920150756836
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,float16,7,1.528117338816325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,fp8,7,1.5485706329345703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,float16,15,1.8930986722310383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,fp8,15,1.8979040781656902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,float16,31,1.9335625966389973
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,float16,63,1.9339359601338704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,fp8,63,1.9398345947265625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,4,128,1,float16,fp8,31,1.9121546745300293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,float16,1,1.4376160303751628
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,fp8,1,1.4664427439371746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,float16,3,1.4727786382039387
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,fp8,3,1.4965759913126628
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,float16,7,1.5584373474121094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,fp8,7,1.5600159962972004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,float16,15,1.9038559595743816
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,float16,31,1.9288320541381836
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,fp8,31,1.9278133710225422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,float16,63,1.9476693471272786
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,1,0.014746667196353277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,fp8,15,1.9106720288594563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,3,0.015119999647140503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,3,0.01543466622630755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,1,0.014767999450365702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,7,0.01573866605758667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,7,0.015802666544914246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,15,0.019365333020687103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,31,0.019333332777023315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,15,0.019445333629846573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,31,0.019413333386182785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,64,8,128,1,float16,fp8,63,1.952799956003825
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,63,0.019482667247454327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,127,0.023242667317390442
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,63,0.019381333142518997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,127,0.023071999351183575
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,255,0.03408533334732056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,511,0.05542399982611338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,255,0.034048000971476235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,1023,0.09825600186983745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,511,0.05624000231424967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,1023,0.0997973382472992
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,2047,0.18650666872660318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,4095,0.3546666701634725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,4095,0.3599626620610555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,2047,0.18381333351135254
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,8191,0.7100959618886312
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,8191,0.7223359743754069
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,1,0.014650666465361914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,float16,16383,1.8072266578674316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,1,0.014885333677132925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,3,0.015279999623696009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,7,0.015743999431530636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,1,128,1,float16,fp8,16383,1.9314026832580566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,15,0.019359999646743137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,3,0.015184000134468079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,7,0.015802666544914246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,31,0.01940800001223882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,15,0.019391999890406925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,63,0.0195573332409064
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,31,0.01932799940307935
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,127,0.023317334552605946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,63,0.019413333386182785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,255,0.01191466674208641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,255,0.014906667172908783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,1023,0.025455998877684276
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,511,0.012725333372751871
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,127,0.02314666658639908
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,511,0.015589332828919092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,1023,0.02332799881696701
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,2047,0.03348266581694285
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,4095,0.053039997816085815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,2047,0.03499733408292135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,8191,0.07427733143170674
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,4095,0.05381333331267039
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,float16,16383,0.11517866452534993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,16383,0.13571199774742126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,2,128,1,float16,fp8,8191,0.09904000163078308
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,1,0.01498666654030482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,1,0.014602666099866232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,3,0.015370666980743408
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,3,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,7,0.01586666703224182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,7,0.015664000064134598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,15,0.01956266661485036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,31,0.019493332753578823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,31,0.019189332922299702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,63,0.019440000255902607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,127,0.010650667051474253
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,15,0.019333332777023315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,255,0.013642666240533194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,63,0.01950399950146675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,127,0.008762666955590248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,255,0.010399999717871347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,511,0.019472000499566395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,511,0.011242666592200598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,1023,0.02845866729815801
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,2047,0.04484266539414724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,2047,0.02792533238728841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,8191,0.08897599577903748
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,4095,0.04607999821503957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,4095,0.06917333106199901
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,1023,0.019999999552965164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,8191,0.06081066528956095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,float16,16383,0.12948800126711527
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,4,128,1,float16,fp8,16383,0.08872000376383464
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,1,0.014725333700577417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,1,0.015024000157912573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,3,0.01522133375207583
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,7,0.015882667154073715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,15,0.019472000499566395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,3,0.015279999623696009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,15,0.019424000134070713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,7,0.015743999431530636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,31,0.01941866676012675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,31,0.0195573332409064
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,63,0.01958400011062622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,63,0.019498666127522785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,127,0.011002667248249054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,127,0.008383999889095625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,255,0.02109866589307785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,511,0.027999999622503918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,1023,0.04637866715590159
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,511,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,255,0.009365333244204521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,1023,0.02223466585079829
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,2047,0.06638399759928386
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,4095,0.08531733353932698
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,4095,0.0433599998553594
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,2047,0.029658667743206024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,8191,0.12650133172671
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,1,0.014864000181357065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,8191,0.0671253353357315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,float16,16383,0.3105493386586507
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,64,8,128,1,float16,fp8,16383,0.11381866534550984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,1,0.014959999670584997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,3,0.015087999403476715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,7,0.015829333414634068
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,7,0.01580799991885821
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,3,0.01526933287580808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,15,0.01941866676012675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,31,0.019381333142518997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,31,0.019482667247454327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,63,0.019509332875410717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,15,0.01930133377512296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,63,0.019343999524911244
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,127,0.023221333821614582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,255,0.015205333630243937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,255,0.011978667229413986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,127,0.023178666830062866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,511,0.012639999389648438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,511,0.015487999965747198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,1023,0.025445332129796345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,2047,0.03525333354870478
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,1023,0.023226665953795116
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,2047,0.0334346666932106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,4095,0.053317333261171974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,8191,0.0751146674156189
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,4095,0.05292266607284546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,float16,16383,0.11333866914113362
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,8191,0.09312533338864644
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,1,128,1,float16,fp8,16383,0.13581867019335428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,1,0.015125333021084467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,1,0.014746667196353277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,3,0.01526933287580808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,3,0.015226667126019796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,7,0.01587733378012975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,7,0.015840000162522
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,15,0.01937599976857503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,15,0.01942933350801468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,31,0.01956266661485036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,63,0.0194560003777345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,31,0.019248000035683315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,63,0.019487999379634857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,127,0.010586666564146677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,255,0.013728000223636627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,511,0.011333333949247995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,127,0.008896000062425932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,255,0.010389333590865135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,511,0.01961600035429001
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,1023,0.020234666764736176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,1023,0.028581333657105763
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,2047,0.04473066826661428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,2047,0.027776000400384266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,4095,0.06950399776299794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,4095,0.045226668318112694
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,8191,0.08764800429344177
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,1,0.014746667196353277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,float16,16383,0.12917866309483847
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,8191,0.06115733087062836
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,1,0.015024000157912573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,2,128,1,float16,fp8,16383,0.10479467113812764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,3,0.015317333241303762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,7,0.01594666639963786
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,31,0.01953599974513054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,15,0.019466667125622433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,3,0.015423999478419622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,7,0.01579733317097028
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,15,0.01939733326435089
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,31,0.019424000134070713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,63,0.01966399947802226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,63,0.019466667125622433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,127,0.011039999624093374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,127,0.007247999931375186
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,255,0.02092266579469045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,511,0.028255999088287354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,1023,0.046394666035970054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,255,0.010117333382368088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,1023,0.022069332500298817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,2047,0.06642666459083557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,511,0.01505600040157636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,2047,0.02979733298222224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,4095,0.08592533071835835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,8191,0.12773332993189493
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,4095,0.04311466713746389
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,8191,0.0669653316338857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,float16,16383,0.21273066600163779
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,4,128,1,float16,fp8,16383,0.11417599519093831
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,7,0.01598400001724561
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,3,0.01565333331624667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,1,0.015029333531856537
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,15,0.019413333386182785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,31,0.019440000255902607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,63,0.01964266722400983
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,127,0.008223999912540117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,1,0.014762666076421738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,3,0.015216000378131866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,15,0.01941866676012675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,31,0.019493332753578823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,511,0.046949331959088646
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,63,0.019487999379634857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,1023,0.06145066519578298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,127,0.015552000453074774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,7,0.015861333658297855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,2047,0.0844693382581075
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,2047,0.04077333211898804
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,255,0.013429333766301474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,255,0.029919999341169994
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,4095,0.12638399998346964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,511,0.020021333048741024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,8191,0.21150932709376016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,8191,0.11096533139546712
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,1023,0.026181332767009735
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,1,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,float16,16383,0.5832053422927856
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,16383,0.20523732900619507
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,1,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,3,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,3,0.00956266683836778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,7,0.009450666606426239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,32,8,128,1,float16,fp8,4095,0.06469866633415222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,7,0.009743999689817429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,31,0.01027199998497963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,15,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,31,0.010357333347201347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,63,0.011999999483426413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,15,0.010768000036478043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,63,0.011850666254758835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,127,0.012058666596810022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,127,0.01231466606259346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,255,0.012240000069141388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,511,0.016528000434239704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,511,0.016602666427691776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,255,0.01198400060335795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,1023,0.017562666287024815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,1023,0.017909333109855652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,2047,0.029919999341169994
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,2047,0.029626667499542236
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,4095,0.040218666195869446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,4095,0.039893334110577904
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,8191,0.02183466653029124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,16383,0.0242399995525678
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,8191,0.021909333765506744
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,16383,0.026880001028378803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,32767,0.030271999537944794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,32767,0.033904001116752625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,float16,65535,0.03736533224582672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,1,0.009450666606426239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,1,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,1,128,1,float16,fp8,65535,0.04316799839337667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,3,0.009541333342591921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,7,0.00973866693675518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,7,0.009583999713261923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,15,0.009637333452701569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,3,0.010629333555698395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,15,0.0099093330403169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,31,0.01020800011853377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,31,0.010485333700974783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,63,0.011936000237862269
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,127,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,127,0.012096000214417776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,63,0.012128000458081564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,255,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,255,0.012138667205969492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,511,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,511,0.016634666671355564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,2047,0.029733332494894665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,1023,0.01775466650724411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,1023,0.017690667261679966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,4095,0.015722667177518208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,2047,0.029493334392706554
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,4095,0.013178666432698568
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,8191,0.019130667050679524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,8191,0.015919999529918034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,16383,0.02037866661945979
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,32767,0.02497066557407379
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,16383,0.018863999595244724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,32767,0.024400000770886738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,1,0.009328000247478485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,float16,65535,0.03142400085926056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,2,128,1,float16,fp8,65535,0.0308693324526151
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,1,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,3,0.009434666484594345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,3,0.009754666437705358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,7,0.009450666606426239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,7,0.009706666693091393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,15,0.009663999701539675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,31,0.010506667196750641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,15,0.009984000275532404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,31,0.010485333700974783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,63,0.012175999581813812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,63,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,127,0.01219733307758967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,127,0.01221866657336553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,255,0.012272000312805176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,255,0.01201066623131434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,511,0.016757333030303318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,1023,0.017759999881188076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,2047,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,2047,0.00966933307548364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,511,0.01642666632930438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,1023,0.017605333278576534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,4095,0.015941333025693893
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,4095,0.010778666784365972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,8191,0.019621333728233974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,8191,0.013295999417702356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,16383,0.021664001047611237
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,16383,0.01553600033124288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,32767,0.02277333289384842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,32767,0.035029334326585136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,float16,65535,0.049269333481788635
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,1,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,3,0.009578666960199675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,1,0.009813333551088968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,4,128,1,float16,fp8,65535,0.029264000554879505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,3,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,7,0.00943999985853831
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,7,0.009775999933481216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,15,0.009648000200589498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,31,0.010431999961535135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,15,0.010079999764760336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,31,0.01051733394463857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,63,0.0120319997270902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,63,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,127,0.012175999581813812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,255,0.012181332955757776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,255,0.012170666207869848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,511,0.01664000004529953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,511,0.016613333175579708
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,1023,0.013621332744757334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,1023,0.008266666904091835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,127,0.012191999703645706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,2047,0.015493333339691162
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,4095,0.016442666451136272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,2047,0.009093333035707474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,4095,0.010421333213647207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,8191,0.025466665625572205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,8191,0.0144213338692983
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,16383,0.031061333914597828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,32767,0.05036266644795736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,16383,0.017456000049908955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,32767,0.024885334074497223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,1,0.009621333330869675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,float16,65535,0.07719466586907704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,1,0.009685333197315535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,32,8,128,1,float16,fp8,65535,0.05147199829419454
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,3,0.009568000212311745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,3,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,7,0.009546666716535887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,7,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,15,0.009898666913310686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,15,0.00985599992175897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,31,0.010496000448862711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,31,0.010480000327030817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,63,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,63,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,127,0.012373333175977072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,127,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,255,0.01251199965675672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,255,0.012149333953857422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,511,0.01685333376129468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,511,0.01682666689157486
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,1023,0.029733332494894665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,1023,0.029445332785447437
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,2047,0.03751466671625773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,4095,0.019248000035683315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,4095,0.01821333294113477
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,8191,0.023317334552605946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,2047,0.03718933214743932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,16383,0.025546667476495106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,8191,0.0220266655087471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,32767,0.03275199979543686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,16383,0.026522666215896606
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,32767,0.03385599950949351
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,1,0.009919999788204828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,float16,65535,0.03968533376852671
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,1,128,1,float16,fp8,65535,0.04339733223120371
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,1,0.009578666960199675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,3,0.009621333330869675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,3,0.009989333028594652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,7,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,7,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,15,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,15,0.009935999910036722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,31,0.010384000216921171
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,63,0.012250666817029318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,31,0.010533332824707031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,63,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,127,0.012106666962305704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,255,0.012479999413092932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,127,0.012266666938861212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,255,0.012181332955757776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,511,0.016735999534527462
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,511,0.016800000021855038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,1023,0.02958933264017105
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,1023,0.029738667110602062
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,2047,0.015754666179418564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,2047,0.011701333026091257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,4095,0.016879999389251072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,4095,0.01320533330241839
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,8191,0.020389333367347717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,8191,0.016117333124081295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,16383,0.022624000906944275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,16383,0.0191040001809597
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,32767,0.035743998984495796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,32767,0.02898666759332021
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,1,0.009706666693091393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,1,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,float16,65535,0.051221330960591636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,2,128,1,float16,fp8,65535,0.036890665690104164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,3,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,3,0.009914666414260864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,7,0.009712000067035357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,7,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,15,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,15,0.010133333504199982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,31,0.010458666831254959
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,63,0.012213333199421564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,127,0.012106666962305704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,127,0.01231466606259346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,255,0.012458667159080505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,63,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,511,0.016794666647911072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,511,0.016656000167131424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,255,0.01219733307758967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,1023,0.013733333597580591
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,1023,0.008565333361426989
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,2047,0.009866666669646898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,4095,0.011130666981140772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,2047,0.01600533351302147
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,8191,0.015024000157912573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,16383,0.031541332602500916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,8191,0.02604266752799352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,4095,0.01699200024207433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,16383,0.018277333428462345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,1,0.009637333452701569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,32767,0.05160533388455709
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,3,0.009557333464423815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,65535,0.05231999854246775
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,float16,65535,0.07947200040022533
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,1,0.009861333295702934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,4,128,1,float16,fp8,32767,0.026650667190551758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,3,0.009925333162148794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,7,0.009637333452701569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,7,0.009866666669646898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,15,0.010138666878143946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,31,0.010693332801262537
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,63,0.012362666428089142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,15,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,31,0.010677333921194077
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,63,0.012186666329701742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,127,0.012426666915416718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,127,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,255,0.012335999558369318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,511,0.013007999708255133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,255,0.01228800043463707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,1023,0.013754667093356451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,511,0.007770666852593422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,1023,0.008469333251317343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,2047,0.02077866718173027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,2047,0.010421333213647207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,4095,0.01198400060335795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,4095,0.023973333338896435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,8191,0.03509866694609324
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,8191,0.01659199967980385
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,16383,0.0643039991458257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,16383,0.028373333315054577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,32767,0.08044800162315369
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,32767,0.04167466859022776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,1,0.025775998830795288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,float16,65535,0.13380266229311624
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,32,8,128,1,float16,fp8,65535,0.07308800021807353
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,1,0.02604266752799352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,3,0.02672533442576726
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,3,0.02717333287000656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,7,0.027658666173617046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,7,0.027664000789324444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,15,0.03458133339881897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,15,0.03460799902677536
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,63,0.03504000107447306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,63,0.03489600121974945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,127,0.009888000165422758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,127,0.012410666793584824
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,255,0.0120319997270902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,31,0.035061334570248924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,255,0.016549333930015564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,31,0.03462400039037069
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,511,0.023728000621000927
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,511,0.022485333184401195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,1023,0.03409600009520849
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,1023,0.0413973331451416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,2047,0.05584000051021576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,2047,0.05899733304977417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,4095,0.08656000097592671
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,4095,0.11238400141398112
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,float16,8191,0.12264532844225566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,1,0.025941332181294758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,1,0.02619733413060506
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,1,128,1,float16,fp8,8191,0.137855996688207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,3,0.026837334036827087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,3,0.026906666656335194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,7,0.02773333340883255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,15,0.03449599941571554
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,15,0.03454400102297465
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,31,0.035029334326585136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,7,0.027765333652496338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,31,0.034858666360378265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,63,0.03526933242877325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,63,0.03500800083080927
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,127,0.008357333640257517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,255,0.010768000036478043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,127,0.011413333316644033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,255,0.021482666333516438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,511,0.018965333700180054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,1023,0.047653332352638245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,511,0.0290133332212766
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,1023,0.03316266586383184
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,2047,0.06697600086530049
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,2047,0.04524266719818115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,4095,0.06968000034491222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,4095,0.08701333403587341
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,float16,8191,0.12891200184822083
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,2,128,1,float16,fp8,8191,0.08819733063379924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,1,0.0262773334980011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,1,0.025749333202838898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,3,0.026799999177455902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,3,0.027087998886903126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,7,0.02759466568628947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,15,0.03479466587305069
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,15,0.03465600063403448
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,7,0.02794133375088374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,31,0.03492266684770584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,63,0.03517866631348928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,63,0.03498133271932602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,31,0.03474666674931844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,127,0.015962666521469753
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,127,0.007541333635648091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,255,0.030373332401116688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,511,0.02187199890613556
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,511,0.04718933502833048
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,1023,0.06182399888833364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,255,0.015210667004187902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,2047,0.10145599643389384
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,1023,0.029781334102153778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,2047,0.0432586669921875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,4095,0.12796800335248312
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,4095,0.06723733246326447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,fp8,8191,0.11389866471290588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,4,128,1,float16,float16,8191,0.21500800053278604
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,1,0.02640533447265625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,1,0.025829332570234936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,3,0.026736001173655193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,3,0.02722666660944621
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,7,0.027818667391935985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,7,0.02805333336194356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,15,0.034858666360378265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,15,0.03482133398453394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,31,0.035061334570248924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,31,0.034832000732421875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,63,0.03516799956560135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,63,0.03501333296298981
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,127,0.025514667232831318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,127,0.011312000453472137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,255,0.049770668148994446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,255,0.020143999407688778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,511,0.06880533198515575
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,511,0.02678400029738744
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,1023,0.04121066629886627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,2047,0.12956266601880392
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,2047,0.06473066906134288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,1023,0.08227199812730153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,4095,0.21577600638071695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,4095,0.11161599556605022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,float16,8191,0.5854719877243042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,1,0.009733333562811216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,3,0.009632000078757605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,1,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,7,0.009882666791478792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,3,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,32,8,128,1,float16,fp8,8191,0.20593067010243735
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,7,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,15,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,15,0.010159999753038088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,31,0.010512000570694605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,31,0.010656000425418219
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,63,0.012453333785136541
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,63,0.012047999848922094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,127,0.012122667084137598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,127,0.012383999923865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,1023,0.03254933406909307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,255,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,511,0.027978666126728058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,511,0.028031999866167705
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,255,0.012485332787036896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,1023,0.03262399882078171
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,2047,0.02014933278163274
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,2047,0.015781333049138386
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,4095,0.02143466720978419
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,4095,0.018165333817402523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,8191,0.02222399910291036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,8191,0.025077333052953083
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,16383,0.027600000301996868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,16383,0.026709333062171936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,32767,0.04826133449872335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,32767,0.04937600096066793
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,1,0.009653333574533463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,1,0.009701333319147428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,float16,65535,0.06509333352247874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,3,0.009717333440979322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,1,128,1,float16,fp8,65535,0.06378133098284404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,3,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,7,0.009568000212311745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,15,0.00984533317387104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,7,0.009824000298976898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,31,0.010591999938090643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,15,0.010026666646202406
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,31,0.010490667074918747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,63,0.012495999534924826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,127,0.012266666938861212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,127,0.012272000312805176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,63,0.012128000458081564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,255,0.012586666891972223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,255,0.012272000312805176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,511,0.028117333849271137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,511,0.028058665494124096
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,1023,0.01360000049074491
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,2047,0.017445333302021027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,1023,0.010373333469033241
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,4095,0.019258666783571243
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,2047,0.011610666910807291
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,4095,0.013471999516089758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,8191,0.027514666318893433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,8191,0.01830400029818217
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,16383,0.03521066655715307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,16383,0.0227360005180041
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,32767,0.054773335655530296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,32767,0.041237334410349526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,1,0.00973866693675518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,1,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,float16,65535,0.08262399832407634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,2,128,1,float16,fp8,65535,0.06776000062624614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,3,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,3,0.00997866690158844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,7,0.009674666449427605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,7,0.009888000165422758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,15,0.009808000177145004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,15,0.010010666524370512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,63,0.012416000167528788
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,31,0.010512000570694605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,31,0.010591999938090643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,63,0.012367999802033106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,127,0.012202666451533636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,255,0.01267733300725619
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,127,0.012181332955757776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,255,0.012266666938861212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,511,0.013077333569526672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,511,0.008074666683872541
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,1023,0.013274667163689932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,1023,0.008778666456540426
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,2047,0.010949333508809408
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,2047,0.022261333962281544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,4095,0.025610665480295818
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,4095,0.01268799975514412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,8191,0.036917333801587425
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,8191,0.017242666333913803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,16383,0.06753600140412648
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,16383,0.030202666918436687
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,32767,0.08332266906897227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,32767,0.04465066889921824
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,float16,65535,0.13618666927019754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,1,0.00984533317387104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,4,128,1,float16,fp8,65535,0.0743093341588974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,3,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,1,0.009877333417534828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,3,0.010069333637754122
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,7,0.00961599995692571
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,7,0.009813333551088968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,15,0.009893333539366722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,31,0.010480000327030817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,15,0.010112000008424124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,31,0.010773333410422007
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,63,0.012400000045696894
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,127,0.012416000167528788
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,63,0.012410666793584824
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,127,0.012373333175977072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,255,0.01303999995191892
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,511,0.013141332815090815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,255,0.008090666805704435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,1023,0.019333332777023315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,511,0.007893333211541176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,1023,0.009434666484594345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,2047,0.02846933404604594
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,2047,0.01157333329319954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,4095,0.040922666589419045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,4095,0.01842133328318596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,8191,0.059546664357185364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,8191,0.02502399931351344
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,16383,0.04426133135954539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,16383,0.1063253382841746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,32767,0.1290666659673055
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,32767,0.06704533100128174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,float16,65535,0.22040534019470215
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,1,0.047482664386431374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,32,8,128,1,float16,fp8,65535,0.11211733023325603
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,3,0.048986668388048805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,3,0.04971200227737427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,7,0.051072001457214355
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,7,0.051407997806866966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,15,0.06379733482996623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,1,0.048298666874567665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,15,0.06347733239332835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,31,0.06433066725730896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,31,0.06386133531729381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,63,0.06442666550477345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,63,0.06418666740258534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,127,0.013013333082199097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,127,0.010101333260536194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,255,0.0262773334980011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,511,0.0349440003434817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,255,0.021359999974568684
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,1023,0.06078400214513143
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,1023,0.07462400197982788
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,2047,0.11121066411336263
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,2047,0.08437866965929668
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,511,0.039749334255854286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,1,0.047354668378829956
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,fp8,4095,0.13803199927012125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,3,0.04933333396911621
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,3,0.04978133241335551
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,1,0.04818133513132731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,1,128,1,float16,float16,4095,0.11194133758544922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,7,0.05114666620890299
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,7,0.05153066913286845
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,15,0.06384000182151794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,31,0.06427200138568878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,31,0.06361599763234456
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,15,0.06384000182151794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,63,0.06472533444563548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,63,0.0642986645301183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,127,0.01628799984852473
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,255,0.031136001149813335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,255,0.017952000101407368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,511,0.032261334359645844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,511,0.04870399832725525
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,127,0.009813333551088968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,1023,0.0692853331565857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,2047,0.060229331254959106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,2047,0.08691199620564778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,1023,0.04524266719818115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,float16,4095,0.12898666659990946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,2,128,1,float16,fp8,4095,0.08775466680526733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,1,0.04762133459250132
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,3,0.049082666635513306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,1,0.048309331138928734
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,3,0.049957334995269775
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,7,0.05116266508897146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,7,0.05160533388455709
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,15,0.0640533318122228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,31,0.06386666496594746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,15,0.06381866832574208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,31,0.06422399977842967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,63,0.06479466458161671
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,63,0.06458666423956554
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,127,0.02603200078010559
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,255,0.021962667504946392
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,255,0.05087466537952423
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,127,0.01251199965675672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,511,0.06517866750558217
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,511,0.029717333614826202
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,1023,0.04372799893220266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,1023,0.08438400427500407
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,2047,0.13080533345540366
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,2047,0.088837335507075
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,1,0.047557334105173744
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,float16,4095,0.2190613349278768
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,1,0.04833599925041199
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,4,128,1,float16,fp8,4095,0.1153706709543864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,3,0.049045334259668984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,7,0.051274667183558144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,7,0.05150933563709259
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,3,0.05023466547330221
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,15,0.0640533318122228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,15,0.0639466643333435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,63,0.06471999982992808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,31,0.06453333298365276
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,127,0.04563199977080027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,63,0.06479999919732411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,127,0.016143999993801117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,31,0.06423999865849812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,255,0.054101333022117615
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,255,0.022656001150608063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,511,0.07271466652552287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,2047,0.20685333013534546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,1023,0.16011200348536173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,511,0.03769599894682566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,float16,4095,0.3757919867833455
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,2047,0.1088106632232666
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,1023,0.06132266422112783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,32,8,128,1,float16,fp8,4095,0.2009013295173645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,1,0.09016000231107076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,1,0.09172800183296204
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,3,0.09314666191736858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,7,0.09717866778373718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,3,0.09418666362762451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,15,0.12193600336710612
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,7,0.0981226662794749
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,15,0.1218239963054657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,31,0.12258133292198181
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,31,0.12192533413569133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,63,0.12318933010101318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,63,0.12271466851234436
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,127,0.01770666614174843
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,255,0.03920533259709676
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,255,0.03892799963553747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,127,0.02048533285657565
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,511,0.06081599990526835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,511,0.07579199969768524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,float16,1023,0.0848586658636729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,1,128,1,float16,fp8,1023,0.11293333768844604
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,1,0.0901759962240855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,1,0.09168000022570293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,15,0.1218986709912618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,3,0.09302933017412822
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,7,0.09693866968154907
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,3,0.09435733159383138
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,15,0.12191466490427653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,31,0.12261866529782613
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,31,0.12190399567286174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,7,0.0981760025024414
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,63,0.12308266758918762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,255,0.05229333539803823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,63,0.12265066305796306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,255,0.03192000091075897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,127,0.027082666754722595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,127,0.015178666760524115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,511,0.06734933455785115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,float16,1023,0.10348266363143921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,1023,0.06080533564090729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,1,0.09174933036168416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,2,128,1,float16,fp8,511,0.04710933566093445
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,1,0.09027199943860371
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,3,0.09317866961161296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,7,0.09711466232935588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,3,0.09449066718419392
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,7,0.0981226662794749
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,15,0.12210667133331299
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,15,0.12202133735020955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,63,0.12317867080370586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,127,0.017946666727463405
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,31,0.12220799922943115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,255,0.055786664287249245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,127,0.04741866886615753
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,63,0.12339733044306438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,31,0.12289599577585857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,255,0.02605866640806198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,511,0.0517546683549881
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,511,0.07506666580835979
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,3,0.09500799576441447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,fp8,1023,0.06404266754786174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,1,0.09038399656613667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,4,128,1,float16,float16,1023,0.11774933338165283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,1,0.09195199608802795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,7,0.09738666812578838
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,7,0.09883733590443929
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,3,0.09346133470535278
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,15,0.12238933642705281
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,15,0.1225440005461375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,63,0.12331733107566833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,31,0.12330133716265361
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,127,0.08273066580295563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,63,0.13064533472061157
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,31,0.1225493351618449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,255,0.09537067015965779
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,511,0.16702934106191
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,255,0.04298666616280874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,127,0.03084266682465871
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,float16,1023,0.21237866083780924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,511,0.0662720004717509
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,32,8,128,1,float16,fp8,1023,0.11247467001279195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,float16,1,0.1744426687558492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,fp8,1,0.17812800407409668
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,float16,3,0.1800160010655721
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,fp8,3,0.1826080083847046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,fp8,7,0.19108267625172934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,float16,7,0.18907199303309122
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,float16,15,0.23826666673024496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,fp8,31,0.2383093237876892
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,fp8,15,0.23865065972010294
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,float16,63,0.24062933524449667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,float16,31,0.23937066396077475
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,fp8,63,0.23971732457478842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,float16,127,0.035642666121323906
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,fp8,255,0.08566400408744812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,fp8,511,0.12543466687202454
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,fp8,127,0.032602667808532715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,float16,3,0.18013334274291992
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,float16,255,0.06671466430028279
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,fp8,1,0.17776000499725342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,1,128,1,float16,float16,511,0.08569066723187764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,float16,1,0.17442133029301962
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,fp8,3,0.18255466222763062
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,float16,7,0.18895467122395834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,float16,15,0.23849066098531088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,float16,31,0.23964800437291464
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,fp8,15,0.23882667223612467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,fp8,7,0.1913386583328247
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,float16,127,0.05353599786758423
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,fp8,63,0.240064005057017
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,float16,63,0.24150933821996054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,fp8,31,0.23852266867955527
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,fp8,127,0.027434666951497395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,fp8,511,0.0665226678053538
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,float16,1,0.17462400595347086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,float16,255,0.05835199852784475
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,fp8,255,0.04127999891837438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,fp8,1,0.17802667617797852
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,float16,3,0.1802133321762085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,fp8,3,0.18290666739145914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,2,128,1,float16,float16,511,0.07824533184369405
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,float16,7,0.1892426609992981
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,fp8,7,0.19178134202957153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,float16,15,0.23875733216603598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,fp8,15,0.23901333411534628
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,float16,31,0.24049599965413412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,float16,127,0.09060266613960266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,fp8,31,0.23897600173950195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,float16,63,0.251093327999115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,fp8,127,0.0372533326347669
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,float16,255,0.09914666414260864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,fp8,63,0.24141865968704224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,fp8,255,0.04960533479849497
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,float16,3,0.18075199921925864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,float16,1,0.17482133706410727
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,fp8,1,0.17838933070500693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,float16,511,0.13529066244761148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,4,128,1,float16,fp8,511,0.093941330909729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,fp8,3,0.18356800079345703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,float16,7,0.18951465686162314
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,fp8,7,0.19242133696873984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,float16,15,0.2410879929860433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,fp8,63,0.25335999329884845
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,fp8,15,0.2398293415705363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,float16,63,0.25782400369644165
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,float16,31,0.25148266553878784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,float16,127,0.15371732910474142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,fp8,31,0.24320000410079956
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,float16,255,0.17804799477259317
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,fp8,127,0.05796800057093302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,fp8,255,0.07761066655317943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,1,0.009039999917149544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,3,0.00919999989370505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,3,0.009450666606426239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,fp8,511,0.16159466902414957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,1,0.010250666489203772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,32,8,128,1,float16,float16,511,0.24448533852895102
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,7,0.00922133338948091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,7,0.009328000247478485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,31,0.011519999553759893
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,15,0.009935999910036722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,15,0.010106666634480158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,127,0.011685332904259363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,31,0.011898666620254517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,63,0.011754666765530905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,127,0.011839999506870905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,63,0.011941333611806234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,255,0.013503999759753546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,255,0.013765333841244379
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,511,0.01905599981546402
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,511,0.019173332800467808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,1023,0.015829333414634068
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,1023,0.013738666971524557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,4095,0.021695998807748158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,2047,0.019893333315849304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,4095,0.018298666924238205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,8191,0.03312533348798752
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,2047,0.015829333414634068
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,8191,0.03259733319282532
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,32767,0.0662666658560435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,16383,0.0418453315893809
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,32767,0.06651733318964641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,16383,0.03966933240493139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,float16,65535,0.10099200407663982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,1,0.009989333028594652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,1,128,1,float16,fp8,65535,0.11102400223414104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,1,0.00926399976015091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,3,0.008997333546479544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,7,0.009248000259200731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,31,0.011642667154471079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,15,0.01009599988659223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,3,0.010442666709423065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,7,0.01032533310353756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,15,0.011152000476916632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,63,0.011749333391586939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,31,0.011792000383138657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,63,0.01191466674208641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,127,0.011765333513418833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,127,0.011861333002646765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,511,0.012938667088747025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,255,0.013647999614477158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,511,0.009413333609700203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,255,0.013376000026861826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,2047,0.022592000663280487
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,1023,0.013786666095256805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,1023,0.010858666151762009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,2047,0.01332266628742218
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,8191,0.03754666695992152
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,4095,0.015306666493415833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,4095,0.026346666117509205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,8191,0.02714666724205017
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,16383,0.05619200070699056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,16383,0.0354720006386439
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,32767,0.08443733056386311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,32767,0.061119998494784035
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,float16,65535,0.1378506620724996
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,3,0.009178666397929192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,3,0.009397333487868309
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,1,0.010117333382368088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,1,0.010431999961535135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,2,128,1,float16,fp8,65535,0.11362666885058086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,7,0.009349333122372627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,15,0.009973333527644476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,7,0.009583999713261923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,15,0.009984000275532404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,31,0.011733333269755045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,127,0.011855999628702799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,63,0.011839999506870905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,31,0.012063999970753988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,127,0.01184533288081487
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,255,0.012975999464591345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,63,0.012698666503032049
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,255,0.007834666719039282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,511,0.013455999394257864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,2047,0.02865600089232127
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,511,0.008325333396593729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,2047,0.012906666845083237
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,1023,0.019130667050679524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,4095,0.019968000551064808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,1023,0.010901333143313726
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,4095,0.04251199960708618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,8191,0.059621334075927734
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,8191,0.02775466690460841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,16383,0.0900320013364156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,16383,0.046911999583244324
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,32767,0.06910933554172516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,32767,0.17281599839528403
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,float16,65535,0.22240533431371054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,1,0.00922133338948091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,1,0.009136000027259191
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,3,0.009599999835093817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,3,0.009290666629870733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,4,128,1,float16,fp8,65535,0.114464004834493
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,7,0.009328000247478485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,7,0.009141333401203156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,15,0.009984000275532404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,15,0.010197333370645842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,31,0.011701333026091257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,63,0.011861333002646765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,31,0.012080000092585882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,127,0.010559999694426855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,127,0.006453333422541618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,63,0.011978667229413986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,255,0.013477332890033722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,255,0.007685333490371704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,511,0.019296000401178997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,511,0.008896000062425932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,1023,0.014175999909639359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,2047,0.04334933559099833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,1023,0.028250666956106823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,2047,0.017722666263580322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,4095,0.06701866785685222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,4095,0.02741866558790207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,8191,0.08568533261617024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,16383,0.1699733336766561
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,16383,0.06597333153088887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,8191,0.04192533095677694
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,32767,0.2126506765683492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,32767,0.11275200049082439
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,float16,65535,0.38097067674001056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,32,8,128,1,float16,fp8,65535,0.20838399728139242
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,float16,1,0.34462932745615643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,fp8,1,0.350655992825826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,float16,3,0.35580265522003174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,fp8,3,0.3611786762873332
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,float16,7,0.37255998452504474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,fp8,7,0.3763306538263957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,float16,15,0.4712586800257365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,fp8,15,0.4723840157190959
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,fp8,31,0.4718506733576457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,float16,63,0.48416535059611004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,float16,31,0.4742559989293416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,float16,127,0.06660266717274983
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,fp8,63,0.4757973353068034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,fp8,127,0.07328000168005626
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,float16,255,0.07614399989446004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,float16,1,0.34441065788269043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,1,128,1,float16,fp8,255,0.10745066404342651
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,fp8,3,0.3609600067138672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,float16,3,0.35579200585683185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,float16,7,0.3726133505503337
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,fp8,1,0.3505013386408488
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,fp8,7,0.3767999807993571
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,float16,15,0.47246400515238446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,float16,31,0.48524800936381024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,fp8,63,0.48815464973449707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,float16,63,0.49037333329518634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,fp8,127,0.06745600203673045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,fp8,15,0.4730079968770345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,fp8,31,0.47414398193359375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,float16,127,0.0897759993871053
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,float16,255,0.10599999626477559
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,float16,1,0.3442026774088542
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,2,128,1,float16,fp8,255,0.07993599772453308
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,fp8,1,0.35097599029541016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,float16,3,0.35596267382303876
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,fp8,3,0.3608320156733195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,float16,7,0.37718931833902997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,fp8,7,0.37811732292175293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,float16,15,0.48122668266296387
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,fp8,15,0.475381334622701
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,float16,31,0.4899786710739136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,fp8,31,0.48559999465942383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,float16,63,0.4936480124791463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,fp8,127,0.06797333558400472
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,float16,255,0.1864373286565145
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,float16,127,0.16086933016777039
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,fp8,255,0.08822400371233623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,4,128,1,float16,fp8,63,0.5002719958623251
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,float16,1,0.3450453281402588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,float16,3,0.3614186843236287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,fp8,1,0.3515626589457194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,fp8,3,0.3619786500930786
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,float16,7,0.3936906655629476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,float16,15,0.48656535148620605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,float16,31,0.49531201521555585
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,fp8,7,0.388373335202535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,fp8,15,0.4875200192133586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,fp8,31,0.4944373369216919
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,fp8,63,0.49875199794769287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,float16,127,0.2958186666170756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,float16,63,0.4987039963404338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,fp8,127,0.10857066512107849
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,float16,255,0.3445653518040975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,32,8,128,1,float16,fp8,255,0.17624000708262125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,fp8,1,0.705952008565267
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,float16,3,0.7230453491210938
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,float16,1,0.6954240004221598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,fp8,3,0.7290879885355631
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,fp8,7,0.7656853199005127
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,float16,7,0.7606773376464844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,float16,15,0.9499306678771973
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,fp8,15,0.952783981959025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,float16,31,0.960746685663859
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,fp8,31,0.9567519823710123
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,float16,63,0.9671733379364014
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,fp8,63,0.9678346316019694
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,float16,127,0.12343999743461609
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,float16,1,0.7063626448313395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,1,128,1,float16,fp8,127,0.18348799149195352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,fp8,1,0.7280213038126627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,fp8,3,0.7384853363037109
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,float16,3,0.7343413035074869
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,float16,7,0.7701546351114908
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,fp8,7,0.7793013254801432
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,float16,15,0.9537599881490072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,fp8,15,0.9574986298878988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,float16,31,0.9652427037556967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,fp8,31,0.9735519886016846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,float16,63,0.9726826349894205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,fp8,63,0.9750400384267172
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,float16,127,0.16978132724761963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,2,128,1,float16,fp8,127,0.12987200419108072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,float16,1,0.7220053672790527
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,fp8,1,0.7354239622751871
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,float16,3,0.7426239649454752
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,fp8,3,0.7679306666056315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,float16,7,0.7752479712168375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,fp8,7,0.78765336672465
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,float16,15,0.9589493274688721
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,float16,31,0.9715572992960612
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,fp8,15,0.9648906389872233
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,fp8,31,0.9719573656717936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,float16,63,0.9792799949645996
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,fp8,63,0.9928853511810303
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,float16,127,0.3112799922625224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,4,128,1,float16,fp8,127,0.12760000427563986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,float16,1,0.7315999666849772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,fp8,1,0.7468053499857584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,fp8,3,0.7630506356557211
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,float16,7,0.7833866278330485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,float16,3,0.7505226929982504
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,fp8,7,0.8097493648529053
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,float16,15,0.965557336807251
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,fp8,15,0.9748426278432211
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,float16,31,0.9824426968892416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,fp8,31,0.9818346500396729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,1,0.01479999969402949
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,float16,63,0.9901813666025797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,fp8,63,0.9911893208821615
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,3,0.015226667126019796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,3,0.01540800059835116
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,7,0.015125333021084467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,fp8,127,0.23069866498311362
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,7,0.015397333850463232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,15,0.016250666230916977
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,15,0.016645333419243496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,32,8,128,1,float16,float16,127,0.5786186854044596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,31,0.019871999820073444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,31,0.019925333559513092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,63,0.01989866668979327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,127,0.020128000527620316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,63,0.019962667177120846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,127,0.02000533292690913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,255,0.02349333216746648
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,511,0.01509333277742068
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,255,0.023503998915354412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,511,0.01268799975514412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,1023,0.015664000064134598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,1023,0.013973332941532135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,2047,0.024293333292007446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,2047,0.02609066665172577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,4095,0.0312266672650973
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,4095,0.028234665592511494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,8191,0.04418133199214935
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,8191,0.046757335464159645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,16383,0.06913599868615468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,16383,0.07694399853547414
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,float16,32767,0.10604266325632732
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,1,0.014901333798964819
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,1,128,1,float16,fp8,32767,0.1346933344999949
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,1,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,3,0.015168000012636185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,7,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,7,0.015520000209410986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,3,0.015311999867359797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,15,0.016575999557971954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,15,0.01659199967980385
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,31,0.01988799994190534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,63,0.019973333925008774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,63,0.019904000063737232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,31,0.019962667177120846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,127,0.020101333657900494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,255,0.013002666334311167
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,511,0.013461332768201828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,255,0.009098666409651438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,127,0.020165332903464634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,1023,0.019466667125622433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,511,0.009925333162148794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,2047,0.02956799914439519
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,2047,0.020128000527620316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,1023,0.011946666985750198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,4095,0.0431573341290156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,4095,0.023930666347344715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,8191,0.06035199761390686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,8191,0.03855466594298681
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,16383,0.09104532996813457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,1,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,1,0.015194666882356008
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,float16,32767,0.13226133584976196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,16383,0.06372799972693126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,2,128,1,float16,fp8,32767,0.1109920044740041
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,3,0.015178666760524115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,3,0.015423999478419622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,7,0.015295999745527903
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,7,0.015583999454975128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,15,0.016362667083740234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,15,0.016688000410795212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,31,0.019999999552965164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,63,0.019909333437681198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,31,0.020021333048741024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,63,0.019925333559513092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,127,0.007061333085099856
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,127,0.010709332923094431
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,255,0.013621332744757334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,255,0.00816000004609426
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,511,0.019637333850065868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,511,0.009706666693091393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,2047,0.043925335009892784
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,1023,0.015365333606799444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,1023,0.028021333118279774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,2047,0.019248000035683315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,4095,0.06779733300209045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,4095,0.030762667457262676
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,8191,0.08796800176302592
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,8191,0.04418666660785675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,16383,0.12706133723258972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,1,0.01479999969402949
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,1,0.015279999623696009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,16383,0.06805333495140076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,fp8,32767,0.11482133467992146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,4,128,1,float16,float16,32767,0.3089066743850708
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,3,0.015205333630243937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,3,0.015317333241303762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,7,0.015210667004187902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,7,0.015498666713635126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,15,0.01659199967980385
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,31,0.019925333559513092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,63,0.02004266654451688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,15,0.016672000288963318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,31,0.020015999674797058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,63,0.01995733380317688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,127,0.010725333044926325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,127,0.00701333334048589
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,255,0.02073066681623459
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,255,0.008613333106040955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,511,0.027989332874615986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,1023,0.020293333878119785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,511,0.013573333621025085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,1023,0.045978665351867676
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,2047,0.06590400139490764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,2047,0.02625600000222524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,4095,0.08443199594815572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,4095,0.040778666734695435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,8191,0.12618666887283325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,8191,0.06503466765085857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,32767,0.3796373208363851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,float16,16383,0.21031999588012695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,32767,0.20522665977478027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,1,0.014778666198253632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,32,8,128,1,float16,fp8,16383,0.15498133500417074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,1,0.015194666882356008
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,3,0.015386667102575302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,3,0.015386667102575302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,7,0.015279999623696009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,15,0.016730666160583496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,31,0.019941333681344986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,63,0.020021333048741024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,15,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,31,0.019850666324297588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,7,0.015552000453074774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,63,0.019925333559513092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,127,0.020186666399240494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,255,0.009317333499590555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,127,0.02015999952952067
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,255,0.012949333836634954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,511,0.013343999783198038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,511,0.0107893335322539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,1023,0.011994666109482447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,1023,0.01941866676012675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,2047,0.019973333925008774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,2047,0.02940266579389572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,4095,0.02409599969784419
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,4095,0.04293866455554962
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,16383,0.09177066882451375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,8191,0.06136533121267954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,8191,0.04120533416668574
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,16383,0.06354666749636333
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,1,0.014981333166360855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,1,0.015184000134468079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,fp8,32767,0.09847467144330342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,1,128,1,float16,float16,32767,0.1322719951470693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,3,0.015189333508412043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,3,0.01533866673707962
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,7,0.015109332899252573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,15,0.016645333419243496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,15,0.01657066618402799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,7,0.015477333217859268
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,31,0.02000533292690913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,31,0.01987733319401741
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,63,0.020175999651352566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,255,0.013530666629473368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,127,0.007151999821265538
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,63,0.019989332805077236
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,127,0.010490667074918747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,255,0.009093333035707474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,511,0.01942933350801468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,511,0.009599999835093817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,2047,0.04417600234349569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,1023,0.015573333948850632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,4095,0.07318399846553802
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,1023,0.027813332776228588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,2047,0.019093333433071773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,8191,0.08756267031033833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,4095,0.029663999875386555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,8191,0.044218664367993675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,16383,0.12797866264979044
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,16383,0.06765333314736684
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,1,0.014933332800865173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,1,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,float16,32767,0.3097013235092163
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,3,0.015546667079130808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,2,128,1,float16,fp8,32767,0.11505599816640218
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,3,0.015125333021084467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,7,0.015541333705186844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,7,0.01515199989080429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,15,0.016821333517630894
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,31,0.019866666446129482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,15,0.016421332955360413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,31,0.019871999820073444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,63,0.020053333292404812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,63,0.02027200038234393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,127,0.010709332923094431
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,127,0.006815999746322632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,255,0.02080533280968666
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,255,0.008522666369875273
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,511,0.013584000368913015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,511,0.028181334336598713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,1023,0.0459146648645401
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,2047,0.06651733318964641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,1023,0.020479999482631683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,2047,0.026554666459560394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,4095,0.0853706697622935
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,4095,0.05087466537952423
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,8191,0.1263146698474884
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,8191,0.06447466711203258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,16383,0.11121066411336263
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,16383,0.21170133352279663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,fp8,32767,0.20438400904337564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,4,128,1,float16,float16,32767,0.3811519940694173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,1,0.01544533297419548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,7,0.01571200042963028
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,3,0.015482666591803232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,15,0.016927999754746754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,31,0.020143999407688778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,63,0.020202666521072388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,1,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,127,0.008432000254591307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,3,0.015290666371583939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,127,0.015413332730531693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,63,0.020090666910012562
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,31,0.019839999576409657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,255,0.03027733415365219
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,511,0.04704533517360687
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,255,0.013669333110253016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,15,0.01674666628241539
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,511,0.01966399947802226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,7,0.015370666980743408
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,2047,0.08562133709589641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,1023,0.02568000058333079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,1023,0.061887999375661217
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,8191,0.2121653358141581
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,16383,0.379802664120992
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,16383,0.2917706569035848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,4095,0.12737600008646646
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,32767,0.39079999923706055
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,2047,0.040234667559464775
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,float16,32767,0.7163253625233968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,4095,0.06418666740258534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,1,0.009589333087205887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,16,8,128,1,float16,fp8,8191,0.11053867141405742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,1,0.009461333354314169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,3,0.009445333232482275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,3,0.009429333110650381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,7,0.009375999992092451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,15,0.009535999968647957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,15,0.009888000165422758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,7,0.010304000228643417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,31,0.010314666976531347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,31,0.010351999973257383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,63,0.012053333222866058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,127,0.012047999848922094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,63,0.011887999872366587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,255,0.012256000190973282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,127,0.011909333368142446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,511,0.016586666305859882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,511,0.016469333320856094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,255,0.011877333124478659
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,1023,0.017488000293572743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,1023,0.0174346665541331
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,2047,0.017722666263580322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,2047,0.01757866640885671
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,4095,0.029472000896930695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,4095,0.029338667790095013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,8191,0.01575999955336253
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,8191,0.018687999496857326
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,16383,0.0204373337328434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,16383,0.019061333189407986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,32767,0.023930666347344715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,32767,0.0242399995525678
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,65535,0.029578665892283123
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,65535,0.03049066662788391
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,float16,131071,0.03997333347797394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,1,0.00933333362142245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,1,0.009599999835093817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,3,0.009183999771873156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,1,128,1,float16,fp8,131071,0.04048533240954081
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,7,0.009301333377758661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,3,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,7,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,15,0.009370666618148485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,15,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,31,0.010368000095089277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,31,0.010245333115259806
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,63,0.01191466674208641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,63,0.012096000214417776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,127,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,127,0.011882666498422623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,255,0.011994666109482447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,255,0.011936000237862269
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,511,0.016565332810084026
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,511,0.01646399994691213
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,1023,0.017605333278576534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,2047,0.017946666727463405
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,1023,0.017456000049908955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,2047,0.017504000415404636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,4095,0.01516266663869222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,4095,0.010746666540702185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,8191,0.01886933296918869
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,8191,0.012789333860079447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,16383,0.020096000283956528
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,16383,0.015274666249752045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,32767,0.019098666807015736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,32767,0.024720000723997753
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,65535,0.03145066648721695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,65535,0.024031999210516613
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,1,0.009418666362762451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,1,0.00972800018886725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,fp8,131071,0.03566399961709976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,2,128,1,float16,float16,131071,0.06070933242638906
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,3,0.00933333362142245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,7,0.009173333023985228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,3,0.009535999968647957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,7,0.009557333464423815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,15,0.00943999985853831
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,15,0.009898666913310686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,31,0.010309333602587381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,31,0.01028266673286756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,63,0.01201066623131434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,63,0.011792000383138657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,127,0.011968000481526056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,255,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,127,0.011941333611806234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,255,0.012106666962305704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,511,0.016410666207472484
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,511,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,1023,0.017642666896184284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,1023,0.017594666530688603
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,2047,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,2047,0.00897066667675972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,4095,0.01044800008336703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,8191,0.012576000144084295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,8191,0.019679999599854153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,4095,0.01568000018596649
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,16383,0.021231998999913532
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,16383,0.01481066644191742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,32767,0.035029334326585136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,32767,0.021888000269730885
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,65535,0.04930133124192556
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,65535,0.028255999088287354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,1,0.009461333354314169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,1,0.009290666629870733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,float16,131071,0.08185600241025288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,3,0.009317333499590555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,3,0.009568000212311745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,4,128,1,float16,fp8,131071,0.05593066910902659
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,7,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,7,0.009541333342591921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,15,0.009482666850090027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,15,0.009850666547815004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,31,0.010346666599313417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,31,0.010389333590865135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,63,0.01210133358836174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,127,0.012202666451533636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,63,0.012096000214417776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,127,0.011898666620254517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,255,0.012063999970753988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,255,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,511,0.016442666451136272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,511,0.016554666062196095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,1023,0.013728000223636627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,2047,0.009269333134094873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,2047,0.01543466622630755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,4095,0.016255999604860943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,8191,0.02587733417749405
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,8191,0.014229333649079004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,4095,0.010565333068370819
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,16383,0.03125333289305369
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,16383,0.017375999440749485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,32767,0.0602453351020813
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,32767,0.02455466737349828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,1023,0.008287999778985977
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,65535,0.07879466811815898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,65535,0.05158400038878123
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,float16,131071,0.13361600041389465
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,16,8,128,1,float16,fp8,131071,0.06964266796906789
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,1,0.009343999748428663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,1,0.00966933307548364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,3,0.009423999736706415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,3,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,7,0.009370666618148485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,7,0.010037333394090334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,15,0.009775999933481216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,31,0.010288000106811523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,15,0.00996800015370051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,31,0.010437333335479101
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,63,0.01219733307758967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,63,0.012138667205969492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,127,0.012085333466529846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,127,0.012250666817029318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,255,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,255,0.01210133358836174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,511,0.016538667182127636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,1023,0.01775466650724411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,1023,0.017509333789348602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,511,0.016565332810084026
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,2047,0.029781334102153778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,2047,0.029711998999118805
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,4095,0.013338666409254074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,4095,0.016415999581416447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,8191,0.019359999646743137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,16383,0.021018666525681812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,8191,0.01591466615597407
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,16383,0.018922666708628338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,32767,0.02552533398071925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,32767,0.02441066751877467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,65535,0.03262399882078171
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,65535,0.03068800022204717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,float16,131071,0.06275199850400288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,1,0.009568000212311745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,3,0.009509333098928133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,1,0.009701333319147428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,1,128,1,float16,fp8,131071,0.0480320006608963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,3,0.00985599992175897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,7,0.009466666728258133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,7,0.009626666704813639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,15,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,15,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,31,0.010346666599313417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,31,0.01032533310353756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,63,0.012074666718641916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,63,0.012186666329701742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,127,0.0122079998254776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,127,0.01198400060335795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,255,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,255,0.01239466667175293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,511,0.016501333564519882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,1023,0.01757866640885671
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,511,0.016565332810084026
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,2047,0.015311999867359797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,1023,0.01762666677435239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,2047,0.009674666449427605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,4095,0.01664000004529953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,4095,0.010805333654085795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,8191,0.012917333592971167
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,16383,0.022117334107557934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,8191,0.019978666057189304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,16383,0.01534933348496755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,32767,0.035786665976047516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,32767,0.022783999641736347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,65535,0.02887466549873352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,65535,0.050901333491007485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,1,0.009429333110650381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,1,0.009637333452701569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,float16,131071,0.08286933104197185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,3,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,3,0.009653333574533463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,2,128,1,float16,fp8,131071,0.05602133274078369
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,7,0.009824000298976898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,7,0.009663999701539675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,15,0.00985599992175897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,15,0.009685333197315535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,31,0.010362666721145311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,31,0.010522666076819101
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,63,0.012085333466529846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,63,0.01202133297920227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,127,0.012261333564917246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,127,0.012202666451533636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,255,0.01231466606259346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,255,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,511,0.016517333686351776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,511,0.016586666305859882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,1023,0.013546666751305262
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,1023,0.008207999790708223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,2047,0.01597333326935768
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,2047,0.009258666386206945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,4095,0.01699200024207433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,4095,0.010677333921194077
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,8191,0.02584533393383026
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,8191,0.014384000251690546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,16383,0.03180266668399175
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,16383,0.017477333545684814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,32767,0.05161599814891815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,32767,0.024746666351954143
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,65535,0.07898666461308797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,65535,0.05126399795214335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,float16,131071,0.13474133610725403
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,1,0.009408000235756239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,1,0.009712000067035357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,3,0.00979200005531311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,7,0.009461333354314169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,4,128,1,float16,fp8,131071,0.06952000161012013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,3,0.009818666925032934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,7,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,15,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,15,0.009994666402538618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,31,0.010399999717871347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,63,0.012165332833925882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,31,0.010853332777818045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,127,0.012272000312805176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,63,0.012341332932313284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,127,0.012240000069141388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,255,0.012245333443085352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,255,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,511,0.012986666212479273
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,511,0.007871999715765318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,1023,0.013434667140245438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,1023,0.008522666369875273
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,2047,0.0102613332370917
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,2047,0.020970667401949566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,4095,0.024149333437283833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,4095,0.01198400060335795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,8191,0.01603200038274129
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,16383,0.05468266705671946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,8191,0.03554133325815201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,16383,0.028160000840822857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,32767,0.08178133269151051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,65535,0.13354667027791342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,32767,0.04162666698296865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,65535,0.07276799778143565
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,1,0.014783999572197596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,1,0.015024000157912573
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,float16,131071,0.23384000857671103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,16,8,128,1,float16,fp8,131071,0.11711466312408447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,3,0.015194666882356008
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,3,0.015423999478419622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,7,0.015834666788578033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,7,0.01584533353646596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,15,0.019354666272799175
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,15,0.01934933289885521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,31,0.019391999890406925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,31,0.019402666638294857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,63,0.019546666493018467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,63,0.01941866676012675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,127,0.010672000547250112
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,127,0.007887999837597212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,255,0.009434666484594345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,255,0.01357866699496905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,511,0.01966933285196622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,511,0.01108266661564509
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,1023,0.028565332293510437
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,1023,0.01988799994190534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,2047,0.045114666223526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,2047,0.027802666028340656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,4095,0.0695306658744812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,4095,0.04412800073623657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,8191,0.08917333682378133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,8191,0.06122133135795593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,1,0.014842666685581207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,fp8,16383,0.08820799986521403
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,1,128,1,float16,float16,16383,0.13057066996892294
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,1,0.015178666760524115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,3,0.015263999501864115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,3,0.015439999600251516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,7,0.015605332950750986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,7,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,15,0.019600000232458115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,15,0.01930133377512296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,31,0.019546666493018467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,31,0.019359999646743137
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,63,0.01951466624935468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,63,0.01933866615096728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,127,0.010581333190202713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,127,0.006837333242098491
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,255,0.02107733239730199
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,255,0.009248000259200731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,511,0.0281333327293396
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,1023,0.04595200220743815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,1023,0.021962667504946392
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,511,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,2047,0.06633600095907848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,2047,0.029674666623274486
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,4095,0.0864533285299937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,4095,0.04298666616280874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,1,0.014794666320085526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,1,0.015141333142916361
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,8191,0.06714666883150737
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,8191,0.127920001745224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,float16,16383,0.3081013361612956
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,3,0.015317333241303762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,3,0.015439999600251516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,2,128,1,float16,fp8,16383,0.11381866534550984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,7,0.015765332927306492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,7,0.015967999895413715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,15,0.01964266722400983
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,15,0.019354666272799175
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,31,0.01956266661485036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,31,0.01950399950146675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,63,0.019541333119074505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,63,0.019498666127522785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,127,0.015669333438078564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,127,0.007631999750932057
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,255,0.03030933439731598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,255,0.013434667140245438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,511,0.04637333254019419
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,1023,0.06180266539255778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,511,0.01995733380317688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,2047,0.0857919951279958
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,2047,0.04074666649103165
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,4095,0.12683199842770895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,4095,0.0647680014371872
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,1023,0.026015999416510265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,8191,0.21329599618911743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,8191,0.11137066284815471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,float16,16383,0.5826239983240763
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,1,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,1,0.015205333630243937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,3,0.015226667126019796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,3,0.015392000476519266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,4,128,1,float16,fp8,16383,0.2047733267148336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,7,0.016176000237464905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,7,0.016037333756685257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,15,0.01937066639463107
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,15,0.01961600035429001
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,31,0.01947733387351036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,31,0.019402666638294857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,63,0.01960533360640208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,63,0.01985599969824155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,127,0.025663999219735462
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,255,0.049839998284975685
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,127,0.011152000476916632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,255,0.01977066695690155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,511,0.06445866823196411
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,511,0.025802666942278545
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,1023,0.08242133259773254
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,1023,0.040394666294256844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,2047,0.08494933446248372
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,4095,0.11105599999427795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,2047,0.12823466459910074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,4095,0.2148639957110087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,8191,0.38313066959381104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,8191,0.20413333177566528
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,fp8,16383,0.391269326210022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,16,8,128,1,float16,float16,16383,0.720965305964152
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,1,0.009466666728258133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,1,0.009861333295702934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,7,0.00979200005531311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,7,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,3,0.011120000233252844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,3,0.010506667196750641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,31,0.010608000059922537
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,15,0.009786666681369146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,15,0.009717333440979322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,31,0.010389333590865135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,63,0.012122667084137598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,63,0.012085333466529846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,127,0.01231466606259346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,127,0.01209066684047381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,255,0.012469333906968435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,255,0.012117333710193634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,511,0.016650666793187458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,511,0.016800000021855038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,1023,0.029525332152843475
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,1023,0.02961066613594691
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,2047,0.017423999806245167
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,4095,0.01803733284274737
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,2047,0.011440000186363855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,4095,0.013397333522637686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,8191,0.02141333371400833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,16383,0.02422400067249934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,8191,0.016074666132529575
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,16383,0.019386666516462963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,32767,0.0283146674434344
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,32767,0.038106667498747505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,65535,0.036981334288915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,1,0.009850666547815004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,65535,0.053583999474843345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,1,0.00955200009047985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,float16,131071,0.10844799876213074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,1,128,1,float16,fp8,131071,0.07106666763623555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,3,0.009717333440979322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,7,0.009525333220760027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,7,0.00996800015370051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,3,0.011045332998037338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,31,0.010559999694426855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,15,0.009962666779756546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,15,0.010757333288590113
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,63,0.012421333541472753
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,63,0.012063999970753988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,127,0.012357333054145178
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,255,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,255,0.01231466606259346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,127,0.012373333175977072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,511,0.016800000021855038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,1023,0.013541333377361298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,511,0.016762666404247284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,1023,0.008736000085870424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,2047,0.017173333714405697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,2047,0.009610666582981745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,4095,0.01844800015290578
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,8191,0.02808533360560735
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,4095,0.011136000355084738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,8191,0.015087999403476715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,16383,0.03340800106525421
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,16383,0.0182239996890227
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,32767,0.05365333457787832
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,32767,0.027050666511058807
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,65535,0.0812906672557195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,65535,0.05209066470464071
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,float16,131071,0.13714667161305746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,1,0.010005333150426546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,2,128,1,float16,fp8,131071,0.07110933462778728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,1,0.009525333220760027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,3,0.009589333087205887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,3,0.009818666925032934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,7,0.009509333098928133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,7,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,15,0.009994666402538618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,15,0.01002133327225844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,31,0.01062400018175443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,31,0.010506667196750641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,63,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,63,0.012159999459981918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,127,0.012304000556468964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,127,0.012202666451533636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,255,0.012426666915416718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,255,0.01227733368674914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,511,0.012693333129088083
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,511,0.007781333600481351
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,1023,0.01341333364446958
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,1023,0.008378666515151659
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,2047,0.022074667116006214
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,2047,0.010224000240365664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,4095,0.025333332518736523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,4095,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,8191,0.03708266715208689
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,8191,0.01603200038274129
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,16383,0.055999999245007835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,16383,0.028309332827727
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,32767,0.08306133250395457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,32767,0.04154133299986521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,65535,0.13557866215705872
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,65535,0.07245866457621257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,1,0.009925333162148794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,1,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,float16,131071,0.23587199052174887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,3,0.009557333464423815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,4,128,1,float16,fp8,131071,0.116976002852122
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,3,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,7,0.009653333574533463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,7,0.010069333637754122
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,15,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,31,0.0103946669648091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,31,0.01080000028014183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,15,0.010128000130256018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,63,0.012213333199421564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,63,0.012383999923865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,127,0.012240000069141388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,127,0.012362666428089142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,255,0.012847999731699625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,255,0.007397333160042763
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,511,0.013093333691358566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,511,0.007807999849319458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,1023,0.019306667149066925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,1023,0.009322666873534521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,2047,0.028607999285062153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,2047,0.011461333682139715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,4095,0.0421066681543986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,4095,0.01870399961868922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,8191,0.059605335195859276
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,8191,0.02480533222357432
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,16383,0.09103467067082723
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,16383,0.04403733213742574
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,32767,0.06653866668542226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,32767,0.12904000282287598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,65535,0.11185066898663838
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,65535,0.3132373293240865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,float16,131071,0.40617601076761883
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,1,0.025920001169045765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,1,0.026208000878492992
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,3,0.026698666314284008
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,16,8,128,1,float16,fp8,131071,0.2031360069910685
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,7,0.0276853342851003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,3,0.027263998985290527
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,7,0.027749332288901012
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,15,0.034527999659379326
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,31,0.0349440003434817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,31,0.03489600121974945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,63,0.03517866631348928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,15,0.03492266684770584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,63,0.03492266684770584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,127,0.007893333211541176
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,255,0.02142400046189626
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,255,0.010901333143313726
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,511,0.02886933336655299
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,511,0.018960000326236088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,127,0.011999999483426413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,1023,0.04776533444722494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,1023,0.03305600086847941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,2047,0.07392533123493195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,2047,0.044821331898371376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,4095,0.08725333213806152
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,4095,0.06088533500830332
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,float16,8191,0.129120002190272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,1,0.02593066543340683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,1,0.026288000245889027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,3,0.026789332429567974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,3,0.027162666122118633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,1,128,1,float16,fp8,8191,0.087909330924352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,7,0.02771199991305669
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,7,0.027984000742435455
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,15,0.03495466709136963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,31,0.03497066597143809
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,63,0.03507733345031738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,127,0.016149333367745083
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,15,0.03477866699298223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,127,0.007600000128149986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,31,0.03474666674931844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,63,0.03489600121974945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,255,0.030581332743167877
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,255,0.015184000134468079
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,511,0.047093331813812256
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,511,0.02201066662867864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,1023,0.02938666691382726
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,4095,0.12838400403658548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,2047,0.043136000633239746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,1023,0.0621919979651769
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,2047,0.10141332944234212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,4095,0.06708799799283345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,float16,8191,0.2157599925994873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,1,0.026000000536441803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,1,0.026447998980681103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,2,128,1,float16,fp8,8191,0.1147093375523885
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,3,0.027322667340437572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,7,0.028064000109831493
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,3,0.026682667434215546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,7,0.02775999903678894
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,15,0.034858666360378265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,15,0.03491200009981791
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,31,0.03499733408292135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,31,0.03495466709136963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,63,0.03525333354870478
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,63,0.03503466645876566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,255,0.049423997600873314
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,255,0.020143999407688778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,127,0.01118933285276095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,127,0.025568000972270966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,511,0.06451733410358429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,511,0.02651199946800868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,1023,0.041082667807737984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,1023,0.0830026666323344
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,2047,0.13008532921473184
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,2047,0.06519466638565063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,4095,0.11244266231854756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,4095,0.30929599205652875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,float16,8191,0.38728535175323486
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,1,0.0264533335963885
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,4,128,1,float16,fp8,8191,0.20454400777816772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,1,0.026026666164398193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,3,0.02701333413521449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,3,0.02740799884001414
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,7,0.028021333118279774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,15,0.034976000587145485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,15,0.035071998834609985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,7,0.02811199923356374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,31,0.03530666728814443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,63,0.03543466577927271
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,31,0.034976000587145485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,63,0.03533333291610082
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,127,0.04461866617202759
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,255,0.053583999474843345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,127,0.015925332903862
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,255,0.02199466774861018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,511,0.07110400001207988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,511,0.03692800054947535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,1023,0.11483200391133626
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,1023,0.06108266611893972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,2047,0.2056480050086975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,4095,0.37451199690500897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,2047,0.10813867052396138
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,4095,0.2901493310928345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,fp8,8191,0.38765867551167804
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,16,8,128,1,float16,float16,8191,0.7110666433970133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,1,0.04837333162625631
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,3,0.05012266834576925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,1,0.04734933376312256
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,7,0.05124266445636749
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,7,0.05161599814891815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,3,0.04924266537030538
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,15,0.06408533453941345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,15,0.06388799846172333
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,31,0.06461866696675618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,31,0.06398400167624156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,63,0.064560001095136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,127,0.01651200031240781
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,63,0.06469866633415222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,127,0.008799999952316284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,255,0.031136001149813335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,255,0.018042666216691334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,511,0.048714667558670044
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,511,0.03232000023126602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,1023,0.06386133531729381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,1023,0.049882665276527405
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,fp8,2047,0.060378665725390114
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,1,0.04750399788220724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,1,128,1,float16,float16,2047,0.08822400371233623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,1,0.04828266799449921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,3,0.04943466683228811
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,3,0.04993066688378652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,7,0.051311999559402466
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,7,0.05153599878152212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,15,0.06397866706053416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,31,0.06454400221506755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,63,0.06465599934260051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,15,0.06411199768384297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,31,0.0642133355140686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,127,0.026352000733216602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,63,0.06470933556556702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,127,0.012367999802033106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,255,0.05036266644795736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,255,0.022111999491850536
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,511,0.06586666901906331
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,1023,0.043765331308046974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,2047,0.08988266189893086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,1023,0.08373333017031352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,fp8,511,0.030058667063713074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,1,0.04743466774622599
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,2,128,1,float16,float16,2047,0.13107200463612875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,1,0.048394665122032166
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,3,0.04949333270390829
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,3,0.05022400120894114
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,7,0.051269332567850746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,15,0.06426133215427399
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,63,0.06488533318042755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,7,0.051685333251953125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,15,0.06422933439413707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,31,0.0646666685740153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,31,0.06425066788991292
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,63,0.06479466458161671
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,127,0.016042667130629223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,127,0.04557333389918009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,255,0.05395199855168661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,255,0.022815999885400135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,511,0.07329066594441731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,511,0.03766400118668874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,1023,0.11566399534543355
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,1023,0.061280002196629844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,float16,2047,0.20728532473246256
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,1,0.04770133395989736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,4,128,1,float16,fp8,2047,0.153029332558314
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,1,0.048826664686203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,3,0.0496319979429245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,3,0.050661335388819374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,7,0.05213333169619242
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,7,0.05156266689300537
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,31,0.06480533381303151
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,15,0.06460266808668773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,15,0.0645546664794286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,63,0.06970666845639546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,63,0.06515199939409892
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,127,0.08224533498287201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,255,0.10407466689745586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,31,0.06473066906134288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,127,0.029189333319664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,255,0.04297600189844767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,511,0.12971733013788858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,2047,0.5832266807556152
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,511,0.06558933357397716
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,1,0.09179199735323589
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,float16,1023,0.20922134319941202
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,1023,0.11126400033632915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,1,0.0902933379014333
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,16,8,128,1,float16,fp8,2047,0.20139733950297037
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,3,0.0932426651318868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,3,0.09436266620953877
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,7,0.09798933068911235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,7,0.09717866778373718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,15,0.12218667070070903
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,15,0.1220746636390686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,31,0.12292800347010295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,31,0.12203733126322429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,63,0.12334400415420532
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,63,0.12266666690508525
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,127,0.02716800073782603
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,255,0.0529120018084844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,255,0.03189333279927572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,1,0.09013866384824117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,127,0.015125333021084467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,511,0.06779733300209045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,float16,1023,0.08679466446240743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,511,0.0469706654548645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,1,128,1,float16,fp8,1023,0.07098133365313213
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,3,0.09450133641560872
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,15,0.12245866656303406
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,1,0.09185600280761719
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,3,0.09324799974759419
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,31,0.12271466851234436
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,7,0.09851732850074768
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,7,0.0972213347752889
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,15,0.12246400117874146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,31,0.12242133418718974
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,63,0.12330133716265361
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,127,0.01817600056529045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,63,0.12319466471672058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,127,0.04738666613896688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,255,0.05532266696294149
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,255,0.025946666797002155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,511,0.07463466624418895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,511,0.04072533299525579
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,float16,1023,0.11833600203196208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,2,128,1,float16,fp8,1023,0.0860640009244283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,1,0.09212266405423482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,7,0.09875733653704326
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,3,0.09341333309809367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,3,0.09494933485984802
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,1,0.09034666419029236
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,7,0.09735467036565144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,15,0.1225920021533966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,15,0.12268799543380737
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,31,0.12266666690508525
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,31,0.1232266624768575
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,63,0.1306880017121633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,63,0.12343999743461609
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,127,0.08302933474381764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,127,0.031290667752424874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,255,0.05261866748332977
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,255,0.09623466928799947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,511,0.13105066617329916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,1023,0.11221866806348164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,1,0.09061333537101746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,fp8,511,0.06669333577156067
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,3,0.0937600036462148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,1,0.09264533718427022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,3,0.09564266602198283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,4,128,1,float16,float16,1023,0.21158933639526367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,7,0.09779733419418335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,31,0.13433067003885904
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,7,0.0995306670665741
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,15,0.12315733234087627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,15,0.12340266505877177
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,63,0.13310933113098145
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,127,0.1521813372770945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,31,0.1241919994354248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,63,0.13225066661834717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,127,0.06182399888833364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,255,0.17629333337148032
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,255,0.07549866537253062
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,1,0.009653333574533463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,511,0.11736533045768738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,1023,0.3935413360595703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,float16,511,0.24131200710932413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,1,0.009930666536092758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,16,8,128,1,float16,fp8,1023,0.20101332664489746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,3,0.009754666437705358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,3,0.009813333551088968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,7,0.009525333220760027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,7,0.009893333539366722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,31,0.010533332824707031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,15,0.00984533317387104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,15,0.01081066702802976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,63,0.012191999703645706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,63,0.012133333832025528
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,127,0.012346666306257248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,127,0.012282667060693106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,255,0.01251199965675672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,255,0.012149333953857422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,511,0.02807466685771942
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,511,0.028021333118279774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,1023,0.013770667215188345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,1023,0.0103946669648091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,4095,0.018778666853904724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,2047,0.011770666887362799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,4095,0.013386666774749756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,2047,0.01738133281469345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,8191,0.027664000789324444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,16383,0.03384533276160558
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,16383,0.02250133454799652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,8191,0.01859733338157336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,32767,0.06483200192451477
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,32767,0.04332800209522247
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,65535,0.08322133123874664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,1,0.009535999968647957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,3,0.009637333452701569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,65535,0.05756799876689911
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,1,0.0099093330403169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,3,0.009919999788204828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,float16,131071,0.13822399576505026
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,1,128,1,float16,fp8,131071,0.0990559955437978
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,7,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,7,0.00972800018886725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,15,0.009759999811649323
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,15,0.010128000130256018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,31,0.010575999816258749
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,63,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,63,0.012389333297808966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,31,0.01109333336353302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,127,0.012389333297808966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,127,0.012304000556468964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,255,0.012565333396196365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,255,0.01231466606259346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,511,0.013050666699806849
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,511,0.008234666660428047
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,1023,0.01341333364446958
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,2047,0.02222399910291036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,2047,0.010944000134865442
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,1023,0.00966933307548364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,4095,0.025472000241279602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,4095,0.013007999708255133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,8191,0.037231999138991036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,8191,0.017194667210181553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,16383,0.05709866682688395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,16383,0.02959466725587845
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,32767,0.08333866794904073
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,65535,0.07454399764537811
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,32767,0.043866669138272606
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,65535,0.17983466386795044
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,float16,131071,0.2347360054651896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,1,0.009514666472872099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,1,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,3,0.009663999701539675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,2,128,1,float16,fp8,131071,0.1193386713663737
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,3,0.01009599988659223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,7,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,7,0.010431999961535135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,15,0.009786666681369146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,15,0.010079999764760336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,31,0.010741333166758219
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,31,0.010714666297038397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,63,0.012469333906968435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,63,0.012191999703645706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,127,0.012293333808581034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,127,0.012319999436537424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,255,0.012757333616415659
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,255,0.007674666742483775
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,511,0.01340266689658165
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,511,0.007983999947706858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,1023,0.009359999870260557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,1023,0.019130667050679524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,2047,0.028533334533373516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,4095,0.01865600049495697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,2047,0.011424000064531961
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,8191,0.05946666498978933
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,4095,0.041189332803090416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,8191,0.024735999604066212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,16383,0.04403733213742574
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,16383,0.09001066287358601
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,32767,0.17126933733622232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,32767,0.06674133241176605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,65535,0.22030399243036905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,65535,0.11230400204658508
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,1,0.01009599988659223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,1,0.00980266680320104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,float16,131071,0.40649600823720294
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,3,0.009583999713261923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,4,128,1,float16,fp8,131071,0.20248534282048544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,3,0.009999999776482582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,7,0.009663999701539675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,15,0.009861333295702934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,7,0.010048000141978264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,15,0.010138666878143946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,31,0.010597333312034607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,63,0.01240533341964086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,31,0.010837333897749582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,63,0.012373333175977072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,127,0.010405333091815313
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,255,0.013440000514189402
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,127,0.006864000111818314
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,255,0.00772266648709774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,511,0.01930133377512296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,511,0.008762666955590248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,1023,0.02828799933195114
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,1023,0.0141546664138635
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,2047,0.043290664752324425
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,2047,0.017717332889636356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,4095,0.06845866640408833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,4095,0.027258666853109997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,8191,0.10140267014503479
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,8191,0.04152533411979675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,16383,0.1274186670780182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,16383,0.06520533561706543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,32767,0.1120853324731191
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,32767,0.2121386726697286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,65535,0.38205866018931073
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,65535,0.20654932657877603
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,float16,1,0.17465599377950033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,float16,131071,1.1454506715138753
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,16,8,128,1,float16,fp8,131071,0.3964000145594279
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,fp8,1,0.17819199959437051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,float16,3,0.18039999405543009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,float16,7,0.18982400496800741
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,fp8,3,0.18254399299621582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,fp8,7,0.1919040083885193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,fp8,15,0.2392639915148417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,float16,31,0.23985600471496582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,float16,15,0.23881600300470987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,fp8,31,0.23914666970570883
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,float16,63,0.24263467391331991
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,fp8,63,0.2400533358256022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,fp8,127,0.02775466690460841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,float16,127,0.04991999765237173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,float16,255,0.06764266888300578
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,float16,1,0.17455466588338217
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,fp8,255,0.042026668787002563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,fp8,511,0.05610666672388712
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,fp8,1,0.1781546672185262
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,1,128,1,float16,float16,511,0.07954666515191396
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,float16,3,0.18060266971588135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,fp8,3,0.18289599816004434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,float16,7,0.1896373430887858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,fp8,7,0.19219199816385904
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,fp8,15,0.23972266912460327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,float16,15,0.23915199438730875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,float16,31,0.2410773237546285
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,fp8,31,0.23914132515589395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,float16,63,0.2512906591097514
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,float16,127,0.08945066730181377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,fp8,127,0.036943999429543815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,fp8,63,0.24302933613459268
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,float16,255,0.09891200065612793
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,fp8,255,0.049423997600873314
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,fp8,511,0.07083733379840851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,2,128,1,float16,float16,511,0.13555733362833658
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,float16,1,0.1747786601384481
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,fp8,1,0.17830399672190347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,fp8,3,0.18355733156204224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,float16,3,0.18104533354441324
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,float16,7,0.19029333194096884
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,fp8,7,0.19300800561904907
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,float16,15,0.24187199274698892
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,fp8,15,0.24061334133148193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,float16,31,0.2516266703605652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,fp8,31,0.24477332830429077
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,fp8,63,0.2537600000699361
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,float16,63,0.25329599777857464
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,float16,127,0.1546239952246348
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,fp8,127,0.05808533231417338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,fp8,255,0.07794133325417836
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,float16,255,0.17963733275731406
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,float16,1,0.17541333039601645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,fp8,1,0.17941333850224814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,float16,511,0.246943990389506
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,4,128,1,float16,fp8,511,0.162773331006368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,fp8,3,0.1852746605873108
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,float16,3,0.18178667624791464
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,float16,7,0.1954773267110189
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,fp8,7,0.1949173410733541
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,float16,15,0.25005332628885907
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,fp8,15,0.24474666515986124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,float16,31,0.25569067398707074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,float16,63,0.25774399439493817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,fp8,31,0.25863999128341675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,float16,127,0.29389333724975586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,fp8,127,0.10513599713643391
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,float16,255,0.34091734886169434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,fp8,255,0.1420746644337972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,fp8,63,0.25724265972773236
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,float16,511,0.46730132897694904
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,16,8,128,1,float16,fp8,511,0.2216213345527649
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,fp8,1,0.35124798615773517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,fp8,3,0.3604960044225057
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,float16,3,0.35470398267110187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,float16,7,0.3746933142344157
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,float16,1,0.3445386489232381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,float16,15,0.473690668741862
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,fp8,7,0.37885868549346924
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,fp8,15,0.47380268573760986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,float16,31,0.485861341158549
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,fp8,31,0.47421332200368244
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,float16,63,0.491215984026591
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,fp8,63,0.48558398087819415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,fp8,127,0.06805866460005443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,float16,127,0.09603200356165568
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,float16,1,0.3445546627044678
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,fp8,255,0.08031466603279114
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,float16,3,0.3557600180308024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,fp8,3,0.36075735092163086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,fp8,1,0.35077333450317383
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,1,128,1,float16,float16,255,0.10523200035095215
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,float16,7,0.3782186508178711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,float16,15,0.4820266564687093
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,fp8,15,0.47652268409729004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,float16,31,0.49000000953674316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,fp8,31,0.48636265595753986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,fp8,7,0.3791786829630534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,float16,63,0.49424533049265545
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,float16,127,0.1612160007158915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,fp8,63,0.4946719805399577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,fp8,127,0.06816000243028005
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,float16,1,0.34487466017405194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,fp8,1,0.35150400797526044
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,float16,255,0.18717867136001587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,float16,3,0.36215468247731525
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,2,128,1,float16,fp8,255,0.0876533289750417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,fp8,3,0.3628000020980835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,fp8,7,0.3877120018005371
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,float16,7,0.3943573236465454
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,fp8,15,0.49168535073598224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,float16,15,0.4870986541112264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,float16,31,0.4958133300145467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,float16,63,0.49856531620025635
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,fp8,63,0.499722679456075
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,fp8,31,0.4949653148651123
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,float16,127,0.29767467578252155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,float16,1,0.35893865426381427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,fp8,1,0.35755733648935956
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,float16,255,0.37812801202138263
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,fp8,127,0.10889599720637004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,float16,3,0.3826346794764201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,4,128,1,float16,fp8,255,0.14698132872581482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,float16,7,0.4039146502812703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,fp8,3,0.3808906475702922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,fp8,7,0.41088000933329266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,float16,15,0.49241065979003906
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,fp8,15,0.5045919815699259
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,fp8,31,0.5040426651636759
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,float16,63,0.5078080097834269
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,float16,31,0.5033599932988485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,fp8,63,0.508410652478536
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,1,0.009850666547815004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,1,0.009359999870260557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,3,0.009136000027259191
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,fp8,127,0.20303465922673544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,3,0.009472000102202097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,7,0.009242666885256767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,float16,255,0.670197327931722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,fp8,255,0.32738133271535236
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,16,8,128,1,float16,float16,127,0.5726026693979899
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,7,0.00933333362142245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,15,0.010026666646202406
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,31,0.011749333391586939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,15,0.010117333382368088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,31,0.011957333733638128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,63,0.011823999385039011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,63,0.011792000383138657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,127,0.011834666132926941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,255,0.013418667018413544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,511,0.009546666716535887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,255,0.013861333330472311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,127,0.011882666498422623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,511,0.012831999609867731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,1023,0.01358933374285698
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,2047,0.0225600004196167
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,1023,0.010640000303586325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,2047,0.01322666679819425
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,4095,0.02565866708755493
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,4095,0.015520000209410986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,8191,0.03878933439652125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,8191,0.026778665681680042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,16383,0.05707733333110809
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,16383,0.035402665535608925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,32767,0.10319466392199199
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,32767,0.06178666651248932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,float16,65535,0.1381013294061025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,1,0.00915733352303505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,1,128,1,float16,fp8,65535,0.09701333443323772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,1,0.00996800015370051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,3,0.009002666920423508
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,3,0.009450666606426239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,7,0.009136000027259191
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,7,0.009328000247478485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,15,0.009962666779756546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,15,0.01007466639081637
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,31,0.011765333513418833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,31,0.011850666254758835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,63,0.011610666910807291
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,63,0.01180800050497055
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,127,0.011706666400035223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,127,0.011941333611806234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,511,0.013381333400805792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,255,0.007930666829148928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,255,0.012757333616415659
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,511,0.008538666491707167
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,1023,0.01913600042462349
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,2047,0.02887466549873352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,1023,0.010112000008424124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,2047,0.012773333738247553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,4095,0.04165333261092504
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,8191,0.05993066728115082
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,8191,0.026687999566396076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,4095,0.020080000162124634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,16383,0.09146133065223694
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,16383,0.056757330894470215
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,32767,0.1302079955736796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,32767,0.06858666737874348
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,1,0.00915733352303505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,3,0.009183999771873156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,1,0.009296000003814697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,3,0.009423999736706415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,float16,65535,0.22113066911697388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,2,128,1,float16,fp8,65535,0.11429333686828613
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,7,0.009423999736706415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,7,0.009488000224033991
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,15,0.01002133327225844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,15,0.010037333394090334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,31,0.011813333878914515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,63,0.011882666498422623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,31,0.0116799995303154
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,63,0.012063999970753988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,127,0.010175999874869982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,127,0.00666133314371109
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,255,0.013343999783198038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,255,0.007621333623925845
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,511,0.019093333433071773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,511,0.008885333314538002
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,1023,0.02792533238728841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,1023,0.014309333016475042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,2047,0.01781333362062772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,2047,0.04355733096599579
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,4095,0.06727999945481618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,4095,0.02759466568628947
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,8191,0.04164266586303711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,8191,0.10059199730555217
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,16383,0.06558399895826976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,16383,0.1264693339665731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,32767,0.2121493419011434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,1,0.009093333035707474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,1,0.009194666519761086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,32767,0.11187733213106792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,float16,65535,0.3823946714401245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,4,128,1,float16,fp8,65535,0.20643200476964316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,3,0.009519999846816063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,3,0.009285333255926767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,7,0.009306666751702627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,7,0.00956266683836778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,15,0.009904000287254652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,15,0.010288000106811523
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,31,0.011690666278203329
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,31,0.01192533348997434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,63,0.011994666109482447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,63,0.011690666278203329
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,127,0.006730666384100914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,127,0.010512000570694605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,255,0.020597333709398907
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,255,0.008394666636983553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,511,0.027999999622503918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,511,0.013386666774749756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,1023,0.04585599899291992
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,1023,0.020224000016848247
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,2047,0.06585066517194112
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,2047,0.025946666797002155
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,4095,0.0995199978351593
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,4095,0.039962666730086006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,8191,0.1251573363939921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,8191,0.06418133278687795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,16383,0.21121066808700562
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,16383,0.1106666624546051
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,32767,0.20357867081960043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,32767,0.37798933188120526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,1,0.009066666786869368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,1,0.009237333511312803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,float16,65535,1.1357813676198323
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,3,0.009290666629870733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,16,8,128,1,float16,fp8,65535,0.39084800084431964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,3,0.009477333476146063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,7,0.009306666751702627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,7,0.00955200009047985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,15,0.010005333150426546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,15,0.010037333394090334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,31,0.011711999773979187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,31,0.011893333246310553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,63,0.011946666985750198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,127,0.011765333513418833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,127,0.011909333368142446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,63,0.011946666985750198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,255,0.012709333250919977
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,255,0.00789866658548514
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,511,0.013242666920026144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,511,0.008496000121037165
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,1023,0.019248000035683315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,1023,0.010202666744589806
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,2047,0.029253333806991577
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,2047,0.012784000486135483
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,4095,0.04257600009441376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,4095,0.01988799994190534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,8191,0.06623466809590657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,8191,0.027082666754722595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,16383,0.0925546685854594
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,16383,0.046906664967536926
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,32767,0.1317813297112783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,32767,0.06884266436100006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,1,0.008938666433095932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,1,0.009269333134094873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,float16,65535,0.21925866603851318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,1,128,1,float16,fp8,65535,0.114138662815094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,3,0.00903466654320558
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,3,0.009408000235756239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,7,0.009338666374484697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,15,0.009898666913310686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,7,0.009242666885256767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,15,0.010138666878143946
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,31,0.011749333391586939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,31,0.011893333246310553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,63,0.011882666498422623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,63,0.011658667276302973
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,127,0.010533332824707031
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,127,0.006437333300709724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,255,0.013429333766301474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,255,0.007690666864315669
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,511,0.00871999996403853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,511,0.0191040001809597
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,1023,0.02807466685771942
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,1023,0.014111999422311783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,2047,0.04381333291530609
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,2047,0.017664000391960144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,4095,0.027242665489514668
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,4095,0.06739200154940288
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,8191,0.08756267031033833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,8191,0.0415786678592364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,16383,0.12718400359153748
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,16383,0.06519466638565063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,32767,0.21312000354131064
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,32767,0.1567466656366984
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,1,0.009088000282645226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,1,0.009434666484594345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,3,0.009285333255926767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,float16,65535,0.3818346659342448
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,2,128,1,float16,fp8,65535,0.20666666825612387
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,3,0.009397333487868309
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,7,0.009370666618148485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,7,0.009365333244204521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,15,0.010112000008424124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,15,0.010159999753038088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,31,0.011711999773979187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,31,0.012117333710193634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,63,0.011727999895811081
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,127,0.01051733394463857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,63,0.012015999605258306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,255,0.020474666108687718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,127,0.006645333642760913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,255,0.008346666892369589
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,511,0.028138667345046997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,511,0.013536000003417334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,1023,0.02021866664290428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,1023,0.04588800172011057
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,2047,0.026047999660174053
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,2047,0.06664533416430156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,4095,0.10012800494829814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,4095,0.04038399954636892
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,8191,0.12706666191418967
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,8191,0.06404266754786174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,16383,0.21326400836308798
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,16383,0.11066133777300517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,32767,0.20382932821909586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,32767,0.3792159954706828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,1,0.009119999905427298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,fp8,65535,0.3917386531829834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,8,4,128,1,float16,float16,65535,1.1378933588663738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,1,0.009648000200589498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,3,0.009477333476146063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,3,0.009354666496316591
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,7,0.009290666629870733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,7,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,15,0.009301333377758661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,15,0.009578666960199675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,31,0.0103946669648091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,31,0.010250666489203772
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,63,0.012085333466529846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,63,0.011823999385039011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,127,0.011792000383138657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,255,0.011882666498422623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,127,0.011893333246310553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,511,0.0164533331990242
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,255,0.011887999872366587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,1023,0.017488000293572743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,511,0.01613866661985715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,2047,0.017573333034912746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,2047,0.017445333302021027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,1023,0.01741333305835724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,4095,0.017642666896184284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,4095,0.017562666287024815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,8191,0.018186666071414948
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,8191,0.012879999975363413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,16383,0.0198186660806338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,16383,0.015290666371583939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,32767,0.02359466751416524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,32767,0.019146667172511418
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,65535,0.029285334050655365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,65535,0.024218666056791942
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,1,0.009141333401203156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,float16,131071,0.040106666584809623
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,1,0.00933333362142245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,3,0.00921066664159298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,3,0.00938666673998038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,1,128,1,float16,fp8,131071,0.03165333221356074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,7,0.009237333511312803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,7,0.00955200009047985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,15,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,15,0.009701333319147428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,31,0.010378666842977205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,31,0.009984000275532404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,63,0.011909333368142446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,63,0.01184533288081487
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,127,0.011999999483426413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,127,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,255,0.012106666962305704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,255,0.012005332857370377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,511,0.01616000011563301
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,511,0.01628799984852473
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,1023,0.01752000053723653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,1023,0.017488000293572743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,2047,0.017711999515692394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,2047,0.01748266691962878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,4095,0.01543466622630755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,4095,0.010490667074918747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,8191,0.01841066653529803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,8191,0.012437333663304647
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,16383,0.014933332800865173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,16383,0.020362666497627895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,32767,0.02492800106604894
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,32767,0.018986667195955913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,65535,0.031013332307338715
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,65535,0.023936000963052113
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,1,0.009402666861812273
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,float16,131071,0.059546664357185364
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,1,0.009621333330869675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,2,128,1,float16,fp8,131071,0.03570666660865148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,3,0.009632000078757605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,3,0.009525333220760027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,7,0.009216000015536943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,7,0.009509333098928133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,15,0.009599999835093817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,15,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,31,0.010133333504199982
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,31,0.010234666367371878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,63,0.011962667107582092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,63,0.011989332735538483
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,127,0.011920000116030375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,127,0.011999999483426413
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,255,0.012250666817029318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,255,0.011754666765530905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,511,0.016447999825080235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,511,0.016362667083740234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,1023,0.017674667139848072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,1023,0.01724799970785777
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,2047,0.00895999992887179
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,2047,0.01469333345691363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,4095,0.015941333025693893
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,4095,0.010458666831254959
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,8191,0.01932799940307935
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,8191,0.012554666648308435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,16383,0.02165866643190384
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,16383,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,32767,0.03597866743803024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,32767,0.0220320001244545
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,65535,0.050853331883748375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,65535,0.028714666763941448
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,float16,131071,0.08291199803352356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,1,0.009568000212311745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,3,0.009279999881982803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,3,0.009397333487868309
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,8,4,128,1,float16,fp8,131071,0.04251199960708618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,1,0.009610666582981745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,7,0.009519999846816063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,7,0.00943999985853831
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,15,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,15,0.009818666925032934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,31,0.010314666976531347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,31,0.010346666599313417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,63,0.011898666620254517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,63,0.01179733375708262
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,127,0.012026666353146235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,127,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,255,0.012069333344697952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,511,0.01647466669480006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,511,0.016544000556071598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,255,0.012144000579913458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,1023,0.017477333545684814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,1023,0.017562666287024815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,2047,0.017504000415404636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,2047,0.017674667139848072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,4095,0.01634666696190834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,4095,0.010773333410422007
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,8191,0.01303999995191892
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,8191,0.019610666980346043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,16383,0.021162666380405426
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,16383,0.015290666371583939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,32767,0.025839999318122864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,32767,0.01930133377512296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,65535,0.031983998914559685
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,65535,0.02388266722361247
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,1,0.00926399976015091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,1,0.009482666850090027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,3,0.00927466650803884
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,fp8,131071,0.03566933423280716
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,1,128,1,float16,float16,131071,0.06214400132497152
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,3,0.009573333586255709
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,7,0.009413333609700203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,7,0.009450666606426239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,15,0.009317333499590555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,15,0.009632000078757605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,31,0.01028266673286756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,31,0.0103946669648091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,63,0.012159999459981918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,63,0.012042666474978128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,127,0.011930666863918304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,255,0.012069333344697952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,255,0.011930666863918304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,127,0.011898666620254517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,511,0.016538667182127636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,511,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,1023,0.017621333400408428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,1023,0.01748266691962878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,2047,0.009066666786869368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,2047,0.015205333630243937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,4095,0.01032533310353756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,4095,0.016693333784739178
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,8191,0.020026666422684986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,8191,0.01232533281048139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,16383,0.021957332889238994
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,16383,0.015125333021084467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,32767,0.02143466720978419
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,32767,0.035642666121323906
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,65535,0.050250664353370667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,65535,0.028149334092934925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,1,0.009375999992092451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,1,0.009653333574533463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,float16,131071,0.08257600168387096
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,2,128,1,float16,fp8,131071,0.042725334564844765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,3,0.009338666374484697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,3,0.009578666960199675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,7,0.009722666814923286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,7,0.009328000247478485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,15,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,15,0.010037333394090334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,31,0.010202666744589806
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,31,0.010496000448862711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,63,0.012047999848922094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,63,0.011834666132926941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,127,0.011952000359694162
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,127,0.0120319997270902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,255,0.012223999947309494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,255,0.01228800043463707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,511,0.016623999923467636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,511,0.01642666632930438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,1023,0.013663999736309052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,1023,0.008250666782259941
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,2047,0.01600533351302147
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,2047,0.00926399976015091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,4095,0.017231999586025875
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,4095,0.01062400018175443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,8191,0.026677332818508148
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,8191,0.014117332796255747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,16383,0.031983998914559685
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,16383,0.01724799970785777
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,32767,0.052149335543314614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,32767,0.02481599897146225
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,65535,0.07976000010967255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,65535,0.0510453333457311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,1,0.014896000425020853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,1,0.01523200049996376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,float16,131071,0.1355946660041809
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,8,4,128,1,float16,fp8,131071,0.06875200072924297
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,3,0.015290666371583939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,3,0.015413332730531693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,7,0.015306666493415833
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,7,0.015541333705186844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,15,0.0163680004576842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,15,0.016645333419243496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,31,0.019978666057189304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,31,0.01982933282852173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,63,0.020138667275508244
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,63,0.020069333414236706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,127,0.010656000425418219
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,127,0.007173333317041397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,255,0.013541333377361298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,255,0.00877333308259646
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,511,0.019402666638294857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,511,0.009514666472872099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,1023,0.027727998793125153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,1023,0.015301333119471868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,2047,0.04494399825731913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,2047,0.019205333044131596
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,4095,0.07330666482448578
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,4095,0.030720000465710957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,8191,0.08899733424186707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,8191,0.043791999419530235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,16383,0.13029332955678305
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,16383,0.06758399804433186
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,1,0.014874666929244995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,1,0.015226667126019796
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,3,0.015119999647140503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,3,0.015413332730531693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,float16,32767,0.21482133865356445
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,7,0.015247999380032221
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,1,128,1,float16,fp8,32767,0.11488533020019531
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,7,0.015546667079130808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,15,0.01640533283352852
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,15,0.016805333395799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,31,0.019765333582957584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,31,0.019882666567961376
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,63,0.020026666422684986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,127,0.010874666273593903
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,127,0.008181333541870117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,255,0.009375999992092451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,255,0.02073066681623459
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,511,0.028351999819278717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,63,0.020202666521072388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,511,0.013605333864688873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,1023,0.045519997676213585
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,2047,0.06601066887378693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,4095,0.10020800431569417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,1023,0.020479999482631683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,4095,0.04083200047413508
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,2047,0.0268053337931633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,8191,0.06467733283837636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,8191,0.12710932890574136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,16383,0.21203200022379556
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,1,0.014970666418472925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,16383,0.11130666732788086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,1,0.015392000476519266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,3,0.015274666249752045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,float16,32767,0.5830026865005493
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,3,0.015728000551462173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,2,128,1,float16,fp8,32767,0.20524267355600992
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,7,0.015253332753976187
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,7,0.015717333803574245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,15,0.01661866654952367
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,31,0.019904000063737232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,15,0.01685333376129468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,31,0.020143999407688778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,127,0.015461333096027374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,63,0.020128000527620316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,127,0.00761600024998188
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,255,0.030048000315825146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,63,0.020213333268960316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,255,0.013338666409254074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,511,0.019845332950353622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,511,0.04659200211366018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,1023,0.0620959997177124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,2047,0.04002666721741358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,2047,0.08555733164151509
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,1023,0.02573866645495097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,4095,0.06368533273537953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,4095,0.1280639966328939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,8191,0.3045919934908549
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,16383,0.38093332449595135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,8191,0.11085333426793416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,16383,0.2041119933128357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,1,0.009301333377758661
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,1,0.010837333897749582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,float16,32767,0.7177706559499105
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,3,0.00949866697192192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,3,0.009850666547815004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,15,0.00949866697192192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,7,0.009754666437705358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,8,4,128,1,float16,fp8,32767,0.3911893367767334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,7,0.010474666953086853
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,31,0.010480000327030817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,31,0.010341333225369453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,15,0.01097600037852923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,63,0.012159999459981918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,63,0.012074666718641916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,127,0.012165332833925882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,127,0.011871999750534693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,255,0.0124746672809124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,511,0.01648533344268799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,255,0.012047999848922094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,1023,0.017770666629076004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,2047,0.0169813334941864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,1023,0.017711999515692394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,511,0.016688000410795212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,4095,0.010901333143313726
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,2047,0.00973866693675518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,8191,0.021168000996112823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,4095,0.018090666582187016
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,8191,0.012944000462690989
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,16383,0.015344000111023584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,16383,0.023823998868465424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,32767,0.03771200031042099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,32767,0.022282667458057404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,65535,0.05299733579158783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,65535,0.02942933390537898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,1,0.009679999823371569
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,1,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,fp8,131071,0.04380266865094503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,1,128,1,float16,float16,131071,0.0862613320350647
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,3,0.010549332946538925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,3,0.009797333429257074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,7,0.009712000067035357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,7,0.009514666472872099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,15,0.00961599995692571
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,15,0.00997866690158844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,31,0.010597333312034607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,63,0.011936000237862269
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,31,0.011658667276302973
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,127,0.012149333953857422
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,255,0.012378666549921036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,63,0.012229333321253458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,255,0.0122079998254776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,127,0.012069333344697952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,511,0.016528000434239704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,511,0.01647466669480006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,1023,0.008789333204428354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,2047,0.017173333714405697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,1023,0.01341333364446958
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,4095,0.018309333672126133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,4095,0.010618666807810465
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,2047,0.009429333110650381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,8191,0.0273333340883255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,8191,0.014432000617186228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,16383,0.03328000009059906
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,16383,0.01756799966096878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,32767,0.05450133482615153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,32767,0.02475733309984207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,65535,0.0805866668621699
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,65535,0.05113600194454193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,float16,131071,0.13587199648221335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,1,0.009349333122372627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,1,0.009861333295702934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,3,0.009578666960199675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,3,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,7,0.009503999724984169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,2,128,1,float16,fp8,131071,0.09486933549245198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,7,0.009824000298976898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,15,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,15,0.009962666779756546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,31,0.010399999717871347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,31,0.01081066702802976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,63,0.012202666451533636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,63,0.012015999605258306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,127,0.012181332955757776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,127,0.01228800043463707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,255,0.012256000190973282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,255,0.012080000092585882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,511,0.012725333372751871
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,511,0.007733333234985669
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,1023,0.013471999516089758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,1023,0.008309333274761835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,2047,0.022261333962281544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,4095,0.02569599946339925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,2047,0.010415999839703241
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,4095,0.012069333344697952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,8191,0.036501333117485046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,8191,0.01600533351302147
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,16383,0.028384000062942505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,16383,0.05593066910902659
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,32767,0.10013332962989807
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,32767,0.04190400242805481
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,65535,0.13589866956075033
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,65535,0.07167999943097432
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,1,0.014917333920796713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,3,0.01526933287580808
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,1,0.01469333345691363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,fp8,131071,0.11678399642308553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,8,4,128,1,float16,float16,131071,0.23635200659434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,3,0.015429332852363586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,7,0.015824000040690105
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,7,0.01586666703224182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,15,0.019482667247454327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,15,0.01931200052301089
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,31,0.01937599976857503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,31,0.01950399950146675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,63,0.019509332875410717
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,63,0.019632000476121902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,127,0.01090666651725769
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,127,0.007311999797821045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,255,0.021087999145189922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,255,0.009317333499590555
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,511,0.02805333336194356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,511,0.015279999623696009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,1023,0.02213866760333379
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,1023,0.04597333570321401
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,2047,0.029792000850041706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,2047,0.06726400057474773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,4095,0.08760533730189006
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,4095,0.05362133185068766
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,8191,0.12903466820716858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,8191,0.06709866722424825
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,1,0.014922666052977243
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,1,0.015082667271296183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,float16,16383,0.21514666080474854
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,3,0.015205333630243937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,3,0.01552533358335495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,7,0.01578666642308235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,7,0.016095999628305435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,15,0.019434666881958645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,1,128,1,float16,fp8,16383,0.11474666992823283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,15,0.019381333142518997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,63,0.01966399947802226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,63,0.01956266661485036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,31,0.019306667149066925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,31,0.019472000499566395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,127,0.01568000018596649
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,127,0.007466666400432587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,255,0.030181333422660828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,255,0.013503999759753546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,511,0.04649066428343455
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,511,0.019909333437681198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,1023,0.02657066782315572
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,2047,0.08578667044639587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,2047,0.04060266663630804
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,1023,0.06615466872851054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,4095,0.1269813378651937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,8191,0.2137706677118937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,1,0.014789332946141561
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,4095,0.06426133215427399
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,8191,0.11200533310572307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,1,0.015141333142916361
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,fp8,16383,0.20570667584737143
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,2,128,1,float16,float16,16383,0.38526399930318195
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,3,0.015418666104475657
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,3,0.015589332828919092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,7,0.015728000551462173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,15,0.019610666980346043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,7,0.016143999993801117
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,15,0.01952533299724261
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,31,0.019600000232458115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,63,0.019653332730134327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,63,0.019610666980346043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,127,0.025216000775496166
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,31,0.019487999379634857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,127,0.010911999891201654
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,255,0.04939733445644379
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,255,0.019733333339293797
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,511,0.025648000339667004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,511,0.06489066779613495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,1023,0.08313600222269694
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,1023,0.040618665516376495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,2047,0.06405866642793019
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,2047,0.13009599844614664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,4095,0.21658132473627725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,8191,0.38758933544158936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,4095,0.15478400389353433
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,8191,0.20444266001383463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,1,0.025829332570234936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,1,0.026330667237440746
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,3,0.026736001173655193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,float16,16383,0.725653330485026
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,8,4,128,1,float16,fp8,16383,0.39244266351064044
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,3,0.027141332626342773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,7,0.027701333165168762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,7,0.02804800122976303
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,15,0.034789333740870156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,15,0.034858666360378265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,31,0.0347680002450943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,31,0.035018667578697205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,63,0.03542399903138479
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,63,0.035045333206653595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,127,0.015802666544914246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,127,0.009994666402538618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,255,0.03036266565322876
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,511,0.04710933566093445
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,1023,0.06211199859778086
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,255,0.015216000378131866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,511,0.022005334496498108
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,1023,0.0284853329261144
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,2047,0.10217600067456563
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,1,0.02590399980545044
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,1,0.026314665873845417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,4095,0.0669653316338857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,fp8,2047,0.04312533140182495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,3,0.02717333287000656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,1,128,1,float16,float16,4095,0.1288266678651174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,3,0.026746665438016255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,7,0.027690666417280834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,7,0.028016000986099243
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,15,0.03491200009981791
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,15,0.03474666674931844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,31,0.03528533379236857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,127,0.011173332730929056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,127,0.025472000241279602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,31,0.034671999514102936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,63,0.03513066718975703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,63,0.03537066777547201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,255,0.04969066878159841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,255,0.020021333048741024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,511,0.06512533128261566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,511,0.02683199942111969
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,1023,0.0981066624323527
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,2047,0.1302293340365092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,1023,0.041050667564074196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,2047,0.06503466765085857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,float16,4095,0.21799999475479126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,2,128,1,float16,fp8,4095,0.11191466450691223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,1,0.026362667481104534
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,1,0.02606933315594991
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,3,0.02701333413521449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,15,0.03497066597143809
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,31,0.03526400029659271
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,3,0.027632000545660656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,7,0.028010666370391846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,15,0.03499199946721395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,7,0.028250666956106823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,31,0.035114665826161705
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,63,0.035216001172860466
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,127,0.04470933477083842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,63,0.03535466641187668
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,127,0.016010666886965435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,255,0.054005334774653115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,255,0.021759999295075733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,511,0.07193066676457723
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,511,0.03710933278004328
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,1023,0.1586133340994517
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,1023,0.06084799766540527
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,2047,0.20655999581019083
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,2047,0.10723732908566792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,1,0.04744533201058706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,3,0.04981866478919983
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,float16,4095,0.37795201937357586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,1,0.04844800134499868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,3,0.04925866425037384
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,7,0.05144000053405762
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,8,4,128,1,float16,fp8,4095,0.20238399505615234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,7,0.05158400038878123
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,15,0.06407466530799866
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,15,0.06393066545327504
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,63,0.0645546664794286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,31,0.06457066535949707
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,31,0.06427733103434245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,63,0.06471466521422069
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,127,0.02619733413060506
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,511,0.0653599997361501
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,127,0.01250133290886879
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,255,0.05128000179926554
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,255,0.022117334107557934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,511,0.03012799968322118
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,1023,0.08454400300979614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,1023,0.05485333502292633
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,1,0.04766400158405304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,fp8,2047,0.06760000189145406
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,1,128,1,float16,float16,2047,0.13114666938781738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,1,0.0483893354733785
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,3,0.049498667319615684
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,3,0.050250664353370667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,7,0.05136533578236898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,7,0.05163733164469401
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,15,0.06419733166694641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,15,0.06442666550477345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,31,0.06457599997520447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,63,0.06484266618887584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,127,0.04550399879614512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,63,0.06477333108584087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,31,0.06426666676998138
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,127,0.01616000011563301
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,255,0.05425600210825602
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,255,0.022885332504908245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,1023,0.11634666721026103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,511,0.07272533575693767
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,511,0.03736000011364619
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,1023,0.0614026685555776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,1,0.04780800143877665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,float16,2047,0.20840533574422201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,3,0.04965866605440775
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,2,128,1,float16,fp8,2047,0.15308266878128052
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,1,0.04888000090916952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,3,0.05062933266162872
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,7,0.05142933130264282
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,15,0.06465066472689311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,7,0.05221866567929586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,15,0.06451733410358429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,31,0.06459199885527293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,31,0.06493333478768666
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,127,0.02937600016593933
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,63,0.07117866476376851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,63,0.06506666541099548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,127,0.08203200002511342
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,255,0.09523733456929524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,255,0.04286933441956838
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,511,0.13063466548919678
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,1023,0.20986666282018027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,511,0.06562666594982147
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,1,0.009925333162148794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,1,0.010709332923094431
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,1023,0.1572480003039042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,3,0.009525333220760027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,3,0.009919999788204828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,float16,2047,0.38573865095774335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,8,4,128,1,float16,fp8,2047,0.20240533351898193
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,7,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,31,0.010405333091815313
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,7,0.010469333579142889
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,15,0.010869332899649939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,31,0.010640000303586325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,15,0.010640000303586325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,63,0.012175999581813812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,127,0.012266666938861212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,63,0.012138667205969492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,127,0.012213333199421564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,511,0.01684800038735072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,255,0.012442667037248611
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,255,0.012437333663304647
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,511,0.01664000004529953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,1023,0.008549333239595095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,1023,0.01360000049074491
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,2047,0.009717333440979322
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,4095,0.01860800012946129
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,2047,0.01720000058412552
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,4095,0.011109333485364914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,8191,0.02769600103298823
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,16383,0.033301333586374916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,16383,0.018378666291634243
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,8191,0.014885333677132925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,32767,0.05513066550095876
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,32767,0.026165333886941273
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,65535,0.0820853312810262
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,1,0.009488000224033991
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,1,0.009904000287254652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,3,0.009482666850090027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,65535,0.053301334381103516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,fp8,131071,0.070933332045873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,3,0.010005333150426546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,1,128,1,float16,float16,131071,0.13686399658521017
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,7,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,7,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,15,0.010058666889866194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,15,0.00973866693675518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,31,0.010506667196750641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,31,0.01062400018175443
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,63,0.012426666915416718
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,63,0.012485332787036896
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,127,0.012191999703645706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,255,0.01246400053302447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,127,0.012133333832025528
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,255,0.012293333808581034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,511,0.007567999884486198
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,511,0.012554666648308435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,1023,0.013370666652917862
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,1023,0.008362666393319765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,2047,0.02205866575241089
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,2047,0.010357333347201347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,4095,0.025397333006064098
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,4095,0.012042666474978128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,8191,0.03721600025892258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,8191,0.01643199970324834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,16383,0.056015998125076294
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,16383,0.02805333336194356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,32767,0.08282666901747386
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,32767,0.0413973331451416
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,65535,0.13501866658528647
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,65535,0.09269866347312927
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,1,0.009589333087205887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,1,0.009872000043590864
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,float16,131071,0.23407999674479166
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,3,0.009642666826645533
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,2,128,1,float16,fp8,131071,0.11642666657765706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,3,0.009898666913310686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,7,0.00972800018886725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,7,0.009749333063761393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,15,0.009813333551088968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,15,0.010266666611035665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,31,0.010469333579142889
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,63,0.0122079998254776
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,31,0.010501333822806677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,63,0.012341332932313284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,127,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,127,0.012138667205969492
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,255,0.013007999708255133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,255,0.007941333577036858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,511,0.013034666577974955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,511,0.00789866658548514
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,1023,0.019248000035683315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,1023,0.009359999870260557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,2047,0.02864533414443334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,2047,0.011685332904259363
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,4095,0.04114133367935816
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,4095,0.01870399961868922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,8191,0.05929600199063619
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,8191,0.025013332565625507
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,16383,0.09098133444786072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,16383,0.04387199878692627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,32767,0.1302186648050944
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,32767,0.08885332942008972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,65535,0.22100265820821127
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,65535,0.11189333597819011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,1,0.09009599685668945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,1,0.09177600344022115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,fp8,131071,0.20320000251134238
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,8,4,128,1,float16,float16,131071,0.40598400433858234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,3,0.0932373305161794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,3,0.09451733032862346
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,7,0.09724266330401103
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,7,0.09854933619499207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,15,0.12221866846084595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,15,0.1225920021533966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,31,0.12296000123023987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,31,0.12229866782824199
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,63,0.12329600254694621
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,63,0.1229759951432546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,127,0.047184000412623085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,127,0.018266666680574417
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,255,0.055125330885251365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,255,0.026704000929991405
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,511,0.07595733304818471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,511,0.04041599979003271
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,1,0.09046399593353271
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,float16,1023,0.11994666854540507
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,1,128,1,float16,fp8,1023,0.08614933490753174
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,3,0.09355200330416362
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,3,0.09494933485984802
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,1,0.09213866790135701
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,7,0.09733333190282185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,7,0.09857066472371419
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,15,0.12264532844225566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,15,0.12270933389663696
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,63,0.1311253309249878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,31,0.12301866213480632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,31,0.12276267011960347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,127,0.08299200236797333
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,63,0.1234826644261678
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,255,0.043712000052134194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,127,0.03166933357715607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,511,0.13199466466903687
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,255,0.09661333759625752
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,511,0.08844799796740214
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,1,0.09076266487439473
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,float16,1023,0.2113599975903829
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,2,128,1,float16,fp8,1023,0.11275200049082439
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,1,0.09259200096130371
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,3,0.09582933783531189
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,7,0.09934932986895244
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,3,0.09377066294352214
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,7,0.097871998945872
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,15,0.12327466408411662
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,31,0.13141866525014242
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,15,0.12372266252835591
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,63,0.13317333658536276
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,31,0.12396799524625142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,63,0.13502933581670126
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,127,0.15252799789110819
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,127,0.05624533196290334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,255,0.07614933451016744
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,511,0.11771200100580852
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,255,0.17762666940689087
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,511,0.2440053423245748
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,float16,1023,0.3940426508585612
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,8,4,128,1,float16,fp8,1023,0.2990079919497172
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,float16,3,0.1805973251660665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,float16,1,0.17449599504470825
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,fp8,1,0.1781760056813558
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,fp8,3,0.18279467026392618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,float16,15,0.2391200065612793
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,float16,7,0.19004799922307333
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,fp8,15,0.23972266912460327
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,float16,31,0.2423093318939209
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,fp8,7,0.19225066900253296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,fp8,31,0.23914132515589395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,float16,63,0.25191466013590497
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,fp8,63,0.24388267596562704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,float16,127,0.08638399839401245
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,float16,1,0.17480534315109253
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,fp8,1,0.17849600315093994
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,float16,255,0.1113973359266917
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,fp8,255,0.04915733138720194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,fp8,127,0.036981334288915
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,float16,3,0.18092799186706543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,fp8,511,0.07115733126799266
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,fp8,3,0.18371200561523438
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,float16,7,0.19021866718928018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,fp8,7,0.19287467002868652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,float16,15,0.24204800526301065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,float16,31,0.2518879969914754
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,fp8,15,0.24048000574111938
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,1,128,1,float16,float16,511,0.13710932930310568
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,fp8,31,0.2439253330230713
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,float16,63,0.25407999753952026
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,fp8,63,0.25391467412312824
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,float16,127,0.1606986622015635
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,fp8,127,0.05845866600672404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,float16,255,0.18100800116856894
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,float16,1,0.17535465955734253
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,fp8,1,0.1795626680056254
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,float16,3,0.18225600322087607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,fp8,3,0.18529067436854044
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,float16,511,0.24757333596547446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,fp8,255,0.07825600107510884
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,float16,7,0.19494932889938354
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,fp8,7,0.1945599913597107
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,float16,15,0.24978133042653403
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,fp8,15,0.24667733907699585
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,float16,31,0.25520533323287964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,2,128,1,float16,fp8,511,0.11967999736467998
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,fp8,31,0.2542933424313863
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,float16,63,0.2576693296432495
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,fp8,63,0.25709333022435504
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,float16,127,0.29467199246088666
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,fp8,127,0.10579199592272441
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,1,0.009797333429257074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,1,0.009786666681369146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,float16,255,0.3685493469238281
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,3,0.009866666669646898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,3,0.010037333394090334
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,fp8,255,0.1432799994945526
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,7,0.00979200005531311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,7,0.009765333185593287
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,15,0.009775999933481216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,15,0.0100853331387043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,31,0.010570666442314783
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,float16,511,0.47199467817942303
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,63,0.012479999413092932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,63,0.012063999970753988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,8,4,128,1,float16,fp8,511,0.2220426599184672
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,127,0.012304000556468964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,127,0.012154666086037954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,255,0.012624000509579977
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,255,0.012240000069141388
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,511,0.013167999684810638
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,1023,0.013376000026861826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,511,0.008282666405042013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,1023,0.008853333070874214
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,2047,0.011194666226704916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,2047,0.0220266655087471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,4095,0.025583999852339428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,4095,0.012719999998807907
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,8191,0.03798400113979975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,8191,0.0176959993938605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,16383,0.05718400080998739
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,16383,0.02980799973011017
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,32767,0.054383998115857445
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,32767,0.0843946635723114
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,65535,0.1362399955590566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,65535,0.07336000104745229
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,1,0.009695999945203463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,float16,131071,0.23344000180562338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,3,0.009589333087205887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,3,0.010058666889866194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,1,0.009829333052039146
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,1,128,1,float16,fp8,131071,0.11849600076675415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,7,0.009690666571259499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,7,0.009813333551088968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,15,0.009941333283980688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,15,0.010064000263810158
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,31,0.010597333312034607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,31,0.01073066641887029
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,127,0.012442667037248611
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,63,0.012432000289360682
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,63,0.012453333785136541
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,255,0.01303999995191892
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,127,0.012383999923865
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,255,0.007365333537260692
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,511,0.01333333303531011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,511,0.008442666381597519
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,1023,0.01922133316596349
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,1023,0.009232000137368837
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,2047,0.0288426677385966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,2047,0.011653333902359009
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,4095,0.041178666055202484
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,4095,0.018624000251293182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,8191,0.059818665186564125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,8191,0.024826665719350178
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,16383,0.08922666311264038
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,16383,0.043791999419530235
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,32767,0.1295840044816335
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,32767,0.0666240006685257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,65535,0.2182719906171163
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,1,0.009759999811649323
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,65535,0.15734933813412985
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,1,0.009839999799927076
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,3,0.009685333197315535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,3,0.009866666669646898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,float16,131071,0.40455468495686847
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,2,128,1,float16,fp8,131071,0.20293333133061728
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,7,0.009568000212311745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,7,0.010005333150426546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,15,0.010117333382368088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,15,0.00985599992175897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,31,0.010496000448862711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,31,0.010821333775917688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,63,0.012469333906968435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,63,0.012335999558369318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,127,0.010389333590865135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,127,0.006575999781489372
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,255,0.013253333667914072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,255,0.00821333316465219
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,511,0.019402666638294857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,511,0.008762666955590248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,1023,0.027999999622503918
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,1023,0.014127999544143677
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,2047,0.04377066592375437
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,2047,0.017621333400408428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,4095,0.07321066657702129
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,8191,0.08763200044631958
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,8191,0.041365332901477814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,4095,0.02701333413521449
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,16383,0.06484266618887584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,16383,0.12707733114560446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,32767,0.2113973299662272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,32767,0.11196266611417134
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,65535,0.584277351697286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,65535,0.20686399936676025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,1,0.010069333637754122
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,1,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,3,0.009648000200589498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,float16,131071,0.7232693036397299
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,3,0.009962666779756546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,8,4,128,1,float16,fp8,131071,0.3954293330510457
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,7,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,7,0.009808000177145004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,15,0.009941333283980688
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,15,0.009989333028594652
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,31,0.010522666076819101
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,31,0.010869332899649939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,63,0.012586666891972223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,63,0.012341332932313284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,127,0.012293333808581034
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,255,0.008016000191370646
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,255,0.012890666723251343
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,127,0.01246400053302447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,511,0.01293333371480306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,511,0.007941333577036858
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,1023,0.01932799940307935
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,1023,0.009413333609700203
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,2047,0.02882133424282074
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,2047,0.011861333002646765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,4095,0.04201599955558777
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,4095,0.018415999909241993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,8191,0.0602400004863739
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,8191,0.02510400116443634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,16383,0.09198400378227234
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,16383,0.04400533437728882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,32767,0.1694506605466207
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,32767,0.06690133114655812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,65535,0.11210133632024129
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,65535,0.21880000829696655
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,1,0.009599999835093817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,1,0.009882666791478792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,3,0.009568000212311745
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,3,0.009925333162148794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,float16,131071,0.4061013460159302
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,7,0.009626666704813639
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,1,128,1,float16,fp8,131071,0.20282133420308432
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,15,0.00985599992175897
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,7,0.009984000275532404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,15,0.010245333115259806
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,31,0.010591999938090643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,31,0.010709332923094431
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,63,0.012448000411192576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,127,0.010512000570694605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,63,0.012362666428089142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,255,0.013568000247081121
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,255,0.007637333124876022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,127,0.0064853330453236895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,511,0.019093333433071773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,511,0.008672000219424566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,1023,0.028016000986099243
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,1023,0.013962666193644205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,2047,0.017621333400408428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,2047,0.044293334086736046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,4095,0.06887466708819072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,4095,0.027285332481066387
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,8191,0.10174399614334106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,8191,0.04109866668780645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,16383,0.12935466567675272
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,16383,0.06451733410358429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,32767,0.2118826707204183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,32767,0.11197866996129353
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,65535,0.3801120122273763
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,65535,0.20570667584737143
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,1,0.009397333487868309
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,1,0.009381333366036415
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,3,0.009375999992092451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,3,0.009557333464423815
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,float16,131071,1.141493320465088
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,32,1,4,2,128,1,float16,fp8,131071,0.39401598771413165
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,7,0.009183999771873156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,7,0.009408000235756239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,15,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,15,0.009509333098928133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,31,0.010255999863147736
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,31,0.01009599988659223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,63,0.011770666887362799
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,63,0.011930666863918304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,127,0.0120319997270902
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,127,0.011733333269755045
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,255,0.012128000458081564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,255,0.011887999872366587
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,511,0.016282666474580765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,511,0.016042667130629223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,1023,0.017642666896184284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,1023,0.01722666621208191
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,2047,0.01762666677435239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,4095,0.017685333887736004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,4095,0.01762666677435239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,2047,0.01738133281469345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,8191,0.018570666511853535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,8191,0.012549333274364471
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,16383,0.02000533292690913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,16383,0.014736000448465347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,32767,0.02455466737349828
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,32767,0.01889066646496455
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,65535,0.029285334050655365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,65535,0.024160000185171764
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,float16,131071,0.03958400090535482
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,1,128,1,float16,fp8,131071,0.03107200066248576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,1,0.009237333511312803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,1,0.009472000102202097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,3,0.009248000259200731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,3,0.009402666861812273
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,7,0.009109333157539368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,7,0.009359999870260557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,15,0.00943999985853831
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,15,0.009722666814923286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,31,0.010197333370645842
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,31,0.010165333126982054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,63,0.011930666863918304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,63,0.01179733375708262
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,127,0.011968000481526056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,127,0.011813333878914515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,255,0.012106666962305704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,511,0.01628799984852473
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,255,0.011866666376590729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,511,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,1023,0.01725333308180173
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,2047,0.017445333302021027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,2047,0.0173333336909612
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,1023,0.017450666675964992
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,4095,0.010314666976531347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,4095,0.015722667177518208
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,8191,0.012309333930412928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,8191,0.01887999971707662
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,16383,0.020960000654061634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,16383,0.01492799942692121
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,32767,0.01889066646496455
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,32767,0.0252960001428922
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,65535,0.03181866556406021
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,65535,0.02385066697994868
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,1,0.009343999748428663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,float16,131071,0.07070933282375336
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1,1,4,2,128,1,float16,fp8,131071,0.03531199942032496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,1,0.009359999870260557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,3,0.00943999985853831
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,3,0.009338666374484697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,7,0.00919999989370505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,7,0.00972800018886725
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,15,0.009642666826645533
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,15,0.009648000200589498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,31,0.010005333150426546
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,31,0.010165333126982054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,63,0.011936000237862269
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,63,0.011706666400035223
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,127,0.01198400060335795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,127,0.011957333733638128
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,255,0.012037333101034164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,511,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,255,0.012117333710193634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,511,0.016469333320856094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,1023,0.01747200017174085
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,1023,0.017290666699409485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,2047,0.017887999614079792
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,2047,0.01756799966096878
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,4095,0.016122666498025257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,4095,0.010458666831254959
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,8191,0.01978133370478948
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,8191,0.01268799975514412
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,16383,0.021317332983016968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,16383,0.014688000082969666
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,32767,0.02566933383544286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,32767,0.019050666441520054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,65535,0.03199466566244761
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,1,0.009365333244204521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,65535,0.023957334458827972
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,1,0.009322666873534521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,float16,131071,0.0613973339398702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,1,128,1,float16,fp8,131071,0.03597866743803024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,3,0.009375999992092451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,3,0.00961599995692571
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,7,0.009375999992092451
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,7,0.009472000102202097
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,15,0.009477333476146063
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,15,0.009599999835093817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,31,0.010266666611035665
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,31,0.01032533310353756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,63,0.012245333443085352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,63,0.011962667107582092
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,127,0.012005332857370377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,255,0.011920000116030375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,127,0.012053333222866058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,255,0.01190399999419848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,511,0.016399999459584553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,511,0.016399999459584553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,1023,0.017477333545684814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,1023,0.01736533393462499
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,2047,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,2047,0.00915733352303505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,4095,0.016719999412695568
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,4095,0.010341333225369453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,8191,0.020517333100239437
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,8191,0.012576000144084295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,16383,0.022672000030676525
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,16383,0.015173333386580149
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,32767,0.03752533346414566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,32767,0.02199999988079071
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,65535,0.05269333223501841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,65535,0.027978666126728058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,float16,131071,0.08524266878763835
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,1,0.009136000027259191
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2,1,4,2,128,1,float16,fp8,131071,0.04262933135032654
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,3,0.009103999783595404
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,1,0.009082666908701261
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,3,0.009530666594703993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,7,0.009232000137368837
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,7,0.009370666618148485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,15,0.0099093330403169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,15,0.010298666854699453
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,31,0.011616000284751257
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,31,0.011813333878914515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,63,0.011749333391586939
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,63,0.011866666376590729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,127,0.010399999717871347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,127,0.0064853330453236895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,255,0.013541333377361298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,255,0.007701333612203598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,511,0.019178666174411774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,511,0.008687999720374743
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,1023,0.014021333307027817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,1023,0.027978666126728058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,2047,0.04394133388996124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,2047,0.017711999515692394
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,4095,0.027258666853109997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,4095,0.0692853331565857
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,8191,0.052111998200416565
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,8191,0.08860799670219421
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,16383,0.06549333532651265
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,16383,0.1291253368059794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,32767,0.21326400836308798
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,32767,0.11251200238863628
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,1,0.008986666798591614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,1,0.00926399976015091
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,3,0.009279999881982803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,3,0.009392000113924345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,7,0.009216000015536943
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,7,0.009434666484594345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,15,0.010090666512648264
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,15,0.01028266673286756
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,float16,65535,0.3831839958826701
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,1,128,1,float16,fp8,65535,0.20757333437601724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,31,0.011754666765530905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,31,0.011920000116030375
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,63,0.011802667131026586
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,63,0.011866666376590729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,127,0.010426666587591171
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,127,0.0069866664707660675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,255,0.008639999975760778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,255,0.020986666282018025
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,511,0.02844800055027008
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,511,0.013354666531085968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,1023,0.019989332805077236
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,1023,0.04637333254019419
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,2047,0.06611200173695882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,2047,0.025920001169045765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,4095,0.04035733391841253
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,4095,0.08731733759244283
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,8191,0.1260640025138855
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,8191,0.06388266881306966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,16383,0.30480533838272095
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,16383,0.11074666182200114
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,1,0.009322666873534521
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,1,0.009743999689817429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,32767,0.3800640106201172
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,32767,0.20398932695388794
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,3,0.009306666751702627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,3,0.009599999835093817
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,7,0.009130666653315226
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,fp8,65535,0.39162667592366535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,64,1,4,2,128,1,float16,float16,65535,0.7178560098012289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,7,0.009706666693091393
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,15,0.00980266680320104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,15,0.009589333087205887
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,31,0.010224000240365664
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,31,0.010191999996701876
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,63,0.011989332735538483
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,63,0.011813333878914515
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,255,0.012058666596810022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,255,0.011866666376590729
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,127,0.012335999558369318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,127,0.012186666329701742
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,511,0.01632533346613248
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,511,0.016613333175579708
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,1023,0.01758933315674464
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,1023,0.01752000053723653
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,2047,0.016688000410795212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,4095,0.018063999712467194
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,4095,0.010496000448862711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,2047,0.00919999989370505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,8191,0.021477334201335907
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,8191,0.01258133351802826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,16383,0.023541333774725597
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,16383,0.01515199989080429
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,32767,0.037248000502586365
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,32767,0.02144533395767212
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,65535,0.053743998209635414
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,65535,0.028079998989899952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,1,0.009189333145817121
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,1,0.009583999713261923
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,float16,131071,0.10757333040237427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,3,0.00915733352303505
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,3,0.009712000067035357
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,1,128,1,float16,fp8,131071,0.04237333436806997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,7,0.009493333597977957
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,7,0.00960533320903778
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,15,0.009808000177145004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,15,0.009594666461149851
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,31,0.010357333347201347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,31,0.010319999729593595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,63,0.012122667084137598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,127,0.012122667084137598
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,127,0.012015999605258306
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,63,0.012005332857370377
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,255,0.012080000092585882
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,255,0.012144000579913458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,511,0.016389333953460056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,511,0.016399999459584553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,1023,0.013637332866589228
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,1023,0.008282666405042013
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,2047,0.01703466723362605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,2047,0.009226666763424873
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,4095,0.01882133384545644
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,4095,0.010666667173306147
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,8191,0.02808533360560735
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,8191,0.014271999398867289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,16383,0.03364266703526179
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,16383,0.017456000049908955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,32767,0.05470400055249532
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,32767,0.024858665963013966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,65535,0.08160000046094258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,65535,0.0499893327554067
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,1,0.015034666905800501
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,1,0.015450666348139444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,float16,131071,0.18081599473953247
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,3,0.015082667271296183
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,4,1,4,2,128,1,float16,fp8,131071,0.0682773341735204
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,3,0.015578666081031164
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,7,0.015333333363135656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,7,0.015509333461523056
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,15,0.016650666793187458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,31,0.0198186660806338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,31,0.02000533292690913
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,63,0.020047999918460846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,15,0.016794666647911072
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,127,0.010522666076819101
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,127,0.006522666662931442
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,63,0.020026666422684986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,255,0.008383999889095625
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,255,0.020960000654061634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,511,0.02792533238728841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,511,0.013541333377361298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,1023,0.02025066688656807
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,2047,0.06665599842866261
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,1023,0.04624533156553904
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,2047,0.026629333694775898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,4095,0.10028800368309021
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,4095,0.040607998768488564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,8191,0.12745066483815512
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,8191,0.0645066648721695
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,1,0.014912000546852747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,1,0.01534933348496755
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,3,0.015397333850463232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,16383,0.11218133568763733
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,16383,0.21331733465194702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,3,0.015605332950750986
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,7,0.01516266663869222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,7,0.015696000307798386
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,15,0.016704000532627106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,fp8,32767,0.20570667584737143
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,15,0.01692266638080279
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,31,0.020031999796628952
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,31,0.019978666057189304
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,63,0.020058666666348774
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,63,0.020175999651352566
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,127,0.01573866605758667
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,1,128,1,float16,float16,32767,0.385045329729716
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,127,0.006858666737874349
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,255,0.03032533327738444
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,255,0.01341333364446958
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,511,0.019610666980346043
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,511,0.0469706654548645
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,1023,0.06228266656398773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,1023,0.025594666600227356
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,2047,0.040607998768488564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,2047,0.08593066533406575
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,4095,0.12787200013796488
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,4095,0.06442666550477345
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,8191,0.30530667304992676
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,8191,0.11106666922569275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,16383,0.3849066495895386
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,1,0.014959999670584997
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,16383,0.20469866196314493
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,3,0.01524266724785169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,1,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,7,0.01573333392540614
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,3,0.015594666202863058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,7,0.016197333733240765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,fp8,32767,0.3922346830368042
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,128,1,4,2,128,1,float16,float16,32767,0.7235626379648844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,15,0.01942933350801468
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,15,0.019440000255902607
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,31,0.019578666736682255
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,31,0.019386666516462963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,63,0.01953599974513054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,127,0.015466666469971338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,63,0.019472000499566395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,127,0.007216000308593114
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,255,0.030346666773160298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,255,0.013584000368913015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,511,0.04671466847260793
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,511,0.020021333048741024
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,1023,0.06206400195757548
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,1023,0.02625600000222524
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,2047,0.05178666611512502
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,2047,0.08640000224113464
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,4095,0.12819733222325644
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,4095,0.06462400158246358
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,1,0.014831999937693277
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,1,0.015141333142916361
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,3,0.015402667224407196
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,fp8,8191,0.11132267117500305
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,7,0.015893333901961643
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,1,128,1,float16,float16,8191,0.2160266637802124
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,7,0.0161013330022494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,15,0.019461333751678467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,3,0.015562667200962702
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,15,0.01941866676012675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,31,0.01956266661485036
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,31,0.01937599976857503
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,63,0.01964266722400983
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,127,0.011226666470368704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,63,0.019706666469573975
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,127,0.025349333882331848
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,255,0.0499946673711141
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,255,0.019930666933457058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,511,0.06948266426722209
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,511,0.02601066728432973
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,1023,0.08278400202592213
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,1023,0.040847999354203544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,2047,0.13005333145459494
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,2047,0.0645653357108434
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,1,0.025920001169045765
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,4095,0.1111253301302592
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,4095,0.2181546688079834
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,1,0.026533332963784535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,3,0.026922665536403656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,float16,8191,0.5849279959996542
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,3,0.02735999971628189
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,256,1,4,2,128,1,float16,fp8,8191,0.20551466941833496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,7,0.027765333652496338
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,7,0.02792533238728841
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,15,0.034789333740870156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,15,0.03488533447186152
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,31,0.0348693331082662
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,31,0.03509333233038584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,63,0.03531199942032496
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,63,0.03499199946721395
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,127,0.025605333348115284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,127,0.011152000476916632
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,511,0.07044800122578938
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,255,0.05003733436266581
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,511,0.026501332720120747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,255,0.020015999674797058
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,1023,0.04105599969625473
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,1023,0.08340266346931458
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,2047,0.13000532984733582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,2047,0.0653599997361501
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,1,0.02593066543340683
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,3,0.02682666728893916
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,1,0.026464000344276428
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,3,0.027424000203609467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,7,0.027850667635599773
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,7,0.02811199923356374
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,15,0.03495466709136963
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,float16,4095,0.31069332361221313
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,15,0.034976000587145485
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,31,0.03521066655715307
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,63,0.03537066777547201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,31,0.035258665680885315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,63,0.03522133330504099
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,1,128,1,float16,fp8,4095,0.11189333597819011
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,127,0.045312002301216125
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,255,0.06010666489601135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,255,0.02199466774861018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,127,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,511,0.07267199953397115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,511,0.03700799991687139
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,1023,0.11532800396283467
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,1,0.009445333232482275
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,2047,0.20675732692082724
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,1,0.00978133330742518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,1023,0.06100266675154368
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,2047,0.15177067120869955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,3,0.00949866697192192
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,3,0.009685333197315535
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,7,0.009663999701539675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,7,0.009466666728258133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,15,0.009888000165422758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,float16,4095,0.391541322072347
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,31,0.010277333358923594
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,15,0.010832000523805618
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,31,0.01044800008336703
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,63,0.012154666086037954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,63,0.012191999703645706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,512,1,4,2,128,1,float16,fp8,4095,0.20064000288645426
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,127,0.012085333466529846
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,255,0.012495999534924826
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,127,0.012106666962305704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,511,0.016575999557971954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,255,0.012245333443085352
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,511,0.01681600014368693
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,1023,0.013568000247081121
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,2047,0.017210666090250015
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,2047,0.00921066664159298
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,1023,0.009279999881982803
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,4095,0.010634666929642359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,4095,0.018874666343132656
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,8191,0.014202666779359182
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,8191,0.027813332776228588
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,16383,0.03347733368476232
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,16383,0.017258666455745697
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,32767,0.054058666030565895
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,65535,0.08214400211970012
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,65535,0.05107733110586802
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,32767,0.024879999458789825
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,1,0.009466666728258133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,float16,131071,0.1346933344999949
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,1,0.009850666547815004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,3,0.009509333098928133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,3,0.00979200005531311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,1,128,1,float16,fp8,131071,0.06910933554172516
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,7,0.009530666594703993
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,15,0.009541333342591921
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,15,0.009930666536092758
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,31,0.010277333358923594
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,7,0.011002667248249054
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,31,0.010496000448862711
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,63,0.012117333710193634
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,63,0.012191999703645706
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,127,0.01209066684047381
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,255,0.01239466667175293
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,127,0.012154666086037954
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,255,0.012282667060693106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,511,0.008090666805704435
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,511,0.013002666334311167
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,1023,0.013232000172138214
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,1023,0.008367999767263731
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,2047,0.02216533323129018
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,2047,0.010213333492477735
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,4095,0.01191466674208641
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,8191,0.03738666574160258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,4095,0.025477332373460133
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,8191,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,16383,0.05602133274078369
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,16383,0.027589333554108936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,32767,0.08334933718045552
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,32767,0.041322665909926094
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,65535,0.17813332875569662
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,65535,0.07149333258469899
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,1,0.04780266682306925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,float16,131071,0.2344320019086202
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,3,0.04920533299446106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,1,0.04852266609668732
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,3,0.05020266771316528
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,7,0.051354666550954185
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,8,1,4,2,128,1,float16,fp8,131071,0.11598933736483256
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,7,0.051738664507865906
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,15,0.06432533264160156
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,63,0.06484266618887584
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,63,0.06469866633415222
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,15,0.0643093337615331
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,31,0.06467733283837636
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,127,0.016314666718244553
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,31,0.06434133152167003
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,127,0.045594667394955955
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,255,0.05493866900602976
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,255,0.02266666789849599
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,511,0.09468266367912292
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,1,0.04797333478927612
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,511,0.03756800045569738
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,1,0.04879466692606608
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,1023,0.060959999759991966
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,3,0.0495413343111674
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,3,0.050698667764663696
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,1023,0.11818666259447734
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,7,0.052111998200416565
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,15,0.06447466711203258
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,7,0.05162666738033295
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,float16,2047,0.20853332678476968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,15,0.064560001095136
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,31,0.06491733094056447
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,63,0.06984533369541168
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,31,0.0646666685740153
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,63,0.06500266492366791
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,1,128,1,float16,fp8,2047,0.10807999968528748
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,127,0.08258666594823201
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,255,0.10427733262379964
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,255,0.04275733232498169
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,127,0.029893333713213604
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,511,0.13019200166066489
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,511,0.06554666658242543
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,1023,0.2100106676419576
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,1,0.0904266635576884
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,1,0.09218133489290874
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,3,0.09352533022562663
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,1023,0.11161599556605022
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,float16,2047,0.38813865184783936
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,1024,1,4,2,128,1,float16,fp8,2047,0.30265067021052044
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,7,0.09739733735720317
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,7,0.09870400031407674
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,3,0.09484799702962239
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,15,0.12265599767367046
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,15,0.12277866403261821
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,31,0.1228000024954478
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,63,0.1346399982770284
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,31,0.12327999869982402
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,63,0.12316800157229106
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,127,0.08391466736793518
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,127,0.0317546675602595
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,255,0.04353066782156626
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,255,0.09654933214187622
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,1,0.09052266677220662
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,1,0.09258133172988892
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,3,0.09385599692662557
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,3,0.09577600161234538
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,511,0.088319996992747
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,511,0.13272000352541605
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,7,0.09773332873980205
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,7,0.09936533371607463
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,15,0.12314132849375407
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,15,0.12333333492279053
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,31,0.13461866974830627
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,31,0.12437333663304646
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,63,0.13239999612172446
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,fp8,1023,0.11170666416486104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,63,0.13326932986577353
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,127,0.15381866693496704
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,127,0.05705599983533224
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,1,128,1,float16,float16,1023,0.21465599536895752
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,255,0.19216533501942953
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,255,0.0771679977575938
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,1,0.009621333330869675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,3,0.009525333220760027
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,1,0.00983466642598311
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,3,0.009850666547815004
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,7,0.009824000298976898
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,7,0.00984533317387104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,15,0.009722666814923286
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,15,0.009999999776482582
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,31,0.010431999961535135
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,63,0.01227733368674914
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,31,0.010784000158309937
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,511,0.24512000878651938
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,63,0.012250666817029318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,127,0.012128000458081564
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,127,0.012309333930412928
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,255,0.012634667257467905
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,511,0.012671999633312225
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,511,0.11763733625411987
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,255,0.012362666428089142
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,511,0.007717333113153775
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,fp8,1023,0.2007733384768168
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,1023,0.013477332890033722
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,1023,0.0084906667470932
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,2047,0.022261333962281544
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,4095,0.012063999970753988
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,2047,0.01163200040658315
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,4095,0.025616000096003216
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,8191,0.03729599962631861
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,8191,0.0161920003592968
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,16383,0.05585066477457682
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,16383,0.027647999425729115
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,32767,0.1018239955107371
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,32767,0.041797334949175514
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,65535,0.07169066866238911
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,1,0.009658666948477427
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,65535,0.13588266571362814
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,float16,131071,0.23138133684794107
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,2048,1,4,2,128,1,float16,float16,1023,0.396725336710612
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,1,0.00997866690158844
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,1,128,1,float16,fp8,131071,0.11635733644167583
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,3,0.009861333295702934
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,7,0.009663999701539675
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,3,0.010645333677530289
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,7,0.010725333044926325
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,15,0.009898666913310686
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,15,0.010015999898314476
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,31,0.010805333654085795
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,31,0.010538666198650995
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,63,0.012234666695197424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,63,0.012261333564917246
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,255,0.013114667187134424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,127,0.012335999558369318
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,127,0.012319999436537424
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,255,0.007514666765928268
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,511,0.013429333766301474
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,511,0.007786666974425316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,1023,0.01930133377512296
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,1023,0.009258666386206945
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,2047,0.029114666084448498
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,4095,0.018426666657129925
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,4095,0.0417546679576238
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,2047,0.011402666568756104
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,8191,0.05975466469923655
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,8191,0.025061334172884624
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,16383,0.09098666906356812
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,16383,0.04334400097529093
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,32767,0.13083199659983316
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,32767,0.08798399567604065
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,65535,0.21778132518132529
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,65535,0.11183999975522359
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,fp8,131071,0.20279467105865479
TRTLLM,0.20.0,NVIDIA H100 80GB HBM3,generation_attention,16,1,4,2,128,1,float16,float16,131071,0.4055413405100505
